Coverage for rendercv/data/reader.py: 88%

97 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-26 00:25 +0000

1""" 

2The `rendercv.data.reader` module contains the functions that are used to read the input 

3file (YAML or JSON) and return it as an instance of `RenderCVDataModel`, which is a 

4Pydantic data model of RenderCV's data format. 

5""" 

6 

7import pathlib 

8import re 

9from typing import Optional 

10 

11import pydantic 

12import ruamel.yaml 

13 

14from . import models 

15from .models import entry_types 

16 

17 

def make_given_keywords_bold_in_sections(
    sections_input: models.Sections, keywords: list[str]
) -> models.Sections:
    """Make the given keywords bold in every entry of every section.

    Each value of `sections_input` is iterated as a sequence of entries. Plain
    string entries are passed to `entry_types.make_keywords_bold_in_a_string`,
    and entries that expose a callable `make_keywords_bold` attribute have that
    method called with `keywords`. The result of either call is stored back into
    the section at the same position.

    Args:
        sections_input: The sections to process: a mapping whose values are the
            entries of each section, or `None`.
        keywords: The keywords to make bold.

    Returns:
        The same `sections_input` object with its entries updated in place, or
        `None` if `sections_input` is `None`.
    """
    if sections_input is None:
        return None

    for entries in sections_input.values():
        # NOTE(review): assumes every section value is a mutable list so that
        # items can be reassigned by index -- confirm against `models.Sections`.
        for index, entry in enumerate(entries):
            if isinstance(entry, str):
                # Strings are immutable: the helper cannot change `entry` in
                # place, so its return value must be written back, otherwise
                # the bolding is silently lost.
                bold_text = entry_types.make_keywords_bold_in_a_string(
                    entry, keywords
                )
                if isinstance(bold_text, str):
                    entries[index] = bold_text
            elif callable(getattr(entry, "make_keywords_bold", None)):
                updated_entry = entry.make_keywords_bold(  # type: ignore
                    keywords
                )
                # Store the returned entry so the update survives whether the
                # method mutates in place or builds a new object. A `None`
                # return is treated as "mutated in place" and the existing
                # entry is kept.
                if updated_entry is not None:
                    entries[index] = updated_entry

    return sections_input

43 

44 

def get_error_message_and_location_and_value_from_a_custom_error(
    error_string: str,
) -> tuple[Optional[str], Optional[str], Optional[str]]:
    """Extract the parts of a custom error that was flattened into a string.

    Custom errors are raised with three string arguments (message, location,
    and input value), which Python renders as a parenthesised, comma-separated
    tuple of quoted strings. This function searches `error_string` for that
    rendering and splits it back into its three parts. The message may be
    wrapped in single or double quotes; the location and the input value must
    be wrapped in single quotes.

    Args:
        error_string: The error message to inspect.

    Returns:
        A `(message, location, input value)` tuple if `error_string` contains a
        custom error, otherwise `(None, None, None)`.
    """
    custom_error_match = re.search(
        r"""\(['"](.*)['"], '(.*)', '(.*)'\)""", error_string
    )
    if custom_error_match is None:
        return None, None, None

    custom_message, custom_location, custom_input = custom_error_match.groups()
    return custom_message, custom_location, custom_input

70 

71 

def parse_validation_errors(
    exception: pydantic.ValidationError,
) -> list[dict[str, str]]:
    """Take a Pydantic validation error, parse it, and return a list of error
    dictionaries that contain the error messages, locations, and the input values.

    The errors reported by `exception.errors()` are processed in three passes:

    1. Errors whose message contains "There are problems with the entries." and
       that carry a chained exception in their context are expanded: the errors
       of the chained exception are appended, relocated under the location of
       the outer error.
    2. Location elements that describe the model rather than the input file
       (e.g. `literal['present']`) are dropped from every location.
    3. Each error is turned into a `{"loc", "msg", "input"}` dictionary with a
       user-friendly message; duplicates are skipped.

    Args:
        exception: The Pydantic validation error object.

    Returns:
        A list of error dictionaries. `"loc"` is a tuple of strings, `"msg"` is
        the (possibly rewritten) message, and `"input"` is the input value
        converted to a string (empty for dictionaries and lists).
    """
    # This dictionary is used to convert the error messages that Pydantic returns to
    # more user-friendly messages.
    error_dictionary: dict[str, str] = {
        "Input should be 'present'": (
            "This is not a valid date! Please use either YYYY-MM-DD, YYYY-MM, or YYYY"
            ' format or "present"!'
        ),
        "Input should be a valid integer, unable to parse string as an integer": (
            "This is not a valid date! Please use either YYYY-MM-DD, YYYY-MM, or YYYY"
            " format!"
        ),
        "String should match pattern '\\d{4}-\\d{2}(-\\d{2})?'": (
            "This is not a valid date! Please use either YYYY-MM-DD, YYYY-MM, or YYYY"
            " format!"
        ),
        "String should match pattern '\\b10\\..*'": (
            'A DOI prefix should always start with "10.". For example,'
            ' "10.1109/TASC.2023.3340648".'
        ),
        "URL scheme should be 'http' or 'https'": "This is not a valid URL!",
        "Field required": "This field is required!",
        "value is not a valid phone number": "This is not a valid phone number!",
        "month must be in 1..12": "The month must be between 1 and 12!",
        "day is out of range for month": "The day is out of range for the month!",
        "Extra inputs are not permitted": (
            "This field is unknown for this object! Please remove it."
        ),
        "Input should be a valid string": "This field should be a string!",
        "Input should be a valid list": (
            "This field should contain a list of items but it doesn't!"
        ),
        "value is not a valid color: string not recognised as a valid color": (
            "This is not a valid color! Here are some examples of valid colors:"
            ' "red", "#ff0000", "rgb(255, 0, 0)", "hsl(0, 100%, 50%)"'
        ),
    }

    unwanted_texts = ["value is not a valid email address: ", "Value error, "]

    # Check if this is a section error. If it is, we need to handle it differently.
    # This is needed because of how the section validation raises its exception:
    # the real problems are attached as the cause of the raised error. This is
    # done to tell the user which EntryType RenderCV expects to see.
    errors = exception.errors()
    # Iterate over a copy because the list is extended inside the loop.
    for error_object in errors.copy():
        if (
            "There are problems with the entries." in error_object["msg"]
            and "ctx" in error_object
        ):
            location = error_object["loc"]
            ctx_object = error_object["ctx"]
            if "error" in ctx_object:
                inner_error_object = ctx_object["error"]
                # Every exception has a `__cause__` attribute, but it is `None`
                # unless the error was raised with `from`. Only expand causes
                # that can actually report their own errors.
                cause_object = getattr(inner_error_object, "__cause__", None)
                if callable(getattr(cause_object, "errors", None)):
                    cause_object_errors = cause_object.errors()  # type: ignore
                    for cause_error_object in cause_object_errors:
                        # we use [1:] to avoid `entries` location. It is a location for
                        # RenderCV's own data model, not the user's data model.
                        cause_error_object["loc"] = tuple(
                            list(location) + list(cause_error_object["loc"][1:])
                        )
                    errors.extend(cause_object_errors)

    # some locations are not really the locations in the input file, but some
    # information about the model coming from Pydantic. We need to remove them.
    # (e.g. avoid stuff like .end_date.literal['present'])
    unwanted_locations = ["tagged-union", "list", "literal", "int", "constrained-str"]
    for error_object in errors:
        # Filter instead of calling `list.remove` per match: an element that
        # contains more than one unwanted text (e.g. "list[constrained-str]")
        # must be dropped exactly once.
        error_object["loc"] = [  # type: ignore
            location_element
            for location_element in map(str, error_object["loc"])
            if not any(
                unwanted_location in location_element
                for unwanted_location in unwanted_locations
            )
        ]

    # Parse all the errors and create a new list of errors.
    new_errors: list[dict[str, str]] = []
    for error_object in errors:
        message = error_object["msg"]
        location = ".".join(error_object["loc"])  # type: ignore
        input_value = error_object["input"]

        # Check if this is a custom error message:
        custom_message, custom_location, custom_input_value = (
            get_error_message_and_location_and_value_from_a_custom_error(message)
        )
        if custom_message is not None:
            message = custom_message
            if custom_location:
                # If the custom location is not empty, then add it to the location.
                location = f"{location}.{custom_location}"
            input_value = custom_input_value

        # Don't show unwanted texts in the error message:
        for unwanted_text in unwanted_texts:
            message = message.replace(unwanted_text, "")

        # Convert the error message to a more user-friendly message if it's in the
        # error_dictionary:
        if message in error_dictionary:
            message = error_dictionary[message]

        # Special case for end_date because Pydantic returns multiple end_date errors
        # since it has multiple valid formats:
        if "end_date" in location:
            message = (
                "This is not a valid end date! Please use either YYYY-MM-DD, YYYY-MM,"
                ' or YYYY format or "present"!'
            )

        # If the input is a dictionary or a list (the model itself fails to validate),
        # then don't show the input. It looks confusing and it is not helpful.
        if isinstance(input_value, dict | list):
            input_value = ""

        new_error = {
            "loc": tuple(location.split(".")),
            "msg": message,
            "input": str(input_value),
        }

        # Several raw errors can collapse into the same user-facing error; keep
        # only the first occurrence.
        if new_error not in new_errors:
            new_errors.append(new_error)

    return new_errors

216 

217 

218def read_a_yaml_file(file_path_or_contents: pathlib.Path | str) -> dict: 

219 """Read a YAML file and return its content as a dictionary. The YAML file can be 

220 given as a path to the file or as the contents of the file as a string. 

221 

222 Args: 

223 file_path_or_contents: The path to the YAML file or the contents of the YAML 

224 file as a string. 

225 

226 Returns: 

227 The content of the YAML file as a dictionary. 

228 """ 

229 

230 if isinstance(file_path_or_contents, pathlib.Path): 

231 # Check if the file exists: 

232 if not file_path_or_contents.exists(): 

233 message = f"The input file {file_path_or_contents} doesn't exist!" 

234 raise FileNotFoundError(message) 

235 

236 # Check the file extension: 

237 accepted_extensions = [".yaml", ".yml", ".json", ".json5"] 

238 if file_path_or_contents.suffix not in accepted_extensions: 

239 user_friendly_accepted_extensions = [ 

240 f"[green]{ext}[/green]" for ext in accepted_extensions 

241 ] 

242 user_friendly_accepted_extensions = ", ".join( 

243 user_friendly_accepted_extensions 

244 ) 

245 message = ( 

246 "The input file should have one of the following extensions:" 

247 f" {user_friendly_accepted_extensions}. The input file is" 

248 f" {file_path_or_contents}." 

249 ) 

250 raise ValueError(message) 

251 

252 file_content = file_path_or_contents.read_text(encoding="utf-8") 

253 else: 

254 file_content = file_path_or_contents 

255 

256 yaml_as_a_dictionary: dict = ruamel.yaml.YAML().load(file_content) 

257 

258 if yaml_as_a_dictionary is None: 

259 message = "The input file is empty!" 

260 raise ValueError(message) 

261 

262 return yaml_as_a_dictionary 

263 

264 

def validate_input_dictionary_and_return_the_data_model(
    input_dictionary: dict,
    context: Optional[dict] = None,
) -> models.RenderCVDataModel:
    """Build a `RenderCVDataModel` from the input dictionary, validating it on
    the way, and apply the `bold_keywords` setting if it is provided.

    Args:
        input_dictionary: The input dictionary.
        context: An optional context dictionary that is forwarded to the model
            validation.

    Returns:
        The data model.
    """
    data_model = models.RenderCVDataModel.model_validate(
        input_dictionary, context=context
    )

    settings = data_model.rendercv_settings
    if not settings or not settings.bold_keywords:
        # Nothing to make bold: return the validated model untouched.
        return data_model

    # `bold_keywords` is provided in `rendercv_settings`, so make the given
    # keywords bold in the `cv.sections_input` field:
    data_model.cv.sections_input = make_given_keywords_bold_in_sections(
        data_model.cv.sections_input,
        settings.bold_keywords,
    )
    return data_model

295 

296 

def read_input_file(
    file_path_or_contents: pathlib.Path | str,
) -> models.RenderCVDataModel:
    """Read the input (YAML or JSON), validate it, and return it as an instance
    of `RenderCVDataModel`.

    This chains `read_a_yaml_file` and
    `validate_input_dictionary_and_return_the_data_model`.

    Args:
        file_path_or_contents: The path to the input file or the contents of the input
            file as a string.

    Returns:
        The data model.
    """
    return validate_input_dictionary_and_return_the_data_model(
        read_a_yaml_file(file_path_or_contents)
    )

311 

312 return validate_input_dictionary_and_return_the_data_model(input_as_dictionary)