Coverage for rendercv/data/reader.py: 88%

"""
The `rendercv.data.reader` module contains the functions that are used to read the
input file (YAML or JSON) and return it as an instance of `RenderCVDataModel`, which
is a Pydantic data model of RenderCV's data format.
"""

7import pathlib (empty)

8import re (empty)

9from typing import Optional (empty)

11import pydantic (empty)

12import ruamel.yaml (empty)

14from . import models (empty)

15from .models import entry_types (empty)

def make_given_keywords_bold_in_sections(
    sections_input: models.Sections, keywords: list[str]
) -> models.Sections:
    """Make the given keywords bold in every entry of every section.

    Args:
        sections_input: The sections to process, or `None`. Its values are
            iterated as collections of entries; an entry is either a plain string
            or an object exposing a callable `make_keywords_bold` attribute. The
            entry collections are updated in place (assumes they are mutable
            lists -- confirm against `models.Sections`).
        keywords: The keywords to make bold.

    Returns:
        The same `sections_input` object with the keywords made bold, or `None`
        if `sections_input` is `None`.
    """
    if sections_input is None:
        return None

    for entries in sections_input.values():
        for index, entry in enumerate(entries):
            if isinstance(entry, str):
                # Strings are immutable, so the helper's result must be written
                # back into the list; otherwise the bolded text is thrown away.
                # NOTE(review): relies on the helper returning the new string.
                entries[index] = entry_types.make_keywords_bold_in_a_string(
                    entry, keywords
                )
            elif callable(getattr(entry, "make_keywords_bold", None)):
                updated_entry = entry.make_keywords_bold(keywords)  # type: ignore
                # Rebinding the loop variable would not change the list. Store
                # the returned object, unless the method mutated the entry in
                # place and returned nothing.
                if updated_entry is not None:
                    entries[index] = updated_entry

    return sections_input

def get_error_message_and_location_and_value_from_a_custom_error(
    error_string: str,
) -> tuple[Optional[str], Optional[str], Optional[str]]:
    """Extract the message, location, and input value from a custom error string.

    Custom errors are raised with three string arguments (message, location,
    input value), which Python renders as a tuple-like text such as
    `('message', 'location', 'value')`. This function searches `error_string` for
    that shape and splits it back into its three parts. It exists because
    model-level Pydantic errors don't carry the exact field location, so the
    location is smuggled through the error text instead.

    Args:
        error_string: The error message to inspect.

    Returns:
        The custom message, location, and input value, or three `None`s when the
        string doesn't contain a custom error.
    """
    found = re.search(r"""\(['"](.*)['"], '(.*)', '(.*)'\)""", error_string)
    if found is None:
        return None, None, None

    # The pattern has exactly three capturing groups, in this order.
    custom_message, custom_location, custom_value = found.groups()
    return custom_message, custom_location, custom_value

def parse_validation_errors(
    exception: pydantic.ValidationError,
) -> list[dict[str, str]]:
    """Take a Pydantic validation error, parse it, and return a list of error
    dictionaries that contain the error messages, locations, and the input values.

    The errors reported by `exception.errors()` are expanded with the errors of
    nested section validations, stripped of location elements that describe the
    model rather than the input, translated to user-friendly messages, and
    de-duplicated.

    Args:
        exception: The Pydantic validation error object.

    Returns:
        A list of error dictionaries. Each dictionary has the keys `loc` (a tuple
        of location elements), `msg` (the message), and `input` (the input value
        as a string, empty when the input is a dictionary or a list).
    """
    # This dictionary is used to convert the error messages that Pydantic returns to
    # more user-friendly messages.
    error_dictionary: dict[str, str] = {
        "Input should be 'present'": (
            "This is not a valid date! Please use either YYYY-MM-DD, YYYY-MM, or YYYY"
            ' format or "present"!'
        ),
        "Input should be a valid integer, unable to parse string as an integer": (
            "This is not a valid date! Please use either YYYY-MM-DD, YYYY-MM, or YYYY"
            " format!"
        ),
        "String should match pattern '\\d{4}-\\d{2}(-\\d{2})?'": (
            "This is not a valid date! Please use either YYYY-MM-DD, YYYY-MM, or YYYY"
            " format!"
        ),
        "String should match pattern '\\b10\\..*'": (
            'A DOI prefix should always start with "10.". For example,'
            ' "10.1109/TASC.2023.3340648".'
        ),
        "URL scheme should be 'http' or 'https'": "This is not a valid URL!",
        "Field required": "This field is required!",
        "value is not a valid phone number": "This is not a valid phone number!",
        "month must be in 1..12": "The month must be between 1 and 12!",
        "day is out of range for month": "The day is out of range for the month!",
        "Extra inputs are not permitted": (
            "This field is unknown for this object! Please remove it."
        ),
        "Input should be a valid string": "This field should be a string!",
        "Input should be a valid list": (
            "This field should contain a list of items but it doesn't!"
        ),
        "value is not a valid color: string not recognised as a valid color": (
            "This is not a valid color! Here are some examples of valid colors:"
            ' "red", "#ff0000", "rgb(255, 0, 0)", "hsl(0, 100%, 50%)"'
        ),
    }

    unwanted_texts = ["value is not a valid email address: ", "Value error, "]

    # Check if this is a section error. If it is, we need to handle it differently.
    # This is needed because of how the section validation raises its exception: the
    # original validation error is attached as the cause, which lets us tell the user
    # which entry type RenderCV expects to see.
    errors = exception.errors()
    # Iterate over a copy because `errors` is extended inside the loop.
    for error_object in errors.copy():
        if (
            "There are problems with the entries." in error_object["msg"]
            and "ctx" in error_object
        ):
            location = error_object["loc"]
            ctx_object = error_object["ctx"]
            if "error" in ctx_object:
                inner_error_object = ctx_object["error"]
                # Every exception has a `__cause__` attribute, but it is `None`
                # unless the error was raised with `raise ... from ...`. Only use
                # the cause when it can actually report its own errors.
                cause_object = getattr(inner_error_object, "__cause__", None)
                if callable(getattr(cause_object, "errors", None)):
                    cause_object_errors = cause_object.errors()
                    for cause_error_object in cause_object_errors:
                        # we use [1:] to avoid `entries` location. It is a location for
                        # RenderCV's own data model, not the user's data model.
                        cause_error_object["loc"] = tuple(
                            list(location) + list(cause_error_object["loc"][1:])
                        )
                    errors.extend(cause_object_errors)

    # some locations are not really the locations in the input file, but some
    # information about the model coming from Pydantic. We need to remove them.
    # (e.g. avoid stuff like .end_date.literal['present'])
    unwanted_locations = ["tagged-union", "list", "literal", "int", "constrained-str"]
    for error_object in errors:
        # Each element is dropped at most once, even when it contains several of
        # the unwanted texts (e.g. both "list" and "int").
        error_object["loc"] = [  # type: ignore
            location_element
            for location_element in map(str, error_object["loc"])
            if not any(
                unwanted_location in location_element
                for unwanted_location in unwanted_locations
            )
        ]

    # Parse all the errors and create a new list of errors.
    new_errors: list[dict[str, str]] = []
    for error_object in errors:
        message = error_object["msg"]
        location = ".".join(error_object["loc"])  # type: ignore
        input_value = error_object["input"]

        # Check if this is a custom error message:
        custom_message, custom_location, custom_input_value = (
            get_error_message_and_location_and_value_from_a_custom_error(message)
        )
        if custom_message is not None:
            message = custom_message
            if custom_location:
                # If the custom location is not empty, then add it to the location.
                location = f"{location}.{custom_location}"
            input_value = custom_input_value

        # Don't show unwanted texts in the error message:
        for unwanted_text in unwanted_texts:
            message = message.replace(unwanted_text, "")

        # Convert the error message to a more user-friendly message if it's in the
        # error_dictionary:
        message = error_dictionary.get(message, message)

        # Special case for end_date because Pydantic returns multiple end_date errors
        # since it has multiple valid formats:
        if "end_date" in location:
            message = (
                "This is not a valid end date! Please use either YYYY-MM-DD, YYYY-MM,"
                ' or YYYY format or "present"!'
            )

        # If the input is a dictionary or a list (the model itself fails to validate),
        # then don't show the input. It looks confusing and it is not helpful.
        if isinstance(input_value, (dict, list)):
            input_value = ""

        new_error = {
            "loc": tuple(location.split(".")),
            "msg": message,
            "input": str(input_value),
        }

        # Skip duplicates (the same error can be reported more than once).
        if new_error not in new_errors:
            new_errors.append(new_error)

    return new_errors

216

217

218def read_a_yaml_file(file_path_or_contents: pathlib.Path | str) -> dict: (empty)

219 """Read a YAML file and return its content as a dictionary. The YAML file can be

220 given as a path to the file or as the contents of the file as a string.

221

222 Args:

223 file_path_or_contents: The path to the YAML file or the contents of the YAML

224 file as a string.

225

226 Returns:

227 The content of the YAML file as a dictionary.

228 """

229

230 if isinstance(file_path_or_contents, pathlib.Path): (empty)

231 # Check if the file exists:

232 if not file_path_or_contents.exists(): (empty)

233 message = f"The input file {file_path_or_contents} doesn't exist!" (empty)

234 raise FileNotFoundError(message) (empty)

235

236 # Check the file extension:

237 accepted_extensions = [".yaml", ".yml", ".json", ".json5"] (empty)

238 if file_path_or_contents.suffix not in accepted_extensions: (empty)

239 user_friendly_accepted_extensions = [ (empty)

240 f"[green]{ext}[/green]" for ext in accepted_extensions

241 ]

242 user_friendly_accepted_extensions = ", ".join( (empty)

243 user_friendly_accepted_extensions

244 )

245 message = ( (empty)

246 "The input file should have one of the following extensions:"

247 f" {user_friendly_accepted_extensions}. The input file is"

248 f" {file_path_or_contents}."

249 )

250 raise ValueError(message) (empty)

251

252 file_content = file_path_or_contents.read_text(encoding="utf-8") (empty)

253 else:

254 file_content = file_path_or_contents (empty)

255

256 yaml_as_a_dictionary: dict = ruamel.yaml.YAML().load(file_content) (empty)

257

258 if yaml_as_a_dictionary is None: (empty)

259 message = "The input file is empty!" (empty)

260 raise ValueError(message) (empty)

261

262 return yaml_as_a_dictionary (empty)

263

264

def validate_input_dictionary_and_return_the_data_model(
    input_dictionary: dict,
    context: Optional[dict] = None,
) -> models.RenderCVDataModel:
    """Build a `RenderCVDataModel` from the input dictionary, validating it on the
    way, and apply the `bold_keywords` setting if it is provided.

    Args:
        input_dictionary: The input dictionary.
        context: An optional context dictionary that is forwarded to the validation.
            It's used to send the input file path with the context object, but it's
            not required.

    Returns:
        The data model.
    """
    # Creating the model is what performs the validation.
    validated_model = models.RenderCVDataModel.model_validate(
        input_dictionary, context=context
    )

    settings = validated_model.rendercv_settings
    if not (settings and settings.bold_keywords):
        # No keywords to make bold; the model is returned as validated.
        return validated_model

    # `bold_keywords` is provided in `rendercv_settings`: make those keywords bold
    # in the `cv.sections` field.
    validated_model.cv.sections_input = make_given_keywords_bold_in_sections(
        validated_model.cv.sections_input,
        settings.bold_keywords,
    )
    return validated_model

295

296

def read_input_file(
    file_path_or_contents: pathlib.Path | str,
) -> models.RenderCVDataModel:
    """Read the input (YAML or JSON) and return it as an instance of
    `RenderCVDataModel`, which is a Pydantic data model of RenderCV's data format.

    This is a two-step pipeline: the input is parsed with `read_a_yaml_file`, and
    the result is validated with
    `validate_input_dictionary_and_return_the_data_model`.

    Args:
        file_path_or_contents: The path to the input file or the contents of the
            input file as a string.

    Returns:
        The data model.
    """
    return validate_input_dictionary_and_return_the_data_model(
        read_a_yaml_file(file_path_or_contents)
    )