Coverage for rendercv/data/reader.py: 88%
97 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-26 00:25 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-26 00:25 +0000
"""
The `rendercv.data.reader` module contains the functions that are used to read the input
file (YAML or JSON) and return its contents as an instance of `RenderCVDataModel`, which
is a Pydantic data model of RenderCV's data format.
"""
7import pathlib
8import re
9from typing import Optional
11import pydantic
12import ruamel.yaml
14from . import models
15from .models import entry_types
def make_given_keywords_bold_in_sections(
    sections_input: models.Sections, keywords: list[str]
) -> models.Sections:
    """Make the given keywords bold in every entry of every section.

    The sections are walked one level deep (section -> entries); the entries are
    updated in place inside their section lists.

    Args:
        sections_input: The sections to process: a mapping whose values are lists of
            entries. An entry is either a plain string or an object that exposes a
            callable `make_keywords_bold` attribute; any other entry is left
            untouched. May be `None`.
        keywords: The keywords to make bold.

    Returns:
        The same `sections_input` object with the keywords made bold, or `None` if
        `sections_input` is `None`.
    """
    if sections_input is None:
        return None

    for entries in sections_input.values():
        for index, entry in enumerate(entries):
            if isinstance(entry, str):
                # Strings are immutable, so the bolded string has to be stored back
                # into the list; calling the helper alone changes nothing.
                entries[index] = entry_types.make_keywords_bold_in_a_string(
                    entry, keywords
                )
            elif callable(getattr(entry, "make_keywords_bold", None)):
                updated_entry = entry.make_keywords_bold(keywords)  # type: ignore
                # NOTE(review): `make_keywords_bold` presumably returns the updated
                # entry (possibly the same, mutated object). Only replace the list
                # item when something was actually returned, so an implementation
                # that mutates in place and returns `None` keeps working.
                if updated_entry is not None:
                    entries[index] = updated_entry

    return sections_input
def get_error_message_and_location_and_value_from_a_custom_error(
    error_string: str,
) -> tuple[Optional[str], Optional[str], Optional[str]]:
    """Extract the parts of a custom error that was packed into a single string.

    Some model-level validation errors are raised with three string arguments
    (message, location, input value) because Pydantic cannot report the exact
    location of such errors by itself. Python renders those arguments as the text of
    a three-element tuple. This function searches `error_string` for that text and
    splits it back into its three parts.

    Args:
        error_string: The error message to inspect.

    Returns:
        `(message, location, input value)` if the tuple-like text is found anywhere
        in `error_string`; otherwise `(None, None, None)`.
    """
    # The message may be wrapped in single or double quotes; the location and the
    # input value are only recognized when wrapped in single quotes.
    custom_error_pattern = r"""\(['"](.*)['"], '(.*)', '(.*)'\)"""
    found = re.search(custom_error_pattern, error_string)
    if found is None:
        return None, None, None

    custom_message, custom_location, custom_input = found.groups()
    return custom_message, custom_location, custom_input
def parse_validation_errors(
    exception: pydantic.ValidationError,
) -> list[dict[str, str]]:
    """Take a Pydantic validation error, parse it, and return a list of error
    dictionaries that contain the error messages, locations, and the input values.

    Pydantic's `ValidationError` object is a complex object that contains a lot of
    information about the error. This function takes a `ValidationError` object and
    extracts the error messages, locations, and the input values, rewriting the
    messages into more user-friendly ones where a translation is known.

    Args:
        exception: The Pydantic validation error object.

    Returns:
        A list of error dictionaries without duplicates. Each dictionary has the keys
        `loc` (a tuple of location parts), `msg` (the user-facing message), and
        `input` (the offending input value as a string, or an empty string if the
        input was a dictionary or a list).
    """
    # This dictionary is used to convert the error messages that Pydantic returns to
    # more user-friendly messages.
    error_dictionary: dict[str, str] = {
        "Input should be 'present'": (
            "This is not a valid date! Please use either YYYY-MM-DD, YYYY-MM, or YYYY"
            ' format or "present"!'
        ),
        "Input should be a valid integer, unable to parse string as an integer": (
            "This is not a valid date! Please use either YYYY-MM-DD, YYYY-MM, or YYYY"
            " format!"
        ),
        "String should match pattern '\\d{4}-\\d{2}(-\\d{2})?'": (
            "This is not a valid date! Please use either YYYY-MM-DD, YYYY-MM, or YYYY"
            " format!"
        ),
        "String should match pattern '\\b10\\..*'": (
            'A DOI prefix should always start with "10.". For example,'
            ' "10.1109/TASC.2023.3340648".'
        ),
        "URL scheme should be 'http' or 'https'": "This is not a valid URL!",
        "Field required": "This field is required!",
        "value is not a valid phone number": "This is not a valid phone number!",
        "month must be in 1..12": "The month must be between 1 and 12!",
        "day is out of range for month": "The day is out of range for the month!",
        "Extra inputs are not permitted": (
            "This field is unknown for this object! Please remove it."
        ),
        "Input should be a valid string": "This field should be a string!",
        "Input should be a valid list": (
            "This field should contain a list of items but it doesn't!"
        ),
        "value is not a valid color: string not recognised as a valid color": (
            "This is not a valid color! Here are some examples of valid colors:"
            ' "red", "#ff0000", "rgb(255, 0, 0)", "hsl(0, 100%, 50%)"'
        ),
    }

    unwanted_texts = ["value is not a valid email address: ", "Value error, "]

    # Check if this is a section error. If it is, we need to handle it differently.
    # This is needed because of how the dm.validate_section_input function raises its
    # exception: it does so to tell the user which EntryType RenderCV expects to see.
    errors = exception.errors()
    for error_object in errors.copy():
        if (
            "There are problems with the entries." in error_object["msg"]
            and "ctx" in error_object
        ):
            location = error_object["loc"]
            ctx_object = error_object["ctx"]
            if "error" in ctx_object:
                inner_error_object = ctx_object["error"]
                # Every exception has a `__cause__` attribute, but it is `None`
                # unless the exception was raised with `raise ... from ...`. Only a
                # chained Pydantic validation error carries nested errors to merge.
                cause_object = getattr(inner_error_object, "__cause__", None)
                if isinstance(cause_object, pydantic.ValidationError):
                    cause_object_errors = cause_object.errors()
                    for cause_error_object in cause_object_errors:
                        # we use [1:] to avoid `entries` location. It is a location
                        # for RenderCV's own data model, not the user's data model.
                        cause_error_object["loc"] = tuple(
                            list(location) + list(cause_error_object["loc"][1:])
                        )
                    errors.extend(cause_object_errors)

    # some locations are not really the locations in the input file, but some
    # information about the model coming from Pydantic. We need to remove them.
    # (e.g. avoid stuff like .end_date.literal['present'])
    # NOTE(review): this is a substring match, so a user-provided key that happens to
    # contain one of these tokens (e.g. "int") is dropped from the location as well.
    unwanted_locations = ["tagged-union", "list", "literal", "int", "constrained-str"]
    for error_object in errors:
        # Filter instead of removing item by item: an element that contains more
        # than one unwanted token must be dropped exactly once.
        error_object["loc"] = [  # type: ignore
            location_element
            for location_element in map(str, error_object["loc"])
            if not any(
                unwanted_location in location_element
                for unwanted_location in unwanted_locations
            )
        ]

    # Parse all the errors and create a new list of errors.
    new_errors: list[dict[str, str]] = []
    for error_object in errors:
        message = error_object["msg"]
        location = ".".join(error_object["loc"])  # type: ignore
        input_value = error_object["input"]

        # Check if this is a custom error message:
        custom_message, custom_location, custom_input_value = (
            get_error_message_and_location_and_value_from_a_custom_error(message)
        )
        if custom_message is not None:
            message = custom_message
            if custom_location:
                # If the custom location is not empty, then add it to the location.
                location = f"{location}.{custom_location}"
            input_value = custom_input_value

        # Don't show unwanted texts in the error message:
        for unwanted_text in unwanted_texts:
            message = message.replace(unwanted_text, "")

        # Convert the error message to a more user-friendly message if it's in the
        # error_dictionary:
        message = error_dictionary.get(message, message)

        # Special case for end_date because Pydantic returns multiple end_date errors
        # since it has multiple valid formats:
        if "end_date" in location:
            message = (
                "This is not a valid end date! Please use either YYYY-MM-DD, YYYY-MM,"
                ' or YYYY format or "present"!'
            )

        # If the input is a dictionary or a list (the model itself fails to validate),
        # then don't show the input. It looks confusing and it is not helpful.
        if isinstance(input_value, (dict, list)):
            input_value = ""

        new_error = {
            "loc": tuple(location.split(".")),
            "msg": message,
            "input": str(input_value),
        }

        # Pydantic can report the same problem more than once; keep each error once.
        if new_error not in new_errors:
            new_errors.append(new_error)

    return new_errors
218def read_a_yaml_file(file_path_or_contents: pathlib.Path | str) -> dict:
219 """Read a YAML file and return its content as a dictionary. The YAML file can be
220 given as a path to the file or as the contents of the file as a string.
222 Args:
223 file_path_or_contents: The path to the YAML file or the contents of the YAML
224 file as a string.
226 Returns:
227 The content of the YAML file as a dictionary.
228 """
230 if isinstance(file_path_or_contents, pathlib.Path):
231 # Check if the file exists:
232 if not file_path_or_contents.exists():
233 message = f"The input file {file_path_or_contents} doesn't exist!"
234 raise FileNotFoundError(message)
236 # Check the file extension:
237 accepted_extensions = [".yaml", ".yml", ".json", ".json5"]
238 if file_path_or_contents.suffix not in accepted_extensions:
239 user_friendly_accepted_extensions = [
240 f"[green]{ext}[/green]" for ext in accepted_extensions
241 ]
242 user_friendly_accepted_extensions = ", ".join(
243 user_friendly_accepted_extensions
244 )
245 message = (
246 "The input file should have one of the following extensions:"
247 f" {user_friendly_accepted_extensions}. The input file is"
248 f" {file_path_or_contents}."
249 )
250 raise ValueError(message)
252 file_content = file_path_or_contents.read_text(encoding="utf-8")
253 else:
254 file_content = file_path_or_contents
256 yaml_as_a_dictionary: dict = ruamel.yaml.YAML().load(file_content)
258 if yaml_as_a_dictionary is None:
259 message = "The input file is empty!"
260 raise ValueError(message)
262 return yaml_as_a_dictionary
def validate_input_dictionary_and_return_the_data_model(
    input_dictionary: dict,
    context: Optional[dict] = None,
) -> models.RenderCVDataModel:
    """Build a `RenderCVDataModel` from the input dictionary, validating it on the way.

    Args:
        input_dictionary: The input dictionary.
        context: An optional context dictionary that is forwarded to the validation
            (e.g. to send the input file path along with it).

    Returns:
        The validated data model. If `rendercv_settings.bold_keywords` is set, those
        keywords are made bold in `cv.sections_input`.
    """
    validated_model = models.RenderCVDataModel.model_validate(
        input_dictionary, context=context
    )

    # Nothing more to do unless the settings ask for keywords to be made bold.
    if not (
        validated_model.rendercv_settings
        and validated_model.rendercv_settings.bold_keywords
    ):
        return validated_model

    validated_model.cv.sections_input = make_given_keywords_bold_in_sections(
        validated_model.cv.sections_input,
        validated_model.rendercv_settings.bold_keywords,
    )
    return validated_model
def read_input_file(
    file_path_or_contents: pathlib.Path | str,
) -> models.RenderCVDataModel:
    """Read the input (YAML or JSON) and turn it into a `RenderCVDataModel`, which is
    a Pydantic data model of RenderCV's data format.

    Args:
        file_path_or_contents: The path to the input file or the contents of the input
            file as a string.

    Returns:
        The data model.
    """
    # Parse first, then validate the parsed dictionary.
    return validate_input_dictionary_and_return_the_data_model(
        read_a_yaml_file(file_path_or_contents)
    )