Coverage for configzen/data.py: 42%

78 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-15 02:42 +0000

1"""Interfaces for encapsulation of configuring and using data formats.""" 

2 

3from __future__ import annotations 1abcdefgh

4 

5from abc import ABCMeta, abstractmethod 1abcdefgh

6from collections.abc import Callable, Mapping, MutableMapping, MutableSequence, Sequence 1abcdefgh

7from functools import partial 1abcdefgh

8from itertools import zip_longest 1abcdefgh

9from typing import ( 1abcdefgh

10 TYPE_CHECKING, 

11 Any, 

12 AnyStr, 

13 Generic, 

14 Literal, 

15 TypedDict, 

16 TypeVar, 

17 cast, 

18) 

19 

20from runtime_generics import runtime_generic, type_check 1abcdefgh

21 

22if TYPE_CHECKING: 1abcdefgh

23 from typing import IO, ClassVar, overload 

24 

25 from typing_extensions import TypeAlias, Unpack 

26 

27 from configzen.sources import ConfigSource 

28 

29 Data: TypeAlias = MutableMapping[str, object] 

30 

31 

32__all__ = ( 1abcdefgh

33 "DataFormat", 

34 "DataFormatOptions", 

35 "DataFormatOptionsType", 

36 "BinaryDataFormat", 

37 "TextDataFormat", 

38) 

39 

40 

class DataFormatOptions(TypedDict, total=False):
    """
    Root of all option dictionaries accepted by data formats.

    Declares no keys itself and is non-total, so every key added
    by a subclass is optional.
    """


# Type variable standing for "some concrete DataFormatOptions subclass".
DataFormatOptionsType = TypeVar("DataFormatOptionsType", bound=DataFormatOptions)

46 

47 

@runtime_generic
class DataFormat(Generic[DataFormatOptionsType, AnyStr], metaclass=ABCMeta):
    """
    Core interface for configuring and using any data format within configzen.

    Do not use this class directly.
    If you need to implement your own data format, implement a subclass of:
    - BinaryDataFormat, if it is a binary data format, or
    - TextDataFormat, if it is a text data format.
    """

    # Maps a file extension to the data format class registered for it.
    # Defined once on this base class, so every subclass shares the same dict.
    extension_registry: ClassVar[dict[str, type[DataFormat[Any, Any]]]] = {}

    # Expected to be provided by concrete subclasses; read by __init_subclass__.
    default_extension: ClassVar[str]
    file_extensions: ClassVar[set[str]]
    option_name: ClassVar[str]

    def __init__(self, options: DataFormatOptionsType | None = None) -> None:
        """Create the data format and pass ``options`` on to ``configure()``."""
        self.configure(**(options or {}))

    @classmethod
    def for_extension(
        cls,
        extension_name: str,
        options: DataFormatOptionsType | None = None,
    ) -> DataFormat[Any, Any]:
        """
        Create a data format instance for an extension.

        Parameters
        ----------
        extension_name
            The key to look up in the extension registry.
        options
            Options passed to the constructor of the registered data format.

        Raises
        ------
        KeyError
            If no data format is registered under ``extension_name``.

        """
        return cls.extension_registry[extension_name](options)

    if TYPE_CHECKING:

        @overload
        def is_binary(
            self: DataFormat[DataFormatOptionsType, bytes],
        ) -> Literal[True]: ...

        @overload
        def is_binary(
            self: DataFormat[DataFormatOptionsType, str],
        ) -> Literal[False]: ...

    def is_binary(self) -> bool:
        """Return whether the data format is binary (operates on bytes)."""
        return type_check(self, DataFormat[Any, bytes])

    # Unpack[DataFormatOptionsType] cannot be used here,
    # because this functionality is not supported by mypy yet.
    # Override the **options annotation in your subclass of DataFormat with
    # the subclass of DataFormatOptions corresponding to your subclass of DataFormat.
    def configure(self, **options: Unpack[DataFormatOptions]) -> None:
        """
        Configure the data format.

        Every data format provides its own options, related to comments, indentation,
        and other format-specific features.

        The base implementation does nothing.
        """

    @abstractmethod
    def load(self, stream: IO[AnyStr]) -> Data:
        """
        Load the data from a stream.

        Return a mutable mapping representing the loaded data
        which is mutation-sensitive (for round-trip processing).

        Every configuration source transforms the input data into a stream
        to be processed by the data format, because most data format libraries
        operate on streams.

        This method is called by the config instance.
        """

    @abstractmethod
    def dump(self, data: Data, stream: IO[AnyStr]) -> None:
        """
        Dump the data to a stream.

        Every configuration source transforms the input data into a stream
        to be processed by the data format, because most libraries implementing
        data formats operate on streams.

        This method is called by the config instance.
        """

    @classmethod
    def register_file_extensions(cls) -> None:
        """
        Register the file extensions supported by this data format.

        Every extension in ``cls.file_extensions`` is mapped to ``cls`` in the
        shared registry, replacing any class registered for it before.
        """
        cls.extension_registry.update(dict.fromkeys(cls.file_extensions, cls))

    def validate_source(self, source: ConfigSource[Any, AnyStr]) -> None:
        """
        Validate the config source.

        Raises
        ------
        TypeError
            If this data format is binary, but the source is not.

        """
        if self.is_binary() and not source.is_binary():
            msg = (
                f"{source} is not a binary source, "
                f"but {self.__class__.__name__} is a binary data format"
            )
            raise TypeError(msg)

    def roundtrip_update_mapping(
        self,
        roundtrip_data: Data,
        mergeable_data: MutableMapping[str, object],
    ) -> None:
        """
        Update the loaded data in a round-trip manner.

        Use values from the configuration altered programmatically in runtime,
        while keeping the structure and comments of the original data.

        Parameters
        ----------
        roundtrip_data
            The data to be updated. Stores the original structure, comments etc.
        mergeable_data
            The new values to be merged into the loaded data.

        """
        # Hand the bound methods down as the recursion callbacks, so that
        # overrides in subclasses also apply to nested containers.
        return roundtrip_update_mapping(
            roundtrip_data,
            mergeable_data,
            _recursive_update_mapping=self.roundtrip_update_mapping,
            _recursive_update_sequence=self.roundtrip_update_sequence,
        )

    def roundtrip_update_sequence(
        self,
        roundtrip_data: MutableSequence[object],
        mergeable_data: Sequence[object],
    ) -> None:
        """Merge new data sequence without losing comments."""
        return roundtrip_update_sequence(
            roundtrip_data,
            mergeable_data,
            _recursive_update_mapping=self.roundtrip_update_mapping,
            _recursive_update_sequence=self.roundtrip_update_sequence,
        )

    def __init_subclass__(cls, *, skip_hook: bool = False) -> None:
        """
        Subclass hook. Pass skip_hook=True to skip it.

        Unless skipped, require ``option_name``, make sure ``default_extension``
        is among the ``file_extensions`` of the subclass and register them all.

        Raises
        ------
        TypeError
            If the subclass has no ``option_name`` and the hook is not skipped.

        """
        # NOTE(review): super().__init_subclass__() is never called here, so the
        # hook of typing.Generic is bypassed for subclasses -- confirm intended.
        if skip_hook:
            return
        if getattr(cls, "option_name", None) is None:
            msg = (
                f"{cls.__name__} must have an option_name attribute "
                "if it is not a class with skip_hook=True parameter"
            )
            raise TypeError(msg)
        # Always work on a fresh set owned by this subclass: the attribute may be
        # inherited, and adding to it in place would alter the parent class too.
        cls.file_extensions = set(getattr(cls, "file_extensions", None) or ())
        cls.file_extensions.add(cls.default_extension)
        cls.register_file_extensions()

198 

199 

BinaryDataFormat = DataFormat[DataFormatOptionsType, bytes]
"""
Core interface for configuring and using binary data formats within configzen.

This is DataFormat with its stream type fixed to bytes.

Do not use this class directly.
To implement your own binary data format, subclass it.
Make sure that the code defining your subclass actually runs,
so that the subclass gets added to the registry of data formats.
"""

211 

212 

TextDataFormat = DataFormat[DataFormatOptionsType, str]
"""
Core interface for configuring and using text data formats within configzen.

This is DataFormat with its stream type fixed to str.

Do not use this class directly.
To implement your own text data format, subclass it.
Make sure that the code defining your subclass actually runs,
so that the subclass gets added to the registry of data formats.
"""

224 

225 

def roundtrip_update_mapping(
    roundtrip_data: Data,
    mergeable_data: MutableMapping[str, object],
    *,
    _recursive_update_mapping: Callable[[Data, MutableMapping[str, object]], None]
    | None = None,
    _recursive_update_sequence: Callable[
        [MutableSequence[object], Sequence[object]],
        None,
    ]
    | None = None,
) -> None:
    """
    Update a mapping without losing recursively attached metadata.

    For every key present in both mappings, a nested mapping or sequence is
    merged in place through the recursive callbacks, provided that the new value
    is a compatible container; any other value simply overwrites the old one.
    Keys present only in ``mergeable_data`` are added to ``roundtrip_data``.

    Parameters
    ----------
    roundtrip_data
        The mapping updated in place.
    mergeable_data
        The new values. It is consumed: every key that is also present
        in ``roundtrip_data`` is popped from it.
    _recursive_update_mapping
        Callback used to merge nested mappings; defaults to this function.
    _recursive_update_sequence
        Callback used to merge nested sequences; defaults to
        ``roundtrip_update_sequence``.

    """
    if _recursive_update_mapping is None:
        _recursive_update_mapping = partial(
            roundtrip_update_mapping,
            _recursive_update_sequence=_recursive_update_sequence,
        )
    if _recursive_update_sequence is None:
        _recursive_update_sequence = partial(
            roundtrip_update_sequence,
            _recursive_update_mapping=_recursive_update_mapping,
        )
    # Only existing keys are reassigned inside the loop, so the size of
    # roundtrip_data does not change while it is being iterated.
    for key, value in roundtrip_data.items():
        if key not in mergeable_data:
            continue
        new_value = mergeable_data.pop(key)
        if isinstance(value, MutableMapping) and isinstance(new_value, Mapping):
            # Copy into a dict to ensure the callee gets the .pop() method
            _recursive_update_mapping(value, dict(new_value))
        elif (
            isinstance(value, MutableSequence)
            and isinstance(new_value, Sequence)
            # Strings are sequences too, but must not be merged char by char.
            and not isinstance(new_value, (str, bytes))
        ):
            _recursive_update_sequence(value, new_value)
        else:
            # Scalars, and containers whose kind changed, are replaced whole.
            roundtrip_data[key] = new_value
    # Whatever was not popped above is new to roundtrip_data.
    for key, value in mergeable_data.items():
        roundtrip_data[key] = value

267 

268 

269def roundtrip_update_sequence( 1abcdefgh

270 roundtrip_data: MutableSequence[object], 

271 mergeable_data: Sequence[object], 

272 *, 

273 _recursive_update_mapping: Callable[[Data, MutableMapping[str, object]], None] 

274 | None = None, 

275 _recursive_update_sequence: Callable[ 

276 [MutableSequence[object], Sequence[object]], 

277 None, 

278 ] 

279 | None = None, 

280) -> None: 

281 """Update a sequence without losing recursively attached metadata.""" 

282 if _recursive_update_mapping is None: 

283 _recursive_update_mapping = partial( 

284 roundtrip_update_mapping, 

285 _recursive_update_sequence=_recursive_update_sequence, 

286 ) 

287 if _recursive_update_sequence is None: 

288 _recursive_update_sequence = partial( 

289 roundtrip_update_sequence, 

290 _recursive_update_mapping=_recursive_update_mapping, 

291 ) 

292 sequence_length = len(mergeable_data) 

293 for i, (roundtrip_item, mergeable_item) in enumerate( 

294 zip_longest( 

295 roundtrip_data, 

296 mergeable_data, 

297 ), 

298 ): 

299 if i >= sequence_length: 

300 roundtrip_data[i] = roundtrip_item 

301 elif isinstance(roundtrip_item, MutableMapping): 

302 _recursive_update_mapping( 

303 roundtrip_item, 

304 dict(cast("Mapping[str, object]", mergeable_item)), 

305 ) 

306 elif isinstance(roundtrip_item, MutableSequence): 

307 _recursive_update_sequence( 

308 roundtrip_item, 

309 cast("list[object]", mergeable_item), 

310 )