Coverage for pydantic_graph/pydantic_graph/persistence/file.py: 99.15%

108 statements  

from __future__ import annotations as _annotations

import asyncio
import secrets
from collections.abc import AsyncIterator
from contextlib import AsyncExitStack, asynccontextmanager
from dataclasses import dataclass, field
from pathlib import Path
from time import perf_counter
from typing import Any

import pydantic

from .. import _utils as _graph_utils, exceptions
from ..nodes import BaseNode, End
from . import (
    BaseStatePersistence,
    EndSnapshot,
    NodeSnapshot,
    RunEndT,
    Snapshot,
    SnapshotStatus,
    StateT,
    _utils,
    build_snapshot_list_type_adapter,
)

@dataclass
class FileStatePersistence(BaseStatePersistence[StateT, RunEndT]):
    """File-based state persistence that holds graph run state in a JSON file."""

    json_file: Path
    """Path to the JSON file where the snapshots are stored.

    You should use a different file for each graph run, but a single file should be reused for multiple
    steps of the same run.

    For example, if you have a run ID of the form `run_123abc`, you might create a `FileStatePersistence` thus:

    ```py
    from pathlib import Path

    from pydantic_graph.persistence.file import FileStatePersistence

    run_id = 'run_123abc'
    persistence = FileStatePersistence(Path('runs') / f'{run_id}.json')
    ```
    """
    _snapshots_type_adapter: pydantic.TypeAdapter[list[Snapshot[StateT, RunEndT]]] | None = field(
        default=None, init=False, repr=False
    )

    async def snapshot_node(self, state: StateT, next_node: BaseNode[StateT, Any, RunEndT]) -> None:
        await self._append_save(NodeSnapshot(state=state, node=next_node))

    async def snapshot_node_if_new(
        self, snapshot_id: str, state: StateT, next_node: BaseNode[StateT, Any, RunEndT]
    ) -> None:
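        # Hold the lock across the check and the append so two concurrent calls can't
        # both write the same snapshot; `lock=False` avoids re-acquiring the held lock.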
        async with self._lock():
            snapshots = await self.load_all()
            if not any(s.id == snapshot_id for s in snapshots):
                await self._append_save(NodeSnapshot(state=state, node=next_node), lock=False)

    async def snapshot_end(self, state: StateT, end: End[RunEndT]) -> None:
        await self._append_save(EndSnapshot(state=state, result=end))

    @asynccontextmanager
    async def record_run(self, snapshot_id: str) -> AsyncIterator[None]:
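        # Under the lock: find the snapshot, check it hasn't already run, then mark it
        # running and stamp its start time. The node itself executes outside the lock;
        # its duration and final status are written back on exit below.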
        async with self._lock():
            snapshots = await self.load_all()
            try:
                snapshot = next(s for s in snapshots if s.id == snapshot_id)
            except StopIteration as e:
                raise LookupError(f'No snapshot found with id={snapshot_id!r}') from e

            assert isinstance(snapshot, NodeSnapshot), 'Only NodeSnapshot can be recorded'
            exceptions.GraphNodeStatusError.check(snapshot.status)
            snapshot.status = 'running'
            snapshot.start_ts = _utils.now_utc()
            await self._save(snapshots)

        start = perf_counter()
        try:
            yield
        except Exception:
            duration = perf_counter() - start
            async with self._lock():
                await _graph_utils.run_in_executor(self._after_run_sync, snapshot_id, duration, 'error')
            raise
        else:
            snapshot.duration = perf_counter() - start
            async with self._lock():
                await _graph_utils.run_in_executor(self._after_run_sync, snapshot_id, snapshot.duration, 'success')

    async def load_next(self) -> NodeSnapshot[StateT, RunEndT] | None:
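        # Claim the next 'created' node snapshot by flipping it to 'pending' under the
        # lock, so concurrent consumers of the same file don't pick up the same node.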
        async with self._lock():
            snapshots = await self.load_all()
            if snapshot := next((s for s in snapshots if isinstance(s, NodeSnapshot) and s.status == 'created'), None):
                snapshot.status = 'pending'
                await self._save(snapshots)
                return snapshot

    def should_set_types(self) -> bool:
        """Whether types need to be set."""
        return self._snapshots_type_adapter is None

    def set_types(self, state_type: type[StateT], run_end_type: type[RunEndT]) -> None:
        self._snapshots_type_adapter = build_snapshot_list_type_adapter(state_type, run_end_type)

    async def load_all(self) -> list[Snapshot[StateT, RunEndT]]:
        return await _graph_utils.run_in_executor(self._load_sync)

    def _load_sync(self) -> list[Snapshot[StateT, RunEndT]]:
        assert self._snapshots_type_adapter is not None, 'snapshots type adapter must be set'
        try:
            content = self.json_file.read_bytes()
        except FileNotFoundError:
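            # A missing file just means no snapshots have been persisted yet.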
            return []
        else:
            return self._snapshots_type_adapter.validate_json(content)

    def _after_run_sync(self, snapshot_id: str, duration: float, status: SnapshotStatus) -> None:
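        # Re-read the file so snapshots written while the node was running aren't
        # clobbered; runs in an executor with the file lock held by the caller.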
        snapshots = self._load_sync()
        snapshot = next(s for s in snapshots if s.id == snapshot_id)
        assert isinstance(snapshot, NodeSnapshot), 'Only NodeSnapshot can be recorded'
        snapshot.duration = duration
        snapshot.status = status
        self._save_sync(snapshots)

    async def _save(self, snapshots: list[Snapshot[StateT, RunEndT]]) -> None:
        await _graph_utils.run_in_executor(self._save_sync, snapshots)

    def _save_sync(self, snapshots: list[Snapshot[StateT, RunEndT]]) -> None:
        assert self._snapshots_type_adapter is not None, 'snapshots type adapter must be set'
        self.json_file.write_bytes(self._snapshots_type_adapter.dump_json(snapshots, indent=2))

    async def _append_save(self, snapshot: Snapshot[StateT, RunEndT], *, lock: bool = True) -> None:
        assert self._snapshots_type_adapter is not None, 'snapshots type adapter must be set'
        async with AsyncExitStack() as stack:
            if lock:
                await stack.enter_async_context(self._lock())
            snapshots = await self.load_all()
            snapshots.append(snapshot)
            await self._save(snapshots)

    @asynccontextmanager
    async def _lock(self, *, timeout: float = 1.0) -> AsyncIterator[None]:
        """Lock the snapshots file by creating a sibling `.pydantic-graph-persistence-lock` file.

        Args:
            timeout: how long to wait for the lock before giving up.

        Returns: an async context manager that holds the lock.
        """
        lock_file = self.json_file.parent / f'{self.json_file.name}.pydantic-graph-persistence-lock'
        lock_id = secrets.token_urlsafe().encode()
        await asyncio.wait_for(_get_lock(lock_file, lock_id), timeout=timeout)
        try:
            yield
        finally:
            await _graph_utils.run_in_executor(lock_file.unlink, missing_ok=True)


async def _get_lock(lock_file: Path, lock_id: bytes) -> None:
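    # Poll until this process's token wins the lock file; `_lock` bounds the total
    # wait with `asyncio.wait_for` above.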
    # TODO replace with inline code and `asyncio.timeout` when we drop 3.9
    while not await _graph_utils.run_in_executor(_file_append_check, lock_file, lock_id):
        await asyncio.sleep(0.01)


def _file_append_check(file: Path, content: bytes) -> bool:
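    # Best-effort mutual exclusion without OS-level file locking: if the lock file
    # already exists, another process holds the lock; otherwise append our token and
    # read the file back, claiming the lock only if our token is first. The read-back
    # catches two processes appending at nearly the same time.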
    if file.exists():
        return False

    with file.open(mode='ab') as f:
        f.write(content + b'\n')

    return file.read_bytes().startswith(content)
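

# Minimal usage sketch (illustrative only, not part of the module). `MyState` and
# `MyNode` are hypothetical stand-ins for a real graph's state and node types, and an
# `int` run end type is assumed; in practice `Graph` drives these calls itself:
#
#     persistence = FileStatePersistence(Path('runs/run_123abc.json'))
#     if persistence.should_set_types():
#         persistence.set_types(MyState, int)
#     await persistence.snapshot_node(MyState(), MyNode())
#     if snapshot := await persistence.load_next():  # marks the snapshot 'pending'
#         async with persistence.record_run(snapshot.id):
#             ...  # run the node; duration and final status are recorded on exit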