Coverage for pydantic_ai_slim/pydantic_ai/models/__init__.py: 97.20%

111 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-28 17:27 +0000

1"""Logic related to making requests to an LLM. 

2 

3The aim here is to make a common interface for different LLMs, so that the rest of the code can be agnostic to the 

4specific LLM being used. 

5""" 

6 

7from __future__ import annotations as _annotations 

8 

9from abc import ABC, abstractmethod 

10from collections.abc import AsyncIterator, Iterator 

11from contextlib import asynccontextmanager, contextmanager 

12from dataclasses import dataclass, field 

13from datetime import datetime 

14from functools import cache 

15from typing import TYPE_CHECKING, cast 

16 

17import httpx 

18from typing_extensions import Literal, TypeAliasType 

19 

20from .._parts_manager import ModelResponsePartsManager 

21from ..exceptions import UserError 

22from ..messages import ModelMessage, ModelResponse, ModelResponseStreamEvent 

23from ..settings import ModelSettings 

24from ..usage import Usage 

25 

26if TYPE_CHECKING: 

27 from ..tools import ToolDefinition 

28 

29 

KnownModelName = TypeAliasType(
    'KnownModelName',
    Literal[
        # Names are generally '<provider>:<model>'. Bare names (no provider prefix, e.g.
        # 'gpt-4o', 'claude-3-opus-latest', 'o1') are also accepted; `infer_model` below
        # infers the provider from the name's prefix for those.
        'anthropic:claude-3-7-sonnet-latest',
        'anthropic:claude-3-5-haiku-latest',
        'anthropic:claude-3-5-sonnet-latest',
        'anthropic:claude-3-opus-latest',
        'claude-3-7-sonnet-latest',
        'claude-3-5-haiku-latest',
        'bedrock:amazon.titan-tg1-large',
        'bedrock:amazon.titan-text-lite-v1',
        'bedrock:amazon.titan-text-express-v1',
        'bedrock:us.amazon.nova-pro-v1:0',
        'bedrock:us.amazon.nova-lite-v1:0',
        'bedrock:us.amazon.nova-micro-v1:0',
        'bedrock:anthropic.claude-3-5-sonnet-20241022-v2:0',
        'bedrock:us.anthropic.claude-3-5-sonnet-20241022-v2:0',
        'bedrock:anthropic.claude-3-5-haiku-20241022-v1:0',
        'bedrock:us.anthropic.claude-3-5-haiku-20241022-v1:0',
        'bedrock:anthropic.claude-instant-v1',
        'bedrock:anthropic.claude-v2:1',
        'bedrock:anthropic.claude-v2',
        'bedrock:anthropic.claude-3-sonnet-20240229-v1:0',
        'bedrock:us.anthropic.claude-3-sonnet-20240229-v1:0',
        'bedrock:anthropic.claude-3-haiku-20240307-v1:0',
        'bedrock:us.anthropic.claude-3-haiku-20240307-v1:0',
        'bedrock:anthropic.claude-3-opus-20240229-v1:0',
        'bedrock:us.anthropic.claude-3-opus-20240229-v1:0',
        'bedrock:anthropic.claude-3-5-sonnet-20240620-v1:0',
        'bedrock:us.anthropic.claude-3-5-sonnet-20240620-v1:0',
        'bedrock:anthropic.claude-3-7-sonnet-20250219-v1:0',
        'bedrock:us.anthropic.claude-3-7-sonnet-20250219-v1:0',
        'bedrock:cohere.command-text-v14',
        'bedrock:cohere.command-r-v1:0',
        'bedrock:cohere.command-r-plus-v1:0',
        'bedrock:cohere.command-light-text-v14',
        'bedrock:meta.llama3-8b-instruct-v1:0',
        'bedrock:meta.llama3-70b-instruct-v1:0',
        'bedrock:meta.llama3-1-8b-instruct-v1:0',
        'bedrock:us.meta.llama3-1-8b-instruct-v1:0',
        'bedrock:meta.llama3-1-70b-instruct-v1:0',
        'bedrock:us.meta.llama3-1-70b-instruct-v1:0',
        'bedrock:meta.llama3-1-405b-instruct-v1:0',
        'bedrock:us.meta.llama3-2-11b-instruct-v1:0',
        'bedrock:us.meta.llama3-2-90b-instruct-v1:0',
        'bedrock:us.meta.llama3-2-1b-instruct-v1:0',
        'bedrock:us.meta.llama3-2-3b-instruct-v1:0',
        'bedrock:us.meta.llama3-3-70b-instruct-v1:0',
        'bedrock:mistral.mistral-7b-instruct-v0:2',
        'bedrock:mistral.mixtral-8x7b-instruct-v0:1',
        'bedrock:mistral.mistral-large-2402-v1:0',
        'bedrock:mistral.mistral-large-2407-v1:0',
        'claude-3-5-sonnet-latest',
        'claude-3-opus-latest',
        'cohere:c4ai-aya-expanse-32b',
        'cohere:c4ai-aya-expanse-8b',
        'cohere:command',
        'cohere:command-light',
        'cohere:command-light-nightly',
        'cohere:command-nightly',
        'cohere:command-r',
        'cohere:command-r-03-2024',
        'cohere:command-r-08-2024',
        'cohere:command-r-plus',
        'cohere:command-r-plus-04-2024',
        'cohere:command-r-plus-08-2024',
        'cohere:command-r7b-12-2024',
        'deepseek:deepseek-chat',
        'deepseek:deepseek-reasoner',
        'google-gla:gemini-1.0-pro',
        'google-gla:gemini-1.5-flash',
        'google-gla:gemini-1.5-flash-8b',
        'google-gla:gemini-1.5-pro',
        'google-gla:gemini-2.0-flash-exp',
        'google-gla:gemini-2.0-flash-thinking-exp-01-21',
        'google-gla:gemini-exp-1206',
        'google-gla:gemini-2.0-flash',
        'google-gla:gemini-2.0-flash-lite-preview-02-05',
        'google-gla:gemini-2.0-pro-exp-02-05',
        'google-vertex:gemini-1.0-pro',
        'google-vertex:gemini-1.5-flash',
        'google-vertex:gemini-1.5-flash-8b',
        'google-vertex:gemini-1.5-pro',
        'google-vertex:gemini-2.0-flash-exp',
        'google-vertex:gemini-2.0-flash-thinking-exp-01-21',
        'google-vertex:gemini-exp-1206',
        'google-vertex:gemini-2.0-flash',
        'google-vertex:gemini-2.0-flash-lite-preview-02-05',
        'google-vertex:gemini-2.0-pro-exp-02-05',
        'gpt-3.5-turbo',
        'gpt-3.5-turbo-0125',
        'gpt-3.5-turbo-0301',
        'gpt-3.5-turbo-0613',
        'gpt-3.5-turbo-1106',
        'gpt-3.5-turbo-16k',
        'gpt-3.5-turbo-16k-0613',
        'gpt-4',
        'gpt-4-0125-preview',
        'gpt-4-0314',
        'gpt-4-0613',
        'gpt-4-1106-preview',
        'gpt-4-32k',
        'gpt-4-32k-0314',
        'gpt-4-32k-0613',
        'gpt-4-turbo',
        'gpt-4-turbo-2024-04-09',
        'gpt-4-turbo-preview',
        'gpt-4-vision-preview',
        'gpt-4o',
        'gpt-4o-2024-05-13',
        'gpt-4o-2024-08-06',
        'gpt-4o-2024-11-20',
        'gpt-4o-audio-preview',
        'gpt-4o-audio-preview-2024-10-01',
        'gpt-4o-audio-preview-2024-12-17',
        'gpt-4o-mini',
        'gpt-4o-mini-2024-07-18',
        'gpt-4o-mini-audio-preview',
        'gpt-4o-mini-audio-preview-2024-12-17',
        'gpt-4o-mini-search-preview',
        'gpt-4o-mini-search-preview-2025-03-11',
        'gpt-4o-search-preview',
        'gpt-4o-search-preview-2025-03-11',
        'groq:gemma2-9b-it',
        'groq:llama-3.1-8b-instant',
        'groq:llama-3.2-11b-vision-preview',
        'groq:llama-3.2-1b-preview',
        'groq:llama-3.2-3b-preview',
        'groq:llama-3.2-90b-vision-preview',
        'groq:llama-3.3-70b-specdec',
        'groq:llama-3.3-70b-versatile',
        'groq:llama3-70b-8192',
        'groq:llama3-8b-8192',
        'groq:mixtral-8x7b-32768',
        'mistral:codestral-latest',
        'mistral:mistral-large-latest',
        'mistral:mistral-moderation-latest',
        'mistral:mistral-small-latest',
        'o1',
        'o1-2024-12-17',
        'o1-mini',
        'o1-mini-2024-09-12',
        'o1-preview',
        'o1-preview-2024-09-12',
        'o3-mini',
        'o3-mini-2025-01-31',
        'openai:chatgpt-4o-latest',
        'openai:gpt-3.5-turbo',
        'openai:gpt-3.5-turbo-0125',
        'openai:gpt-3.5-turbo-0301',
        'openai:gpt-3.5-turbo-0613',
        'openai:gpt-3.5-turbo-1106',
        'openai:gpt-3.5-turbo-16k',
        'openai:gpt-3.5-turbo-16k-0613',
        'openai:gpt-4',
        'openai:gpt-4-0125-preview',
        'openai:gpt-4-0314',
        'openai:gpt-4-0613',
        'openai:gpt-4-1106-preview',
        'openai:gpt-4-32k',
        'openai:gpt-4-32k-0314',
        'openai:gpt-4-32k-0613',
        'openai:gpt-4-turbo',
        'openai:gpt-4-turbo-2024-04-09',
        'openai:gpt-4-turbo-preview',
        'openai:gpt-4-vision-preview',
        'openai:gpt-4o',
        'openai:gpt-4o-2024-05-13',
        'openai:gpt-4o-2024-08-06',
        'openai:gpt-4o-2024-11-20',
        'openai:gpt-4o-audio-preview',
        'openai:gpt-4o-audio-preview-2024-10-01',
        'openai:gpt-4o-audio-preview-2024-12-17',
        'openai:gpt-4o-mini',
        'openai:gpt-4o-mini-2024-07-18',
        'openai:gpt-4o-mini-audio-preview',
        'openai:gpt-4o-mini-audio-preview-2024-12-17',
        'openai:gpt-4o-mini-search-preview',
        'openai:gpt-4o-mini-search-preview-2025-03-11',
        'openai:gpt-4o-search-preview',
        'openai:gpt-4o-search-preview-2025-03-11',
        'openai:o1',
        'openai:o1-2024-12-17',
        'openai:o1-mini',
        'openai:o1-mini-2024-09-12',
        'openai:o1-preview',
        'openai:o1-preview-2024-09-12',
        'openai:o3-mini',
        'openai:o3-mini-2025-01-31',
        # 'test' maps to the built-in TestModel, which makes no network requests.
        'test',
    ],
)
"""Known model names that can be used with the `model` parameter of [`Agent`][pydantic_ai.Agent].

`KnownModelName` is provided as a concise way to specify a model.
"""

226 

227 

@dataclass
class ModelRequestParameters:
    """Configuration for an agent's request to a model, specifically related to tools and result handling."""

    # Tool definitions the model may call during the request.
    function_tools: list[ToolDefinition]
    # Whether a plain-text response (no tool call) is an acceptable result — TODO confirm against callers.
    allow_text_result: bool
    # Tool definitions used to produce/validate the final result of the run.
    result_tools: list[ToolDefinition]

235 

236 

class Model(ABC):
    """Abstract class for a model."""

    @abstractmethod
    async def request(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> tuple[ModelResponse, Usage]:
        """Make a request to the model.

        Args:
            messages: The message history to send to the model.
            model_settings: Optional settings for the request; `None` means provider defaults.
            model_request_parameters: Tool and result-handling configuration for the request.

        Returns:
            A tuple of the model's response and the usage incurred by this request.
        """
        raise NotImplementedError()

    @asynccontextmanager
    async def request_stream(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> AsyncIterator[StreamedResponse]:
        """Make a request to the model and return a streaming response.

        The default implementation raises `NotImplementedError`; subclasses override this
        to support streamed responses.
        """
        # This method is not required, but you need to implement it if you want to support streamed responses
        raise NotImplementedError(f'Streamed requests not supported by this {self.__class__.__name__}')
        # yield is required to make this a generator for type checking
        # noinspection PyUnreachableCode
        yield  # pragma: no cover

    @property
    @abstractmethod
    def model_name(self) -> str:
        """The model name."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def system(self) -> str:
        """The system / model provider, ex: openai.

        Use to populate the `gen_ai.system` OpenTelemetry semantic convention attribute,
        so should use well-known values listed in
        https://opentelemetry.io/docs/specs/semconv/attributes-registry/gen-ai/#gen-ai-system
        when applicable.
        """
        raise NotImplementedError()

    @property
    def base_url(self) -> str | None:
        """The base URL for the provider API, if available."""
        return None

286 

287 

@dataclass
class StreamedResponse(ABC):
    """Streamed response from an LLM when calling a tool."""

    # Accumulates the stream's part deltas into full response parts.
    _parts_manager: ModelResponsePartsManager = field(default_factory=ModelResponsePartsManager, init=False)
    # Created lazily on first `__aiter__` call; cached so repeated iteration resumes the same stream.
    _event_iterator: AsyncIterator[ModelResponseStreamEvent] | None = field(default=None, init=False)
    # Running usage totals; subclasses update this as stream events arrive.
    _usage: Usage = field(default_factory=Usage, init=False)

    def __aiter__(self) -> AsyncIterator[ModelResponseStreamEvent]:
        """Stream the response as an async iterable of [`ModelResponseStreamEvent`][pydantic_ai.messages.ModelResponseStreamEvent]s."""
        # The iterator is created at most once, so interleaved/repeated `async for` loops
        # continue consuming the same underlying stream rather than restarting it.
        if self._event_iterator is None:
            self._event_iterator = self._get_event_iterator()
        return self._event_iterator

    @abstractmethod
    async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
        """Return an async iterator of [`ModelResponseStreamEvent`][pydantic_ai.messages.ModelResponseStreamEvent]s.

        This method should be implemented by subclasses to translate the vendor-specific stream of events into
        pydantic_ai-format events.

        It should use the `_parts_manager` to handle deltas, and should update the `_usage` attributes as it goes.
        """
        raise NotImplementedError()
        # noinspection PyUnreachableCode
        yield  # unreachable; makes this an async generator for type checking

    def get(self) -> ModelResponse:
        """Build a [`ModelResponse`][pydantic_ai.messages.ModelResponse] from the data received from the stream so far."""
        return ModelResponse(
            parts=self._parts_manager.get_parts(), model_name=self.model_name, timestamp=self.timestamp
        )

    def usage(self) -> Usage:
        """Get the usage of the response so far. This will not be the final usage until the stream is exhausted."""
        return self._usage

    @property
    @abstractmethod
    def model_name(self) -> str:
        """Get the model name of the response."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def timestamp(self) -> datetime:
        """Get the timestamp of the response."""
        raise NotImplementedError()

336 

337 

ALLOW_MODEL_REQUESTS = True
"""Whether to allow requests to models.

This global setting allows you to disable requests to most models, e.g. to make sure you don't accidentally
make costly requests to a model during tests.

The testing models [`TestModel`][pydantic_ai.models.test.TestModel] and
[`FunctionModel`][pydantic_ai.models.function.FunctionModel] are not affected by this setting.
"""

347 

348 

def check_allow_model_requests() -> None:
    """Check if model requests are allowed.

    If you're defining your own models that have costs or latency associated with their use, you should call this in
    [`Model.request`][pydantic_ai.models.Model.request] and [`Model.request_stream`][pydantic_ai.models.Model.request_stream].

    Raises:
        RuntimeError: If model requests are not allowed.
    """
    # Guard-clause form: succeed fast when requests are permitted.
    if ALLOW_MODEL_REQUESTS:
        return
    raise RuntimeError('Model requests are not allowed, since ALLOW_MODEL_REQUESTS is False')

360 

361 

@contextmanager
def override_allow_model_requests(allow_model_requests: bool) -> Iterator[None]:
    """Context manager to temporarily override [`ALLOW_MODEL_REQUESTS`][pydantic_ai.models.ALLOW_MODEL_REQUESTS].

    Args:
        allow_model_requests: Whether to allow model requests within the context.
    """
    global ALLOW_MODEL_REQUESTS
    previous = ALLOW_MODEL_REQUESTS
    try:
        ALLOW_MODEL_REQUESTS = allow_model_requests  # pyright: ignore[reportConstantRedefinition]
        yield
    finally:
        # Always restore the prior value, even if the body raised.
        ALLOW_MODEL_REQUESTS = previous  # pyright: ignore[reportConstantRedefinition]

376 

377 

def infer_model(model: Model | KnownModelName) -> Model:
    """Infer the model from the name."""
    if isinstance(model, Model):
        return model

    if model == 'test':
        from .test import TestModel

        return TestModel()

    if ':' in model:
        # Explicit '<provider>:<model>' form.
        provider, model_name = model.split(':', maxsplit=1)
        provider = cast(str, provider)
    else:
        # Bare model name: infer the provider from well-known prefixes.
        model_name = model
        # TODO(Marcelo): We should deprecate this way.
        if model_name.startswith(('gpt', 'o1', 'o3')):
            provider = 'openai'
        elif model_name.startswith('claude'):
            provider = 'anthropic'
        elif model_name.startswith('gemini'):
            provider = 'google-gla'
        else:
            raise UserError(f'Unknown model: {model}')

    # Legacy alias kept for backwards compatibility.
    if provider == 'vertexai':
        provider = 'google-vertex'

    # Model classes are imported lazily so that only the selected provider's
    # dependencies need to be installed.
    if provider == 'cohere':
        from .cohere import CohereModel

        return CohereModel(model_name, provider=provider)
    if provider in ('deepseek', 'openai'):
        from .openai import OpenAIModel

        return OpenAIModel(model_name, provider=provider)
    if provider in ('google-gla', 'google-vertex'):
        from .gemini import GeminiModel

        return GeminiModel(model_name, provider=provider)
    if provider == 'groq':
        from .groq import GroqModel

        return GroqModel(model_name, provider=provider)
    if provider == 'mistral':
        from .mistral import MistralModel

        return MistralModel(model_name, provider=provider)
    if provider == 'anthropic':
        from .anthropic import AnthropicModel

        return AnthropicModel(model_name, provider=provider)
    if provider == 'bedrock':
        from .bedrock import BedrockConverseModel

        return BedrockConverseModel(model_name, provider=provider)
    raise UserError(f'Unknown model: {model}')

435 

436 

def cached_async_http_client(*, provider: str | None = None, timeout: int = 600, connect: int = 5) -> httpx.AsyncClient:
    """Cached HTTPX async client that creates a separate client for each provider.

    The client is cached based on the provider parameter. If provider is None, it's used for non-provider specific
    requests (like downloading images). Multiple agents and calls can share the same client when they use the same provider.

    There are good reasons why in production you should use a `httpx.AsyncClient` as an async context manager as
    described in [encode/httpx#2026](https://github.com/encode/httpx/pull/2026), but when experimenting or showing
    examples, it's very useful not to.

    The default timeouts match those of OpenAI,
    see <https://github.com/openai/openai-python/blob/v1.54.4/src/openai/_constants.py#L9>.
    """
    cached = _cached_async_http_client(provider=provider, timeout=timeout, connect=connect)
    if not cached.is_closed:
        return cached
    # The cached client was closed (this happens when it was used as a context manager),
    # so drop the cache and build a fresh one.
    _cached_async_http_client.cache_clear()
    return _cached_async_http_client(provider=provider, timeout=timeout, connect=connect)

456 

457 

@cache
def _cached_async_http_client(provider: str | None, timeout: int = 600, connect: int = 5) -> httpx.AsyncClient:
    # Memoized per (provider, timeout, connect) via functools.cache; every client
    # shares the single transport instance returned by `_cached_async_http_transport`.
    return httpx.AsyncClient(
        transport=_cached_async_http_transport(),
        timeout=httpx.Timeout(timeout=timeout, connect=connect),
        headers={'User-Agent': get_user_agent()},
    )

465 

466 

@cache
def _cached_async_http_transport() -> httpx.AsyncHTTPTransport:
    # Single transport instance (memoized via functools.cache) shared by all cached clients.
    return httpx.AsyncHTTPTransport()

470 

471 

@cache
def get_user_agent() -> str:
    """Get the user agent string for the HTTP client."""
    # Imported lazily, presumably to avoid a circular import at module load time — TODO confirm.
    from .. import __version__

    return f'pydantic-ai/{__version__}'