Coverage for pydantic_ai_slim/pydantic_ai/models/__init__.py: 97.20%
111 statements
coverage.py v7.6.12, created at 2025-03-28 17:27 +0000
1"""Logic related to making requests to an LLM.
3The aim here is to make a common interface for different LLMs, so that the rest of the code can be agnostic to the
4specific LLM being used.
5"""
7from __future__ import annotations as _annotations
9from abc import ABC, abstractmethod
10from collections.abc import AsyncIterator, Iterator
11from contextlib import asynccontextmanager, contextmanager
12from dataclasses import dataclass, field
13from datetime import datetime
14from functools import cache
15from typing import TYPE_CHECKING, cast
17import httpx
18from typing_extensions import Literal, TypeAliasType
20from .._parts_manager import ModelResponsePartsManager
21from ..exceptions import UserError
22from ..messages import ModelMessage, ModelResponse, ModelResponseStreamEvent
23from ..settings import ModelSettings
24from ..usage import Usage
26if TYPE_CHECKING:
27 from ..tools import ToolDefinition


KnownModelName = TypeAliasType(
    'KnownModelName',
    Literal[
        'anthropic:claude-3-7-sonnet-latest',
        'anthropic:claude-3-5-haiku-latest',
        'anthropic:claude-3-5-sonnet-latest',
        'anthropic:claude-3-opus-latest',
        'claude-3-7-sonnet-latest',
        'claude-3-5-haiku-latest',
        'bedrock:amazon.titan-tg1-large',
        'bedrock:amazon.titan-text-lite-v1',
        'bedrock:amazon.titan-text-express-v1',
        'bedrock:us.amazon.nova-pro-v1:0',
        'bedrock:us.amazon.nova-lite-v1:0',
        'bedrock:us.amazon.nova-micro-v1:0',
        'bedrock:anthropic.claude-3-5-sonnet-20241022-v2:0',
        'bedrock:us.anthropic.claude-3-5-sonnet-20241022-v2:0',
        'bedrock:anthropic.claude-3-5-haiku-20241022-v1:0',
        'bedrock:us.anthropic.claude-3-5-haiku-20241022-v1:0',
        'bedrock:anthropic.claude-instant-v1',
        'bedrock:anthropic.claude-v2:1',
        'bedrock:anthropic.claude-v2',
        'bedrock:anthropic.claude-3-sonnet-20240229-v1:0',
        'bedrock:us.anthropic.claude-3-sonnet-20240229-v1:0',
        'bedrock:anthropic.claude-3-haiku-20240307-v1:0',
        'bedrock:us.anthropic.claude-3-haiku-20240307-v1:0',
        'bedrock:anthropic.claude-3-opus-20240229-v1:0',
        'bedrock:us.anthropic.claude-3-opus-20240229-v1:0',
        'bedrock:anthropic.claude-3-5-sonnet-20240620-v1:0',
        'bedrock:us.anthropic.claude-3-5-sonnet-20240620-v1:0',
        'bedrock:anthropic.claude-3-7-sonnet-20250219-v1:0',
        'bedrock:us.anthropic.claude-3-7-sonnet-20250219-v1:0',
        'bedrock:cohere.command-text-v14',
        'bedrock:cohere.command-r-v1:0',
        'bedrock:cohere.command-r-plus-v1:0',
        'bedrock:cohere.command-light-text-v14',
        'bedrock:meta.llama3-8b-instruct-v1:0',
        'bedrock:meta.llama3-70b-instruct-v1:0',
        'bedrock:meta.llama3-1-8b-instruct-v1:0',
        'bedrock:us.meta.llama3-1-8b-instruct-v1:0',
        'bedrock:meta.llama3-1-70b-instruct-v1:0',
        'bedrock:us.meta.llama3-1-70b-instruct-v1:0',
        'bedrock:meta.llama3-1-405b-instruct-v1:0',
        'bedrock:us.meta.llama3-2-11b-instruct-v1:0',
        'bedrock:us.meta.llama3-2-90b-instruct-v1:0',
        'bedrock:us.meta.llama3-2-1b-instruct-v1:0',
        'bedrock:us.meta.llama3-2-3b-instruct-v1:0',
        'bedrock:us.meta.llama3-3-70b-instruct-v1:0',
        'bedrock:mistral.mistral-7b-instruct-v0:2',
        'bedrock:mistral.mixtral-8x7b-instruct-v0:1',
        'bedrock:mistral.mistral-large-2402-v1:0',
        'bedrock:mistral.mistral-large-2407-v1:0',
        'claude-3-5-sonnet-latest',
        'claude-3-opus-latest',
        'cohere:c4ai-aya-expanse-32b',
        'cohere:c4ai-aya-expanse-8b',
        'cohere:command',
        'cohere:command-light',
        'cohere:command-light-nightly',
        'cohere:command-nightly',
        'cohere:command-r',
        'cohere:command-r-03-2024',
        'cohere:command-r-08-2024',
        'cohere:command-r-plus',
        'cohere:command-r-plus-04-2024',
        'cohere:command-r-plus-08-2024',
        'cohere:command-r7b-12-2024',
        'deepseek:deepseek-chat',
        'deepseek:deepseek-reasoner',
        'google-gla:gemini-1.0-pro',
        'google-gla:gemini-1.5-flash',
        'google-gla:gemini-1.5-flash-8b',
        'google-gla:gemini-1.5-pro',
        'google-gla:gemini-2.0-flash-exp',
        'google-gla:gemini-2.0-flash-thinking-exp-01-21',
        'google-gla:gemini-exp-1206',
        'google-gla:gemini-2.0-flash',
        'google-gla:gemini-2.0-flash-lite-preview-02-05',
        'google-gla:gemini-2.0-pro-exp-02-05',
        'google-vertex:gemini-1.0-pro',
        'google-vertex:gemini-1.5-flash',
        'google-vertex:gemini-1.5-flash-8b',
        'google-vertex:gemini-1.5-pro',
        'google-vertex:gemini-2.0-flash-exp',
        'google-vertex:gemini-2.0-flash-thinking-exp-01-21',
        'google-vertex:gemini-exp-1206',
        'google-vertex:gemini-2.0-flash',
        'google-vertex:gemini-2.0-flash-lite-preview-02-05',
        'google-vertex:gemini-2.0-pro-exp-02-05',
        'gpt-3.5-turbo',
        'gpt-3.5-turbo-0125',
        'gpt-3.5-turbo-0301',
        'gpt-3.5-turbo-0613',
        'gpt-3.5-turbo-1106',
        'gpt-3.5-turbo-16k',
        'gpt-3.5-turbo-16k-0613',
        'gpt-4',
        'gpt-4-0125-preview',
        'gpt-4-0314',
        'gpt-4-0613',
        'gpt-4-1106-preview',
        'gpt-4-32k',
        'gpt-4-32k-0314',
        'gpt-4-32k-0613',
        'gpt-4-turbo',
        'gpt-4-turbo-2024-04-09',
        'gpt-4-turbo-preview',
        'gpt-4-vision-preview',
        'gpt-4o',
        'gpt-4o-2024-05-13',
        'gpt-4o-2024-08-06',
        'gpt-4o-2024-11-20',
        'gpt-4o-audio-preview',
        'gpt-4o-audio-preview-2024-10-01',
        'gpt-4o-audio-preview-2024-12-17',
        'gpt-4o-mini',
        'gpt-4o-mini-2024-07-18',
        'gpt-4o-mini-audio-preview',
        'gpt-4o-mini-audio-preview-2024-12-17',
        'gpt-4o-mini-search-preview',
        'gpt-4o-mini-search-preview-2025-03-11',
        'gpt-4o-search-preview',
        'gpt-4o-search-preview-2025-03-11',
        'groq:gemma2-9b-it',
        'groq:llama-3.1-8b-instant',
        'groq:llama-3.2-11b-vision-preview',
        'groq:llama-3.2-1b-preview',
        'groq:llama-3.2-3b-preview',
        'groq:llama-3.2-90b-vision-preview',
        'groq:llama-3.3-70b-specdec',
        'groq:llama-3.3-70b-versatile',
        'groq:llama3-70b-8192',
        'groq:llama3-8b-8192',
        'groq:mixtral-8x7b-32768',
        'mistral:codestral-latest',
        'mistral:mistral-large-latest',
        'mistral:mistral-moderation-latest',
        'mistral:mistral-small-latest',
        'o1',
        'o1-2024-12-17',
        'o1-mini',
        'o1-mini-2024-09-12',
        'o1-preview',
        'o1-preview-2024-09-12',
        'o3-mini',
        'o3-mini-2025-01-31',
        'openai:chatgpt-4o-latest',
        'openai:gpt-3.5-turbo',
        'openai:gpt-3.5-turbo-0125',
        'openai:gpt-3.5-turbo-0301',
        'openai:gpt-3.5-turbo-0613',
        'openai:gpt-3.5-turbo-1106',
        'openai:gpt-3.5-turbo-16k',
        'openai:gpt-3.5-turbo-16k-0613',
        'openai:gpt-4',
        'openai:gpt-4-0125-preview',
        'openai:gpt-4-0314',
        'openai:gpt-4-0613',
        'openai:gpt-4-1106-preview',
        'openai:gpt-4-32k',
        'openai:gpt-4-32k-0314',
        'openai:gpt-4-32k-0613',
        'openai:gpt-4-turbo',
        'openai:gpt-4-turbo-2024-04-09',
        'openai:gpt-4-turbo-preview',
        'openai:gpt-4-vision-preview',
        'openai:gpt-4o',
        'openai:gpt-4o-2024-05-13',
        'openai:gpt-4o-2024-08-06',
        'openai:gpt-4o-2024-11-20',
        'openai:gpt-4o-audio-preview',
        'openai:gpt-4o-audio-preview-2024-10-01',
        'openai:gpt-4o-audio-preview-2024-12-17',
        'openai:gpt-4o-mini',
        'openai:gpt-4o-mini-2024-07-18',
        'openai:gpt-4o-mini-audio-preview',
        'openai:gpt-4o-mini-audio-preview-2024-12-17',
        'openai:gpt-4o-mini-search-preview',
        'openai:gpt-4o-mini-search-preview-2025-03-11',
        'openai:gpt-4o-search-preview',
        'openai:gpt-4o-search-preview-2025-03-11',
        'openai:o1',
        'openai:o1-2024-12-17',
        'openai:o1-mini',
        'openai:o1-mini-2024-09-12',
        'openai:o1-preview',
        'openai:o1-preview-2024-09-12',
        'openai:o3-mini',
        'openai:o3-mini-2025-01-31',
        'test',
    ],
)
"""Known model names that can be used with the `model` parameter of [`Agent`][pydantic_ai.Agent].

`KnownModelName` is provided as a concise way to specify a model.
"""


@dataclass
class ModelRequestParameters:
    """Configuration for an agent's request to a model, specifically related to tools and result handling."""

    function_tools: list[ToolDefinition]
    allow_text_result: bool
    result_tools: list[ToolDefinition]
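

# A minimal sketch (illustrative only) of building request parameters by hand,
# e.g. for calling a `Model` directly rather than through an `Agent`; real
# agents populate the tool lists from their registered tools.
def _example_request_parameters() -> ModelRequestParameters:
    return ModelRequestParameters(function_tools=[], allow_text_result=True, result_tools=[])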


class Model(ABC):
    """Abstract class for a model."""

    @abstractmethod
    async def request(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> tuple[ModelResponse, Usage]:
        """Make a request to the model."""
        raise NotImplementedError()

    @asynccontextmanager
    async def request_stream(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> AsyncIterator[StreamedResponse]:
        """Make a request to the model and return a streaming response."""
        # This method is not required, but you need to implement it if you want to support streamed responses.
        raise NotImplementedError(f'Streamed requests not supported by this {self.__class__.__name__}')
        # yield is required to make this a generator for type checking
        # noinspection PyUnreachableCode
        yield  # pragma: no cover

    @property
    @abstractmethod
    def model_name(self) -> str:
        """The model name."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def system(self) -> str:
        """The system / model provider, e.g. `openai`.

        Used to populate the `gen_ai.system` OpenTelemetry semantic convention attribute,
        so it should use the well-known values listed in
        https://opentelemetry.io/docs/specs/semconv/attributes-registry/gen-ai/#gen-ai-system
        when applicable.
        """
        raise NotImplementedError()

    @property
    def base_url(self) -> str | None:
        """The base URL for the provider API, if available."""
        return None
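

# A minimal sketch (illustrative only, not part of this module) of what a
# concrete `Model` subclass looks like: instead of calling a real provider
# API, it always replies with fixed text and reports empty usage.
class _ExampleEchoModel(Model):
    async def request(
        self,
        messages: list[ModelMessage],
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> tuple[ModelResponse, Usage]:
        from ..messages import TextPart  # imported lazily; only this example needs it

        return ModelResponse(parts=[TextPart(content='hello')], model_name=self.model_name), Usage()

    @property
    def model_name(self) -> str:
        return 'example-echo'

    @property
    def system(self) -> str:
        return 'example'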


@dataclass
class StreamedResponse(ABC):
    """Streamed response from an LLM."""

    _parts_manager: ModelResponsePartsManager = field(default_factory=ModelResponsePartsManager, init=False)
    _event_iterator: AsyncIterator[ModelResponseStreamEvent] | None = field(default=None, init=False)
    _usage: Usage = field(default_factory=Usage, init=False)

    def __aiter__(self) -> AsyncIterator[ModelResponseStreamEvent]:
        """Stream the response as an async iterable of [`ModelResponseStreamEvent`][pydantic_ai.messages.ModelResponseStreamEvent]s."""
        if self._event_iterator is None:
            self._event_iterator = self._get_event_iterator()
        return self._event_iterator

    @abstractmethod
    async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
        """Return an async iterator of [`ModelResponseStreamEvent`][pydantic_ai.messages.ModelResponseStreamEvent]s.

        This method should be implemented by subclasses to translate the vendor-specific stream of events into
        pydantic_ai-format events.

        It should use the `_parts_manager` to handle deltas, and should update the `_usage` attribute as it goes.
        """
        raise NotImplementedError()
        # noinspection PyUnreachableCode
        yield

    def get(self) -> ModelResponse:
        """Build a [`ModelResponse`][pydantic_ai.messages.ModelResponse] from the data received from the stream so far."""
        return ModelResponse(
            parts=self._parts_manager.get_parts(), model_name=self.model_name, timestamp=self.timestamp
        )

    def usage(self) -> Usage:
        """Get the usage of the response so far. This will not be the final usage until the stream is exhausted."""
        return self._usage

    @property
    @abstractmethod
    def model_name(self) -> str:
        """Get the model name of the response."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def timestamp(self) -> datetime:
        """Get the timestamp of the response."""
        raise NotImplementedError()
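

# Sketch (illustrative only) of consuming a `StreamedResponse`, assuming
# `stream` was obtained from `Model.request_stream`: iterate the events, then
# read the accumulated response and usage.
async def _example_consume_stream(stream: StreamedResponse) -> ModelResponse:
    async for _event in stream:  # each event is a ModelResponseStreamEvent
        pass  # a real consumer would react to text / tool-call deltas here
    _usage = stream.usage()  # running total; final once the stream is exhausted
    return stream.get()  # ModelResponse built from the parts received so far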


ALLOW_MODEL_REQUESTS = True
"""Whether to allow requests to models.

This global setting allows you to disable requests to most models, e.g. to make sure you don't accidentally
make costly requests to a model during tests.

The testing models [`TestModel`][pydantic_ai.models.test.TestModel] and
[`FunctionModel`][pydantic_ai.models.function.FunctionModel] are not affected by this setting.
"""


def check_allow_model_requests() -> None:
    """Check if model requests are allowed.

    If you're defining your own models that have costs or latency associated with their use, you should call this in
    [`Model.request`][pydantic_ai.models.Model.request] and [`Model.request_stream`][pydantic_ai.models.Model.request_stream].

    Raises:
        RuntimeError: If model requests are not allowed.
    """
    if not ALLOW_MODEL_REQUESTS:
        raise RuntimeError('Model requests are not allowed, since ALLOW_MODEL_REQUESTS is False')


@contextmanager
def override_allow_model_requests(allow_model_requests: bool) -> Iterator[None]:
    """Context manager to temporarily override [`ALLOW_MODEL_REQUESTS`][pydantic_ai.models.ALLOW_MODEL_REQUESTS].

    Args:
        allow_model_requests: Whether to allow model requests within the context.
    """
    global ALLOW_MODEL_REQUESTS
    old_value = ALLOW_MODEL_REQUESTS
    ALLOW_MODEL_REQUESTS = allow_model_requests  # pyright: ignore[reportConstantRedefinition]
    try:
        yield
    finally:
        ALLOW_MODEL_REQUESTS = old_value  # pyright: ignore[reportConstantRedefinition]
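

# Sketch (illustrative only) of guarding a test suite: with requests disabled
# via the context manager, `check_allow_model_requests()` raises `RuntimeError`.
def _example_block_requests() -> None:
    with override_allow_model_requests(False):
        try:
            check_allow_model_requests()
        except RuntimeError:
            pass  # expected: model requests are disabled inside the context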


def infer_model(model: Model | KnownModelName) -> Model:
    """Infer the model from the name."""
    if isinstance(model, Model):
        return model
    elif model == 'test':
        from .test import TestModel

        return TestModel()

    try:
        provider, model_name = model.split(':', maxsplit=1)
        provider = cast(str, provider)
    except ValueError:
        model_name = model
        # TODO(Marcelo): We should deprecate this way.
        if model_name.startswith(('gpt', 'o1', 'o3')):
            provider = 'openai'
        elif model_name.startswith('claude'):
            provider = 'anthropic'
        elif model_name.startswith('gemini'):
            provider = 'google-gla'
        else:
            raise UserError(f'Unknown model: {model}')

    if provider == 'vertexai':  # coverage: this condition was never true in the recorded run
        provider = 'google-vertex'

    if provider == 'cohere':
        from .cohere import CohereModel

        return CohereModel(model_name, provider=provider)
    elif provider in ('deepseek', 'openai'):
        from .openai import OpenAIModel

        return OpenAIModel(model_name, provider=provider)
    elif provider in ('google-gla', 'google-vertex'):
        from .gemini import GeminiModel

        return GeminiModel(model_name, provider=provider)
    elif provider == 'groq':
        from .groq import GroqModel

        return GroqModel(model_name, provider=provider)
    elif provider == 'mistral':
        from .mistral import MistralModel

        return MistralModel(model_name, provider=provider)
    elif provider == 'anthropic':
        from .anthropic import AnthropicModel

        return AnthropicModel(model_name, provider=provider)
    elif provider == 'bedrock':  # coverage: this condition was always true in the recorded run
        from .bedrock import BedrockConverseModel

        return BedrockConverseModel(model_name, provider=provider)
    else:
        raise UserError(f'Unknown model: {model}')
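

# Sketch (illustrative only) of the resolution paths handled above: a `Model`
# instance passes through unchanged, `'test'` maps to `TestModel`, and a
# `provider:model` string selects the provider class.
def _example_infer() -> None:
    test_model = infer_model('test')  # TestModel, needs no credentials
    assert infer_model(test_model) is test_model  # instances pass through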


def cached_async_http_client(*, provider: str | None = None, timeout: int = 600, connect: int = 5) -> httpx.AsyncClient:
    """Cached HTTPX async client that creates a separate client for each provider.

    The client is cached based on the provider parameter. If provider is None, it's used for non-provider specific
    requests (like downloading images). Multiple agents and calls can share the same client when they use the same provider.

    There are good reasons why in production you should use a `httpx.AsyncClient` as an async context manager as
    described in [encode/httpx#2026](https://github.com/encode/httpx/pull/2026), but when experimenting or showing
    examples, it's very useful not to.

    The default timeouts match those of OpenAI,
    see <https://github.com/openai/openai-python/blob/v1.54.4/src/openai/_constants.py#L9>.
    """
    client = _cached_async_http_client(provider=provider, timeout=timeout, connect=connect)
    if client.is_closed:
        # This happens if the context manager is used, so we need to create a new client.
        _cached_async_http_client.cache_clear()
        client = _cached_async_http_client(provider=provider, timeout=timeout, connect=connect)
    return client
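

# Sketch (illustrative only): because `_cached_async_http_client` is `@cache`d
# on its arguments, two calls with the same provider (and default timeouts)
# share a single client instance, unless the cached one was closed.
def _example_cached_client() -> None:
    a = cached_async_http_client(provider='openai')
    b = cached_async_http_client(provider='openai')
    assert a is b  # same cached instance for the same provider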


@cache
def _cached_async_http_client(provider: str | None, timeout: int = 600, connect: int = 5) -> httpx.AsyncClient:
    return httpx.AsyncClient(
        transport=_cached_async_http_transport(),
        timeout=httpx.Timeout(timeout=timeout, connect=connect),
        headers={'User-Agent': get_user_agent()},
    )


@cache
def _cached_async_http_transport() -> httpx.AsyncHTTPTransport:
    return httpx.AsyncHTTPTransport()


@cache
def get_user_agent() -> str:
    """Get the user agent string for the HTTP client."""
    from .. import __version__

    return f'pydantic-ai/{__version__}'