Coverage for pydantic_ai_slim/pydantic_ai/models/groq.py: 95.54%

148 statements  

coverage.py v7.6.7, created at 2025-01-25 16:43 +0000

from __future__ import annotations as _annotations

from collections.abc import AsyncIterable, AsyncIterator, Iterable
from contextlib import asynccontextmanager
from dataclasses import dataclass, field
from datetime import datetime, timezone
from itertools import chain
from typing import Literal, cast, overload

from httpx import AsyncClient as AsyncHTTPClient
from typing_extensions import assert_never

from .. import UnexpectedModelBehavior, _utils, usage
from .._utils import guard_tool_call_id as _guard_tool_call_id
from ..messages import (
    ModelMessage,
    ModelRequest,
    ModelResponse,
    ModelResponsePart,
    ModelResponseStreamEvent,
    RetryPromptPart,
    SystemPromptPart,
    TextPart,
    ToolCallPart,
    ToolReturnPart,
    UserPromptPart,
)
from ..settings import ModelSettings
from ..tools import ToolDefinition
from . import (
    AgentModel,
    Model,
    StreamedResponse,
    cached_async_http_client,
    check_allow_model_requests,
)

try:
    from groq import NOT_GIVEN, AsyncGroq, AsyncStream
    from groq.types import chat
    from groq.types.chat import ChatCompletion, ChatCompletionChunk
except ImportError as _import_error:
    raise ImportError(
        'Please install `groq` to use the Groq model, '
        "you can use the `groq` optional group — `pip install 'pydantic-ai-slim[groq]'`"
    ) from _import_error

GroqModelName = Literal[
    'llama-3.3-70b-versatile',
    'llama-3.1-70b-versatile',
    'llama3-groq-70b-8192-tool-use-preview',
    'llama3-groq-8b-8192-tool-use-preview',
    'llama-3.1-70b-specdec',
    'llama-3.1-8b-instant',
    'llama-3.2-1b-preview',
    'llama-3.2-3b-preview',
    'llama-3.2-11b-vision-preview',
    'llama-3.2-90b-vision-preview',
    'llama3-70b-8192',
    'llama3-8b-8192',
    'mixtral-8x7b-32768',
    'gemma2-9b-it',
    'gemma-7b-it',
]
"""Named Groq models.

See [the Groq docs](https://console.groq.com/docs/models) for a full list.
"""

class GroqModelSettings(ModelSettings):
    """Settings used for a Groq model request."""

    # This class is a placeholder for any future groq-specific settings
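# `ModelSettings` is consumed as a plain mapping below (see the `model_settings.get(...)`
# calls in `_completions_create`), so a per-request override is just keyword data. A
# minimal sketch; the specific values are illustrative:
#
#     settings = GroqModelSettings(temperature=0.0, max_tokens=512)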

@dataclass(init=False)
class GroqModel(Model):
    """A model that uses the Groq API.

    Internally, this uses the [Groq Python client](https://github.com/groq/groq-python) to interact with the API.

    Apart from `__init__`, all methods are private or match those of the base class.
    """

    model_name: GroqModelName
    client: AsyncGroq = field(repr=False)

    def __init__(
        self,
        model_name: GroqModelName,
        *,
        api_key: str | None = None,
        groq_client: AsyncGroq | None = None,
        http_client: AsyncHTTPClient | None = None,
    ):
        """Initialize a Groq model.

        Args:
            model_name: The name of the Groq model to use. List of model names available
                [here](https://console.groq.com/docs/models).
            api_key: The API key to use for authentication; if not provided, the `GROQ_API_KEY` environment
                variable will be used if available.
            groq_client: An existing
                [`AsyncGroq`](https://github.com/groq/groq-python?tab=readme-ov-file#async-usage)
                client to use; if provided, `api_key` and `http_client` must be `None`.
            http_client: An existing `httpx.AsyncClient` to use for making HTTP requests.
        """
        self.model_name = model_name
        if groq_client is not None:
            assert http_client is None, 'Cannot provide both `groq_client` and `http_client`'
            assert api_key is None, 'Cannot provide both `groq_client` and `api_key`'
            self.client = groq_client
        elif http_client is not None:  # coverage: branch never taken in tests
            self.client = AsyncGroq(api_key=api_key, http_client=http_client)
        else:
            self.client = AsyncGroq(api_key=api_key, http_client=cached_async_http_client())

    async def agent_model(
        self,
        *,
        function_tools: list[ToolDefinition],
        allow_text_result: bool,
        result_tools: list[ToolDefinition],
    ) -> AgentModel:
        check_allow_model_requests()
        tools = [self._map_tool_definition(r) for r in function_tools]
        if result_tools:
            tools += [self._map_tool_definition(r) for r in result_tools]
        return GroqAgentModel(
            self.client,
            self.model_name,
            allow_text_result,
            tools,
        )

    def name(self) -> str:
        return f'groq:{self.model_name}'

    @staticmethod
    def _map_tool_definition(f: ToolDefinition) -> chat.ChatCompletionToolParam:
        return {
            'type': 'function',
            'function': {
                'name': f.name,
                'description': f.description,
                'parameters': f.parameters_json_schema,
            },
        }
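# A minimal construction sketch for the branches in `__init__` above; the key and
# client values are placeholders:
#
#     GroqModel('llama-3.3-70b-versatile')                     # key from GROQ_API_KEY
#     GroqModel('llama-3.3-70b-versatile', api_key='gsk_...')  # explicit key
#     GroqModel('llama-3.3-70b-versatile', groq_client=AsyncGroq(api_key='gsk_...'))
#
# Combining `groq_client` with `api_key` or `http_client` trips the asserts above.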

@dataclass
class GroqAgentModel(AgentModel):
    """Implementation of `AgentModel` for Groq models."""

    client: AsyncGroq
    model_name: str
    allow_text_result: bool
    tools: list[chat.ChatCompletionToolParam]

    async def request(
        self, messages: list[ModelMessage], model_settings: ModelSettings | None
    ) -> tuple[ModelResponse, usage.Usage]:
        response = await self._completions_create(messages, False, cast(GroqModelSettings, model_settings or {}))
        return self._process_response(response), _map_usage(response)

    @asynccontextmanager
    async def request_stream(
        self, messages: list[ModelMessage], model_settings: ModelSettings | None
    ) -> AsyncIterator[StreamedResponse]:
        response = await self._completions_create(messages, True, cast(GroqModelSettings, model_settings or {}))
        async with response:
            yield await self._process_streamed_response(response)

    @overload
    async def _completions_create(
        self, messages: list[ModelMessage], stream: Literal[True], model_settings: GroqModelSettings
    ) -> AsyncStream[ChatCompletionChunk]:
        pass

    @overload
    async def _completions_create(
        self, messages: list[ModelMessage], stream: Literal[False], model_settings: GroqModelSettings
    ) -> chat.ChatCompletion:
        pass

    async def _completions_create(
        self, messages: list[ModelMessage], stream: bool, model_settings: GroqModelSettings
    ) -> chat.ChatCompletion | AsyncStream[ChatCompletionChunk]:
        # standalone method to make it easier to override
        if not self.tools:
            tool_choice: Literal['none', 'required', 'auto'] | None = None
        elif not self.allow_text_result:
            tool_choice = 'required'
        else:
            tool_choice = 'auto'

        groq_messages = list(chain(*(self._map_message(m) for m in messages)))

        return await self.client.chat.completions.create(
            model=str(self.model_name),
            messages=groq_messages,
            n=1,
            parallel_tool_calls=model_settings.get('parallel_tool_calls', True if self.tools else NOT_GIVEN),
            tools=self.tools or NOT_GIVEN,
            tool_choice=tool_choice or NOT_GIVEN,
            stream=stream,
            max_tokens=model_settings.get('max_tokens', NOT_GIVEN),
            temperature=model_settings.get('temperature', NOT_GIVEN),
            top_p=model_settings.get('top_p', NOT_GIVEN),
            timeout=model_settings.get('timeout', NOT_GIVEN),
            seed=model_settings.get('seed', NOT_GIVEN),
            presence_penalty=model_settings.get('presence_penalty', NOT_GIVEN),
            frequency_penalty=model_settings.get('frequency_penalty', NOT_GIVEN),
            logit_bias=model_settings.get('logit_bias', NOT_GIVEN),
        )
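    # A worked example of the mapping above, derived directly from the code: with one
    # tool registered and `allow_text_result=False`, `tool_choice` is sent as 'required'
    # and `parallel_tool_calls` defaults to True; with no tools, `tool_choice` stays
    # None and both are sent as NOT_GIVEN, deferring to the API's defaults. Any setting
    # absent from `model_settings` (e.g. `temperature`) is likewise sent as NOT_GIVEN
    # rather than a guessed value.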

    def _process_response(self, response: chat.ChatCompletion) -> ModelResponse:
        """Process a non-streamed response, and prepare a message to return."""
        timestamp = datetime.fromtimestamp(response.created, tz=timezone.utc)
        choice = response.choices[0]
        items: list[ModelResponsePart] = []
        if choice.message.content is not None:
            items.append(TextPart(content=choice.message.content))
        if choice.message.tool_calls is not None:
            for c in choice.message.tool_calls:
                items.append(ToolCallPart(tool_name=c.function.name, args=c.function.arguments, tool_call_id=c.id))
        return ModelResponse(items, model_name=self.model_name, timestamp=timestamp)

    async def _process_streamed_response(self, response: AsyncStream[ChatCompletionChunk]) -> GroqStreamedResponse:
        """Process a streamed response, and prepare a streaming response to return."""
        peekable_response = _utils.PeekableAsyncStream(response)
        first_chunk = await peekable_response.peek()
        if isinstance(first_chunk, _utils.Unset):  # coverage: branch never taken in tests
            raise UnexpectedModelBehavior('Streamed response ended without content or tool calls')

        return GroqStreamedResponse(
            _response=peekable_response,
            _model_name=self.model_name,
            _timestamp=datetime.fromtimestamp(first_chunk.created, tz=timezone.utc),
        )

    @classmethod
    def _map_message(cls, message: ModelMessage) -> Iterable[chat.ChatCompletionMessageParam]:
        """Just maps a `pydantic_ai.Message` to a `groq.types.ChatCompletionMessageParam`."""
        if isinstance(message, ModelRequest):
            yield from cls._map_user_message(message)
        elif isinstance(message, ModelResponse):
            texts: list[str] = []
            tool_calls: list[chat.ChatCompletionMessageToolCallParam] = []
            for item in message.parts:
                if isinstance(item, TextPart):
                    texts.append(item.content)
                elif isinstance(item, ToolCallPart):
                    tool_calls.append(_map_tool_call(item))
                else:
                    assert_never(item)
            message_param = chat.ChatCompletionAssistantMessageParam(role='assistant')
            if texts:
                # Note: model responses from this model should only have one text item, so the following
                # shouldn't merge multiple texts into one unless you switch models between runs:
                message_param['content'] = '\n\n'.join(texts)
            if tool_calls:
                message_param['tool_calls'] = tool_calls
            yield message_param
        else:
            assert_never(message)

    @classmethod
    def _map_user_message(cls, message: ModelRequest) -> Iterable[chat.ChatCompletionMessageParam]:
        for part in message.parts:
            if isinstance(part, SystemPromptPart):
                yield chat.ChatCompletionSystemMessageParam(role='system', content=part.content)
            elif isinstance(part, UserPromptPart):
                yield chat.ChatCompletionUserMessageParam(role='user', content=part.content)
            elif isinstance(part, ToolReturnPart):
                yield chat.ChatCompletionToolMessageParam(
                    role='tool',
                    tool_call_id=_guard_tool_call_id(t=part, model_source='Groq'),
                    content=part.model_response_str(),
                )
            elif isinstance(part, RetryPromptPart):  # coverage: condition always true in tests
                if part.tool_name is None:  # coverage: branch never taken in tests
                    yield chat.ChatCompletionUserMessageParam(role='user', content=part.model_response())
                else:
                    yield chat.ChatCompletionToolMessageParam(
                        role='tool',
                        tool_call_id=_guard_tool_call_id(t=part, model_source='Groq'),
                        content=part.model_response(),
                    )
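# A message-mapping sketch, assuming the usual `pydantic_ai` message constructors; the
# chat params are TypedDicts, so the result is plain dicts:
#
#     request = ModelRequest(parts=[
#         SystemPromptPart(content='be terse'),
#         UserPromptPart(content='hi'),
#     ])
#     list(GroqAgentModel._map_message(request))
#     # -> [{'role': 'system', 'content': 'be terse'},
#     #     {'role': 'user', 'content': 'hi'}]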

@dataclass
class GroqStreamedResponse(StreamedResponse):
    """Implementation of `StreamedResponse` for Groq models."""

    _response: AsyncIterable[ChatCompletionChunk]
    _timestamp: datetime

    async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
        async for chunk in self._response:
            self._usage += _map_usage(chunk)

            try:
                choice = chunk.choices[0]
            except IndexError:
                continue

            # Handle the text part of the response
            content = choice.delta.content
            if content is not None:
                yield self._parts_manager.handle_text_delta(vendor_part_id='content', content=content)

            # Handle the tool calls
            for dtc in choice.delta.tool_calls or []:
                maybe_event = self._parts_manager.handle_tool_call_delta(
                    vendor_part_id=dtc.index,
                    tool_name=dtc.function and dtc.function.name,
                    args=dtc.function and dtc.function.arguments,
                    tool_call_id=dtc.id,
                )
                if maybe_event is not None:
                    yield maybe_event

    def timestamp(self) -> datetime:
        return self._timestamp
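# Streaming behaviour in brief: every text delta reuses `vendor_part_id='content'`, so
# the parts manager accumulates them into a single `TextPart`, while tool-call deltas
# are keyed by their chunk `index` so interleaved calls stay separate. Assuming the
# `StreamedResponse` base class exposes `_get_event_iterator` via async iteration, a
# consumer sketch looks like:
#
#     async with agent_model.request_stream(messages, None) as response:
#         async for event in response:  # ModelResponseStreamEvent
#             ...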

def _map_tool_call(t: ToolCallPart) -> chat.ChatCompletionMessageToolCallParam:
    return chat.ChatCompletionMessageToolCallParam(
        id=_guard_tool_call_id(t=t, model_source='Groq'),
        type='function',
        function={'name': t.tool_name, 'arguments': t.args_as_json_str()},
    )


def _map_usage(completion: ChatCompletionChunk | ChatCompletion) -> usage.Usage:
    response_usage = None
    if isinstance(completion, ChatCompletion):
        response_usage = completion.usage
    elif completion.x_groq is not None:  # coverage: branch never taken in tests
        response_usage = completion.x_groq.usage

    if response_usage is None:
        return usage.Usage()

    return usage.Usage(
        request_tokens=response_usage.prompt_tokens,
        response_tokens=response_usage.completion_tokens,
        total_tokens=response_usage.total_tokens,
    )
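# A worked example of the mapping above; the token counts are illustrative: a
# `ChatCompletion` whose `usage` reports `prompt_tokens=10, completion_tokens=5,
# total_tokens=15` becomes `usage.Usage(request_tokens=10, response_tokens=5,
# total_tokens=15)`, while a streamed chunk carrying no `x_groq` usage data becomes an
# empty `usage.Usage()`.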