caarleexx committed (verified)
Commit 26825f0 · 1 Parent(s): 16c7733

Upload groq_client.py

Files changed (1)
  1. llm/clients/groq_client.py +76 -144
llm/clients/groq_client.py CHANGED
@@ -1,36 +1,24 @@
-"""Groq client using direct HTTP requests (curl-style) - compatible with LLMManager."""
 
 import os
 import json
 import logging
 from typing import Optional, Dict, AsyncGenerator
-from dataclasses import dataclass
-import requests
 import aiohttp
 
 logger = logging.getLogger(__name__)
 
 
-@dataclass
-class GroqResponse:
-    """Groq response - compatible with the existing interface."""
-    content: str
-    tokens_input: int
-    tokens_output: int
-    finish_reason: str
-
-    @property
-    def total_tokens(self) -> int:
-        """Total tokens (input + output)."""
-        return self.tokens_input + self.tokens_output
-
-
 class GroqClient:
-    """Groq client using direct HTTP requests (curl-style)."""
 
     def __init__(self, api_key: Optional[str] = None):
         """
-        Initializes the Groq client with requests.
 
         Args:
             api_key: API key (if None, uses GROQ_API_KEY)
@@ -43,60 +31,53 @@ class GroqClient:
         self.default_timeout = int(os.getenv('GROQ_TIMEOUT', '120'))
         logger.info("✅ GroqClient initialized (HTTP requests)")
 
-    async def generate(self,
-                       prompt: str,
-                       system_prompt: Optional[str] = None,
-                       model: str = "llama-3.3-70b-versatile",
-                       temperature: float = 0.7,
-                       max_tokens: Optional[int] = None,
-                       top_p: float = 0.9,
-                       response_format: Optional[Dict] = None) -> GroqResponse:
         """
-        Generates a response using Groq via HTTP requests (curl-style).
-        Asynchronous method compatible with LLMManager.
 
         Args:
-            prompt: User prompt
-            system_prompt: System prompt (with JSON schema)
             model: Model to use
             temperature: Temperature (0-2)
-            max_tokens: Maximum tokens in the response
-            top_p: Top-p sampling
-            response_format: Response format (e.g. {"type": "json_object"})
 
         Returns:
-            GroqResponse with content, tokens, and metadata
         """
-        # Build headers
         headers = {
             "Authorization": f"Bearer {self.api_key}",
             "Content-Type": "application/json"
         }
 
-        # Build messages
-        messages = []
-        if system_prompt:
-            messages.append({"role": "system", "content": system_prompt})
-        messages.append({"role": "user", "content": prompt})
-
-        # Build payload
         payload = {
             "model": model,
             "messages": messages,
             "temperature": temperature,
-            "top_p": top_p,
         }
 
         if max_tokens:
             payload["max_tokens"] = max_tokens
 
-        # Add response_format if provided
         if response_format:
             payload["response_format"] = response_format
 
         try:
-            # Asynchronous HTTP POST request using aiohttp
-            logger.debug(f"📤 Sending request to Groq: model={model}, temp={temperature}")
 
             async with aiohttp.ClientSession() as session:
                 async with session.post(
@@ -105,28 +86,29 @@ class GroqClient:
                     json=payload,
                     timeout=aiohttp.ClientTimeout(total=self.default_timeout)
                 ) as response:
-                    # Check for HTTP errors
                     response.raise_for_status()
-
-                    # Parse the response JSON
                     data = await response.json()
 
-                    # Extract data
                     content = data['choices'][0]['message']['content']
                     finish_reason = data['choices'][0]['finish_reason']
 
                     usage = data.get('usage', {})
                     tokens_input = usage.get('prompt_tokens', 0)
                     tokens_output = usage.get('completion_tokens', 0)
 
-                    logger.info(f"✅ Groq response: {tokens_input + tokens_output} tokens, finish={finish_reason}")
 
-                    return GroqResponse(
-                        content=content,
-                        tokens_input=tokens_input,
-                        tokens_output=tokens_output,
-                        finish_reason=finish_reason
-                    )
 
         except aiohttp.ClientResponseError as e:
             logger.error(f"❌ Groq HTTP Error {e.status}: {e.message}")
@@ -137,110 +119,65 @@ class GroqClient:
             raise
 
         except Exception as e:
-            logger.error(f"❌ Groq unexpected error: {e}")
             raise
 
-    def generate_sync(self,
-                      prompt: str,
-                      system_prompt: Optional[str] = None,
-                      model: str = "llama-3.3-70b-versatile",
-                      temperature: float = 0.7,
-                      max_tokens: Optional[int] = None,
-                      response_format: Optional[Dict] = None) -> GroqResponse:
         """
-        Synchronous version of generate (using requests).
 
         Args:
             prompt: User prompt
-            system_prompt: System prompt
             model: Model
             temperature: Temperature
             max_tokens: Max tokens
-            response_format: Response format
 
         Returns:
-            GroqResponse
         """
-        headers = {
-            "Authorization": f"Bearer {self.api_key}",
-            "Content-Type": "application/json"
-        }
-
         messages = []
         if system_prompt:
             messages.append({"role": "system", "content": system_prompt})
         messages.append({"role": "user", "content": prompt})
 
-        payload = {
-            "model": model,
-            "messages": messages,
-            "temperature": temperature,
-        }
-
-        if max_tokens:
-            payload["max_tokens"] = max_tokens
-        if response_format:
-            payload["response_format"] = response_format
-
-        try:
-            response = requests.post(
-                self.base_url,
-                headers=headers,
-                json=payload,
-                timeout=self.default_timeout
-            )
-
-            response.raise_for_status()
-            data = response.json()
-
-            content = data['choices'][0]['message']['content']
-            finish_reason = data['choices'][0]['finish_reason']
-
-            usage = data.get('usage', {})
-            tokens_input = usage.get('prompt_tokens', 0)
-            tokens_output = usage.get('completion_tokens', 0)
-
-            return GroqResponse(
-                content=content,
-                tokens_input=tokens_input,
-                tokens_output=tokens_output,
-                finish_reason=finish_reason
-            )
-
-        except requests.exceptions.HTTPError as e:
-            status_code = e.response.status_code if e.response else 0
-            error_body = e.response.text if e.response else str(e)
-            logger.error(f"❌ Groq HTTP Error {status_code}: {error_body}")
-            raise
-
-        except requests.exceptions.Timeout:
-            logger.error(f"❌ Groq timeout after {self.default_timeout}s")
-            raise
-
-        except Exception as e:
-            logger.error(f"❌ Groq unexpected error: {e}")
-            raise
-
-    async def generate_stream(self,
-                              prompt: str,
-                              system_prompt: Optional[str] = None,
-                              model: str = "llama-3.3-70b-versatile",
-                              temperature: float = 0.7,
-                              max_tokens: Optional[int] = None,
-                              top_p: float = 0.9) -> AsyncGenerator[str, None]:
         """
-        Generates a streaming response using Groq.
 
         Args:
-            prompt: Prompt to generate from
             system_prompt: System prompt
-            model: Model to use
-            temperature: Creativity
-            max_tokens: Maximum tokens
-            top_p: Top-p sampling
 
         Yields:
-            Text chunks of the response
         """
         headers = {
             "Authorization": f"Bearer {self.api_key}",
@@ -256,7 +193,6 @@ class GroqClient:
             "model": model,
             "messages": messages,
             "temperature": temperature,
-            "top_p": top_p,
             "stream": True,
         }
 
@@ -291,7 +227,3 @@ class GroqClient:
         except Exception as e:
             logger.error(f"❌ Streaming error: {e}")
             raise
-
-
-# Alias for compatibility
-AsyncGroq = GroqClient
@@ -1,36 +1,24 @@
+"""Groq client using direct HTTP requests - compatible with PARA.AI's LLMManager."""
 
 import os
 import json
 import logging
 from typing import Optional, Dict, AsyncGenerator
 import aiohttp
+import asyncio
 
 logger = logging.getLogger(__name__)
 
 
 class GroqClient:
+    """
+    Groq client using direct HTTP requests (curl-style).
+    Interface compatible with PARA.AI's LLMManager.
+    """
 
     def __init__(self, api_key: Optional[str] = None):
         """
+        Initializes the Groq client.
 
         Args:
             api_key: API key (if None, uses GROQ_API_KEY)
@@ -43,60 +31,53 @@ class GroqClient:
         self.default_timeout = int(os.getenv('GROQ_TIMEOUT', '120'))
         logger.info("✅ GroqClient initialized (HTTP requests)")
 
+    async def chat_completion(
+        self,
+        model: str,
+        messages: list,
+        temperature: float = 0.7,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[Dict] = None,
+        **kwargs
+    ) -> Dict:
         """
+        Chat completion compatible with LLMManager.
 
         Args:
             model: Model to use
+            messages: List of messages [{"role": "system", "content": "..."}, ...]
             temperature: Temperature (0-2)
+            max_tokens: Maximum tokens
+            response_format: Response format (e.g. {"type": "json_object"})
+            **kwargs: Additional arguments (top_p, etc.)
 
         Returns:
+            Dict with {'content': str, 'tokens_input': int, 'tokens_output': int, 'total_tokens': int}
         """
         headers = {
             "Authorization": f"Bearer {self.api_key}",
             "Content-Type": "application/json"
         }
 
         payload = {
             "model": model,
             "messages": messages,
             "temperature": temperature,
         }
 
         if max_tokens:
             payload["max_tokens"] = max_tokens
 
         if response_format:
             payload["response_format"] = response_format
 
+        # Add extra kwargs (top_p, frequency_penalty, etc.)
+        for key, value in kwargs.items():
+            if key not in payload:
+                payload[key] = value
+
        try:
+            logger.debug(f"📤 Groq request: model={model}, temp={temperature}, messages={len(messages)}")
 
             async with aiohttp.ClientSession() as session:
                 async with session.post(
@@ -105,28 +86,29 @@ class GroqClient:
                     json=payload,
                     timeout=aiohttp.ClientTimeout(total=self.default_timeout)
                 ) as response:
                     response.raise_for_status()
                     data = await response.json()
 
+                    # Extract data from the response
                     content = data['choices'][0]['message']['content']
                     finish_reason = data['choices'][0]['finish_reason']
 
                     usage = data.get('usage', {})
                     tokens_input = usage.get('prompt_tokens', 0)
                     tokens_output = usage.get('completion_tokens', 0)
+                    total_tokens = usage.get('total_tokens', tokens_input + tokens_output)
 
+                    logger.info(f"✅ Groq response: {total_tokens} tokens, finish={finish_reason}")
 
+                    # Return a plain dict (compatible with LLMManager)
+                    return {
+                        'content': content,
+                        'tokens_input': tokens_input,
+                        'tokens_output': tokens_output,
+                        'total_tokens': total_tokens,
+                        'finish_reason': finish_reason,
+                        'model': model
+                    }
 
         except aiohttp.ClientResponseError as e:
             logger.error(f"❌ Groq HTTP Error {e.status}: {e.message}")
@@ -137,110 +119,65 @@ class GroqClient:
             raise
 
         except Exception as e:
+            logger.error(f"❌ Groq error: {e}")
             raise
 
+    async def generate(
+        self,
+        prompt: str,
+        system_prompt: Optional[str] = None,
+        model: str = "llama-3.3-70b-versatile",
+        temperature: float = 0.7,
+        max_tokens: Optional[int] = None,
+        **kwargs
+    ) -> Dict:
         """
+        Simplified generate method (wrapper around chat_completion).
 
         Args:
             prompt: User prompt
+            system_prompt: System prompt (optional)
             model: Model
             temperature: Temperature
             max_tokens: Max tokens
+            **kwargs: Extra arguments
 
         Returns:
+            Dict with content and tokens
         """
         messages = []
         if system_prompt:
             messages.append({"role": "system", "content": system_prompt})
         messages.append({"role": "user", "content": prompt})
 
+        return await self.chat_completion(
+            model=model,
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            **kwargs
+        )
+
+    async def generate_stream(
+        self,
+        prompt: str,
+        system_prompt: Optional[str] = None,
+        model: str = "llama-3.3-70b-versatile",
+        temperature: float = 0.7,
+        max_tokens: Optional[int] = None
+    ) -> AsyncGenerator[str, None]:
         """
+        Generates a streaming response.
 
         Args:
+            prompt: User prompt
             system_prompt: System prompt
+            model: Model
+            temperature: Temperature
+            max_tokens: Max tokens
 
         Yields:
+            Text chunks
         """
         headers = {
             "Authorization": f"Bearer {self.api_key}",
@@ -256,7 +193,6 @@ class GroqClient:
             "model": model,
             "messages": messages,
             "temperature": temperature,
             "stream": True,
         }
 
@@ -291,7 +227,3 @@ class GroqClient:
         except Exception as e:
             logger.error(f"❌ Streaming error: {e}")
             raise
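
Reviewer's note: a minimal usage sketch of the new dict-based interface introduced by this commit. It is illustrative only and not part of the diff; it assumes GROQ_API_KEY is set in the environment and that the package is importable as llm.clients.groq_client from the repo root. Model name and method signatures are taken from the diff above.

import asyncio

from llm.clients.groq_client import GroqClient


async def main() -> None:
    client = GroqClient()  # falls back to the GROQ_API_KEY environment variable

    # Non-streaming call: chat_completion now returns a plain dict
    # instead of the removed GroqResponse dataclass.
    result = await client.chat_completion(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "system", "content": "You are a concise assistant."},
            {"role": "user", "content": "Say hello in one sentence."},
        ],
        temperature=0.7,
        top_p=0.9,  # extra sampling params are forwarded via **kwargs
    )
    print(result["content"], result["total_tokens"])

    # generate() builds the messages list from a prompt/system_prompt pair.
    result = await client.generate(prompt="Ping?", system_prompt="Reply tersely.")
    print(result["content"])

    # generate_stream() yields text chunks as they arrive.
    async for chunk in client.generate_stream(prompt="Count to three."):
        print(chunk, end="", flush=True)


if __name__ == "__main__":
    asyncio.run(main())

Returning a plain dict rather than the GroqResponse dataclass keeps callers decoupled from this client: LLMManager can consume the same keys ('content', 'total_tokens', 'finish_reason') without importing a Groq-specific type.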