caarleexx committed on
Commit
30f4353
·
verified ·
1 Parent(s): d24bb0a

Upload groq_client.py

Browse files
Files changed (1) hide show
  1. llm/clients/groq_client.py +224 -88
llm/clients/groq_client.py CHANGED
@@ -1,15 +1,11 @@
1
- """Cliente para Groq API."""
2
 
3
  import os
4
- import asyncio
5
- from typing import Optional, AsyncGenerator
6
- from dataclasses import dataclass
7
  import logging
8
-
9
- try:
10
- from groq import Groq, AsyncGroq
11
- except ImportError:
12
- raise ImportError("Instale groq: pip install groq")
13
 
14
  logger = logging.getLogger(__name__)
15
 
@@ -20,109 +16,249 @@ class GroqResponse:
20
  content: str
21
  tokens_input: int
22
  tokens_output: int
 
23
  finish_reason: str
 
24
 
25
 
26
  class GroqClient:
27
- """Cliente para Groq."""
28
-
29
  def __init__(self, api_key: Optional[str] = None):
30
  """
31
- Inicializa cliente Groq.
32
-
33
  Args:
34
  api_key: API key (se None, usa GROQ_API_KEY)
35
  """
36
  self.api_key = api_key or os.getenv('GROQ_API_KEY')
37
  if not self.api_key:
38
  raise ValueError("GROQ_API_KEY não configurada")
39
-
40
- self.client = Groq(api_key=self.api_key)
41
- self.async_client = AsyncGroq(api_key=self.api_key)
42
- logger.info("GroqClient inicializado")
43
-
44
- async def generate(self,
45
- prompt: str,
46
- model: str = "openai/gpt-oss-120b",
47
- temperature: float = 0.7,
48
- max_tokens: Optional[int] = None,
49
- top_p: float = 0.9) -> GroqResponse:
 
 
50
  """
51
- Gera resposta usando Groq.
52
-
53
  Args:
54
- prompt: Prompt para gerar
 
55
  model: Modelo a usar
56
- temperature: Criatividade
57
- max_tokens: Tokens máximos
58
- top_p: Top-p sampling
59
-
 
60
  Returns:
61
- GroqResponse
62
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  try:
64
- response = await self.async_client.chat.completions.create(
65
- model=model,
66
- messages=[{"role": "user", "content": prompt}],
67
- temperature=temperature,
68
- max_tokens=max_tokens,
69
- top_p=top_p,
70
- stream=False,
 
71
  )
72
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  return GroqResponse(
74
- content=response.choices[0].message.content,
75
- tokens_input=response.usage.prompt_tokens,
76
- tokens_output=response.usage.completion_tokens,
77
- finish_reason=response.choices[0].finish_reason,
 
 
78
  )
79
-
 
 
 
 
 
 
 
 
 
 
80
  except Exception as e:
81
- logger.error(f"Erro ao gerar com Groq: {e}")
82
  raise
83
-
84
- async def generate_stream(self,
85
- prompt: str,
86
- model: str = "mixtral-8x7b-32768",
87
- temperature: float = 0.7,
88
- max_tokens: Optional[int] = None,
89
- top_p: float = 0.9) -> AsyncGenerator[str, None]:
90
  """
91
- Gera resposta em streaming usando Groq.
92
-
93
  Args:
94
- prompt: Prompt para gerar
95
- model: Modelo a usar
96
- temperature: Criatividade
97
- max_tokens: Tokens máximos
98
- top_p: Top-p sampling
99
-
100
- Yields:
101
- Chunks de texto
102
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  try:
104
- stream = await self.async_client.chat.completions.create(
105
- model=model,
106
- messages=[{"role": "user", "content": prompt}],
107
- temperature=temperature,
108
- max_tokens=max_tokens,
109
- top_p=top_p,
110
- stream=True,
111
- )
112
-
113
- async for chunk in stream:
114
- if chunk.choices[0].delta.content:
115
- yield chunk.choices[0].delta.content
116
-
117
- except Exception as e:
118
- logger.error(f"Erro ao gerar stream com Groq: {e}")
119
- raise
120
-
121
- def list_models(self) -> list:
122
- """Lista modelos disponíveis."""
123
- try:
124
- models = self.client.models.list()
125
- return [m.id for m in models.data]
126
- except Exception as e:
127
- logger.error(f"Erro ao listar modelos: {e}")
128
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cliente Groq usando HTTP requests diretos (curl-style)."""
2
 
3
  import os
4
+ import json
 
 
5
  import logging
6
+ from typing import Optional, Dict, List
7
+ from dataclasses import dataclass
8
+ import requests
 
 
9
 
10
  logger = logging.getLogger(__name__)
11
 
 
16
  content: str
17
  tokens_input: int
18
  tokens_output: int
19
+ total_tokens: int
20
  finish_reason: str
21
+ model: str
22
 
23
 
24
class GroqClient:
    """Groq client using direct HTTP requests (curl-style)."""

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the Groq client.

        Args:
            api_key: API key (if None, reads the GROQ_API_KEY env var).

        Raises:
            ValueError: if no API key is available.
        """
        self.api_key = api_key or os.getenv('GROQ_API_KEY')
        if not self.api_key:
            raise ValueError("GROQ_API_KEY não configurada")

        self.base_url = "https://api.groq.com/openai/v1/chat/completions"
        # Request timeout in seconds; overridable via the GROQ_TIMEOUT env var.
        self.default_timeout = int(os.getenv('GROQ_TIMEOUT', '120'))
        logger.info("GroqClient inicializado (HTTP requests)")

    def generate(self,
                 prompt: str,
                 system_prompt: Optional[str] = None,
                 model: str = "llama-3.3-70b-versatile",
                 temperature: float = 0.7,
                 max_tokens: Optional[int] = None,
                 response_format: Optional[Dict] = None,
                 timeout: Optional[int] = None) -> GroqResponse:
        """
        Generate a response from Groq via a direct HTTP POST (curl-style).

        Args:
            prompt: User prompt.
            system_prompt: Optional system prompt (e.g. containing a JSON schema).
            model: Model identifier.
            temperature: Sampling temperature (0-2).
            max_tokens: Maximum tokens in the response (omitted when None).
            response_format: Optional response format, e.g. {"type": "json_object"}.
            timeout: Per-request timeout in seconds (falls back to default_timeout).

        Returns:
            GroqResponse with content, token usage and metadata.

        Raises:
            requests.exceptions.HTTPError: on non-2xx HTTP statuses.
            requests.exceptions.Timeout: when the request exceeds the timeout.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        # Build the messages list; system prompt is optional.
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
        }

        # `is not None` so an explicit max_tokens=0 is still forwarded.
        if max_tokens is not None:
            payload["max_tokens"] = max_tokens

        if response_format:
            payload["response_format"] = response_format

        request_timeout = timeout or self.default_timeout

        try:
            logger.debug(f"Enviando request para Groq: model={model}, temp={temperature}")

            response = requests.post(
                self.base_url,
                headers=headers,
                json=payload,
                timeout=request_timeout
            )

            # Raise HTTPError for non-2xx statuses.
            response.raise_for_status()

            data = response.json()

            content = data['choices'][0]['message']['content']
            finish_reason = data['choices'][0]['finish_reason']

            # Usage block may be absent; fall back to 0 / computed total.
            usage = data.get('usage', {})
            tokens_input = usage.get('prompt_tokens', 0)
            tokens_output = usage.get('completion_tokens', 0)
            total_tokens = usage.get('total_tokens', tokens_input + tokens_output)

            logger.info(f"✅ Groq response: {total_tokens} tokens, finish={finish_reason}")

            return GroqResponse(
                content=content,
                tokens_input=tokens_input,
                tokens_output=tokens_output,
                total_tokens=total_tokens,
                finish_reason=finish_reason,
                model=model
            )

        except requests.exceptions.HTTPError as e:
            # BUGFIX: requests.Response is falsy for 4xx/5xx statuses, so the
            # previous `if e.response` check always reported status 0 here.
            # Compare against None to surface the real status code.
            status_code = e.response.status_code if e.response is not None else 0
            error_body = e.response.text if e.response is not None else str(e)
            logger.error(f"❌ Groq HTTP Error {status_code}: {error_body}")
            raise

        except requests.exceptions.Timeout:
            logger.error(f"❌ Groq timeout após {request_timeout}s")
            raise

        except Exception as e:
            logger.error(f" Groq erro inesperado: {e}")
            raise

    def generate_with_retry(self,
                            prompt: str,
                            system_prompt: Optional[str] = None,
                            max_retries: int = 3,
                            **kwargs) -> GroqResponse:
        """
        Generate with automatic retry on 429/503 HTTP errors.

        Uses exponential backoff with jitter between attempts. Any other
        error (HTTP or otherwise) propagates immediately.

        Args:
            prompt: User prompt.
            system_prompt: Optional system prompt.
            max_retries: Maximum number of attempts.
            **kwargs: Extra arguments forwarded to generate().

        Returns:
            GroqResponse.

        Raises:
            RuntimeError: if all attempts were consumed by retryable errors.
        """
        import time
        import random

        for tentativa in range(1, max_retries + 1):
            try:
                return self.generate(prompt, system_prompt, **kwargs)

            except requests.exceptions.HTTPError as e:
                # BUGFIX: Response objects are falsy on error statuses —
                # must compare to None or status_code is always 0 and the
                # retry branch below never fires.
                status_code = e.response.status_code if e.response is not None else 0

                # Rate limit or service unavailable: back off and retry.
                if status_code in (429, 503) and tentativa < max_retries:
                    delay = (2 ** tentativa) + random.uniform(0, 2)
                    logger.warning(f"🔄 Groq {status_code} - retry {tentativa}/{max_retries} em {delay:.1f}s")
                    time.sleep(delay)
                    continue

                # Non-retryable HTTP error (or retries exhausted) — propagate.
                raise

        raise RuntimeError(f"Falha após {max_retries} tentativas")
187
+
188
+
189
+ # ============================================================================
190
+ # ASYNC VERSION (Opcional - mantém compatibilidade)
191
+ # ============================================================================
192
+
193
class AsyncGroqClient:
    """Asynchronous Groq client using aiohttp."""

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the async Groq client.

        Args:
            api_key: API key (if None, reads the GROQ_API_KEY env var).

        Raises:
            ValueError: if no API key is available.
        """
        self.api_key = api_key or os.getenv('GROQ_API_KEY')
        if not self.api_key:
            raise ValueError("GROQ_API_KEY não configurada")

        self.base_url = "https://api.groq.com/openai/v1/chat/completions"
        # Request timeout in seconds; overridable via the GROQ_TIMEOUT env var.
        self.default_timeout = int(os.getenv('GROQ_TIMEOUT', '120'))
        logger.info("AsyncGroqClient inicializado")

    async def generate(self,
                       prompt: str,
                       system_prompt: Optional[str] = None,
                       model: str = "llama-3.3-70b-versatile",
                       temperature: float = 0.7,
                       max_tokens: Optional[int] = None,
                       response_format: Optional[Dict] = None) -> GroqResponse:
        """
        Async version of generate().

        Args:
            prompt: User prompt.
            system_prompt: Optional system prompt.
            model: Model identifier.
            temperature: Sampling temperature (0-2).
            max_tokens: Maximum tokens in the response (omitted when None).
            response_format: Optional response format, e.g. {"type": "json_object"}.

        Returns:
            GroqResponse with content, token usage and metadata.

        Raises:
            ImportError: if aiohttp is not installed.
            aiohttp.ClientResponseError: on non-2xx HTTP statuses.
        """
        # Lazy import keeps aiohttp an optional dependency.
        try:
            import aiohttp
        except ImportError:
            raise ImportError("aiohttp não instalado: pip install aiohttp")

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        payload = {
            "model": model,
            "messages": messages,
            "temperature": temperature,
        }

        # `is not None` so an explicit max_tokens=0 is still forwarded
        # (consistent with the sync client).
        if max_tokens is not None:
            payload["max_tokens"] = max_tokens
        if response_format:
            payload["response_format"] = response_format

        async with aiohttp.ClientSession() as session:
            async with session.post(
                self.base_url,
                headers=headers,
                json=payload,
                timeout=aiohttp.ClientTimeout(total=self.default_timeout)
            ) as response:
                # Raises aiohttp.ClientResponseError on non-2xx statuses.
                response.raise_for_status()
                data = await response.json()

                content = data['choices'][0]['message']['content']
                finish_reason = data['choices'][0]['finish_reason']

                # Usage block may be absent; fall back to 0 / computed total.
                usage = data.get('usage', {})
                tokens_input = usage.get('prompt_tokens', 0)
                tokens_output = usage.get('completion_tokens', 0)
                total_tokens = usage.get('total_tokens', tokens_input + tokens_output)

                return GroqResponse(
                    content=content,
                    tokens_input=tokens_input,
                    tokens_output=tokens_output,
                    total_tokens=total_tokens,
                    finish_reason=finish_reason,
                    model=model
                )