nothingworry committed
Commit da3f5f6 · 1 Parent(s): 62e82b2

Add Groq API support for Hugging Face Spaces deployment

Files changed (2)
  1. backend/api/services/llm_client.py +100 -3
  2. env.example +12 -6
backend/api/services/llm_client.py CHANGED
@@ -9,7 +9,11 @@ class LLMClient:
         self.backend = backend
         self.url = url or os.getenv("OLLAMA_URL", "http://localhost:11434")
         self.api_key = api_key or os.getenv("GROQ_API_KEY")
-        self.model = model or os.getenv("OLLAMA_MODEL", "llama3.1:latest")
+        # Default model based on backend
+        if backend == "groq":
+            self.model = model or os.getenv("GROQ_MODEL", "llama-3.1-70b-versatile")
+        else:
+            self.model = model or os.getenv("OLLAMA_MODEL", "llama3.1:latest")
         self.http = httpx.AsyncClient(timeout=30)


@@ -51,7 +55,47 @@ class LLMClient:
                 )
             except Exception as e:
                 raise RuntimeError(f"LLM call failed: {str(e)}")
-        raise RuntimeError("Unsupported backend")
+        elif self.backend == "groq":
+            if not self.api_key:
+                raise RuntimeError(
+                    "Groq API key not configured. Set GROQ_API_KEY environment variable. "
+                    "Get a free API key at https://console.groq.com"
+                )
+            if not self.model:
+                raise RuntimeError("Groq model not configured. Set GROQ_MODEL environment variable.")
+
+            try:
+                # Groq uses an OpenAI-compatible API
+                r = await self.http.post(
+                    "https://api.groq.com/openai/v1/chat/completions",
+                    headers={
+                        "Authorization": f"Bearer {self.api_key}",
+                        "Content-Type": "application/json"
+                    },
+                    json={
+                        "model": self.model,
+                        "messages": [
+                            {"role": "user", "content": prompt}
+                        ],
+                        "temperature": temperature,
+                        "stream": False
+                    }
+                )
+                r.raise_for_status()
+                response_data = r.json()
+                return response_data["choices"][0]["message"]["content"]
+            except httpx.HTTPStatusError as e:
+                error_detail = "Unknown error"
+                try:
+                    error_json = e.response.json()
+                    error_detail = error_json.get("error", {}).get("message", str(error_json))
+                except Exception:
+                    error_detail = e.response.text
+                raise RuntimeError(f"Groq API error: HTTP {e.response.status_code} - {error_detail}")
+            except Exception as e:
+                raise RuntimeError(f"Groq API call failed: {str(e)}")
+        else:
+            raise RuntimeError(f"Unsupported backend: {self.backend}. Supported backends: 'ollama', 'groq'")

     async def stream_call(self, prompt: str, temperature: float = 0.0) -> AsyncGenerator[str, None]:
         """Stream LLM response token by token."""
@@ -93,5 +137,58 @@ class LLMClient:
                 )
             except Exception as e:
                 raise RuntimeError(f"LLM streaming failed: {str(e)}")
+        elif self.backend == "groq":
+            if not self.api_key:
+                raise RuntimeError(
+                    "Groq API key not configured. Set GROQ_API_KEY environment variable. "
+                    "Get a free API key at https://console.groq.com"
+                )
+            if not self.model:
+                raise RuntimeError("Groq model not configured. Set GROQ_MODEL environment variable.")
+
+            try:
+                async with httpx.AsyncClient(timeout=300.0) as client:
+                    async with client.stream(
+                        "POST",
+                        "https://api.groq.com/openai/v1/chat/completions",
+                        headers={
+                            "Authorization": f"Bearer {self.api_key}",
+                            "Content-Type": "application/json"
+                        },
+                        json={
+                            "model": self.model,
+                            "messages": [
+                                {"role": "user", "content": prompt}
+                            ],
+                            "temperature": temperature,
+                            "stream": True
+                        }
+                    ) as response:
+                        response.raise_for_status()
+                        async for line in response.aiter_lines():
+                            if line:
+                                # Groq uses Server-Sent Events format
+                                if line.startswith("data: "):
+                                    data_str = line[6:]  # Remove "data: " prefix
+                                    if data_str.strip() == "[DONE]":
+                                        break
+                                    try:
+                                        data = json.loads(data_str)
+                                        delta = data.get("choices", [{}])[0].get("delta", {})
+                                        token = delta.get("content", "")
+                                        if token:
+                                            yield token
+                                    except json.JSONDecodeError:
+                                        continue
+            except httpx.HTTPStatusError as e:
+                error_detail = "Unknown error"
+                try:
+                    error_json = e.response.json()
+                    error_detail = error_json.get("error", {}).get("message", str(error_json))
+                except Exception:
+                    error_detail = e.response.text
+                raise RuntimeError(f"Groq API streaming error: HTTP {e.response.status_code} - {error_detail}")
+            except Exception as e:
+                raise RuntimeError(f"Groq API streaming failed: {str(e)}")
         else:
-            raise RuntimeError("Streaming not supported for this backend")
+            raise RuntimeError(f"Streaming not supported for backend: {self.backend}")
env.example CHANGED
@@ -13,12 +13,18 @@ POSTGRESQL_URL=postgresql://user:password@host:port/database
 # =============================================================
 # LLM CONFIGURATION
 # =============================================================
-# If using local Ollama
-OLLAMA_URL=http://localhost:11434
-OLLAMA_MODEL=llama3.1:latest
-
-# Backend selection (optional, defaults to "ollama")
-LLM_BACKEND=ollama
+# Backend selection: "ollama" (local) or "groq" (cloud API)
+# For Hugging Face Spaces, use "groq"
+LLM_BACKEND=groq
+
+# Option 1: Using Groq API (recommended for Hugging Face Spaces)
+# Get a free API key at https://console.groq.com
+GROQ_API_KEY=your_groq_api_key_here
+GROQ_MODEL=llama-3.1-70b-versatile
+
+# Option 2: Using local Ollama (for local development)
+# OLLAMA_URL=http://localhost:11434
+# OLLAMA_MODEL=llama3.1:latest

 # =============================================================
 # MCP SERVER CONFIG
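
To sanity-check these values without running the full app, the Groq endpoint can be called directly; this sketch reuses the exact URL, headers, and payload shape from llm_client.py (the prompt is arbitrary). If it prints a completion, the same settings should work with LLM_BACKEND=groq.

    # Standalone config check (illustrative). Mirrors the request that
    # llm_client.py sends; requires GROQ_API_KEY in the environment.
    import os
    import httpx

    resp = httpx.post(
        "https://api.groq.com/openai/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {os.environ['GROQ_API_KEY']}",
            "Content-Type": "application/json",
        },
        json={
            "model": os.getenv("GROQ_MODEL", "llama-3.1-70b-versatile"),
            "messages": [{"role": "user", "content": "ping"}],
            "temperature": 0.0,
            "stream": False,
        },
        timeout=30,
    )
    resp.raise_for_status()
    print(resp.json()["choices"][0]["message"]["content"])
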