Commit da3f5f6 · Parent: 62e82b2
Add Groq API support for Hugging Face Spaces deployment
Files changed:
- backend/api/services/llm_client.py (+100 -3)
- env.example (+12 -6)
backend/api/services/llm_client.py:

```diff
@@ -9,7 +9,11 @@ class LLMClient:
         self.backend = backend
         self.url = url or os.getenv("OLLAMA_URL", "http://localhost:11434")
         self.api_key = api_key or os.getenv("GROQ_API_KEY")
-
+        # Default model based on backend
+        if backend == "groq":
+            self.model = model or os.getenv("GROQ_MODEL", "llama-3.1-70b-versatile")
+        else:
+            self.model = model or os.getenv("OLLAMA_MODEL", "llama3.1:latest")
         self.http = httpx.AsyncClient(timeout=30)
```
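With this change the model is resolved per backend at construction time: an explicit `model` argument wins, then the backend-specific environment variable, then a hardcoded default. A minimal usage sketch, assuming the `LLMClient(backend=..., url=..., model=..., api_key=...)` keyword signature implied by the constructor body (the full signature is not shown in this diff):

```python
from backend.api.services.llm_client import LLMClient  # path per this repo

# Hugging Face Spaces: Groq cloud API; key and model come from
# GROQ_API_KEY / GROQ_MODEL, defaulting to llama-3.1-70b-versatile.
groq_client = LLMClient(backend="groq")

# Local development: Ollama; URL and model fall back to
# OLLAMA_URL / OLLAMA_MODEL, defaulting to llama3.1:latest.
ollama_client = LLMClient(backend="ollama", url="http://localhost:11434")
```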
```diff
@@ -51,7 +55,47 @@ class LLMClient:
             )
         except Exception as e:
             raise RuntimeError(f"LLM call failed: {str(e)}")
-
+        elif self.backend == "groq":
+            if not self.api_key:
+                raise RuntimeError(
+                    "Groq API key not configured. Set GROQ_API_KEY environment variable. "
+                    "Get a free API key at https://console.groq.com"
+                )
+            if not self.model:
+                raise RuntimeError("Groq model not configured. Set GROQ_MODEL environment variable.")
+
+            try:
+                # Groq uses OpenAI-compatible API
+                r = await self.http.post(
+                    "https://api.groq.com/openai/v1/chat/completions",
+                    headers={
+                        "Authorization": f"Bearer {self.api_key}",
+                        "Content-Type": "application/json"
+                    },
+                    json={
+                        "model": self.model,
+                        "messages": [
+                            {"role": "user", "content": prompt}
+                        ],
+                        "temperature": temperature,
+                        "stream": False
+                    }
+                )
+                r.raise_for_status()
+                response_data = r.json()
+                return response_data["choices"][0]["message"]["content"]
+            except httpx.HTTPStatusError as e:
+                error_detail = "Unknown error"
+                try:
+                    error_json = e.response.json()
+                    error_detail = error_json.get("error", {}).get("message", str(error_json))
+                except:
+                    error_detail = e.response.text
+                raise RuntimeError(f"Groq API error: HTTP {e.response.status_code} - {error_detail}")
+            except Exception as e:
+                raise RuntimeError(f"Groq API call failed: {str(e)}")
+        else:
+            raise RuntimeError(f"Unsupported backend: {self.backend}. Supported backends: 'ollama', 'groq'")
 
     async def stream_call(self, prompt: str, temperature: float = 0.0) -> AsyncGenerator[str, None]:
         """Stream LLM response token by token."""
```
```diff
@@ -93,5 +137,58 @@ class LLMClient:
             )
         except Exception as e:
             raise RuntimeError(f"LLM streaming failed: {str(e)}")
+        elif self.backend == "groq":
+            if not self.api_key:
+                raise RuntimeError(
+                    "Groq API key not configured. Set GROQ_API_KEY environment variable. "
+                    "Get a free API key at https://console.groq.com"
+                )
+            if not self.model:
+                raise RuntimeError("Groq model not configured. Set GROQ_MODEL environment variable.")
+
+            try:
+                async with httpx.AsyncClient(timeout=300.0) as client:
+                    async with client.stream(
+                        "POST",
+                        "https://api.groq.com/openai/v1/chat/completions",
+                        headers={
+                            "Authorization": f"Bearer {self.api_key}",
+                            "Content-Type": "application/json"
+                        },
+                        json={
+                            "model": self.model,
+                            "messages": [
+                                {"role": "user", "content": prompt}
+                            ],
+                            "temperature": temperature,
+                            "stream": True
+                        }
+                    ) as response:
+                        response.raise_for_status()
+                        async for line in response.aiter_lines():
+                            if line:
+                                # Groq uses Server-Sent Events format
+                                if line.startswith("data: "):
+                                    data_str = line[6:]  # Remove "data: " prefix
+                                    if data_str.strip() == "[DONE]":
+                                        break
+                                    try:
+                                        data = json.loads(data_str)
+                                        delta = data.get("choices", [{}])[0].get("delta", {})
+                                        token = delta.get("content", "")
+                                        if token:
+                                            yield token
+                                    except json.JSONDecodeError:
+                                        continue
+            except httpx.HTTPStatusError as e:
+                error_detail = "Unknown error"
+                try:
+                    error_json = e.response.json()
+                    error_detail = error_json.get("error", {}).get("message", str(error_json))
+                except:
+                    error_detail = e.response.text
+                raise RuntimeError(f"Groq API streaming error: HTTP {e.response.status_code} - {error_detail}")
+            except Exception as e:
+                raise RuntimeError(f"Groq API streaming failed: {str(e)}")
         else:
-            raise RuntimeError("Streaming not supported for
+            raise RuntimeError(f"Streaming not supported for backend: {self.backend}")
```
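The streaming branch parses Groq's Server-Sent Events framing by hand: each chunk arrives as a `data: {json}` line, the token text rides in the `delta` object, and `data: [DONE]` terminates the stream. The parsing logic can be checked in isolation against canned lines; a self-contained sketch (payloads are illustrative, not captured output):

```python
import json

# Canned SSE lines in the shape Groq's OpenAI-compatible stream emits.
lines = [
    'data: {"choices": [{"delta": {"content": "Hel"}}]}',
    'data: {"choices": [{"delta": {"content": "lo"}}]}',
    "data: [DONE]",
]

def tokens(sse_lines):
    for line in sse_lines:
        if not line.startswith("data: "):
            continue
        data_str = line[6:]  # strip the "data: " prefix
        if data_str.strip() == "[DONE]":
            break
        try:
            data = json.loads(data_str)
        except json.JSONDecodeError:
            continue  # skip malformed/partial lines, as the diff does
        token = data.get("choices", [{}])[0].get("delta", {}).get("content", "")
        if token:
            yield token

print("".join(tokens(lines)))  # -> "Hello"
```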
env.example:

```diff
@@ -13,12 +13,18 @@ POSTGRESQL_URL=postgresql://user:password@host:port/database
 # =============================================================
 # LLM CONFIGURATION
 # =============================================================
-#
-
-
-
-#
-
+# Backend selection: "ollama" (local) or "groq" (cloud API)
+# For Hugging Face Spaces, use "groq"
+LLM_BACKEND=groq
+
+# Option 1: Using Groq API (recommended for Hugging Face Spaces)
+# Get free API key at https://console.groq.com
+GROQ_API_KEY=your_groq_api_key_here
+GROQ_MODEL=llama-3.1-70b-versatile
+
+# Option 2: Using local Ollama (for local development)
+# OLLAMA_URL=http://localhost:11434
+# OLLAMA_MODEL=llama3.1:latest
 
 # =============================================================
 # MCP SERVER CONFIG
```