File size: 4,720 Bytes
2f235a0
 
a477044
c16e1c9
20a1017
 
 
0452a50
2f235a0
0452a50
2f235a0
20a1017
2f235a0
0452a50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da3f5f6
0452a50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da3f5f6
 
 
 
 
 
 
 
 
 
 
0452a50
da3f5f6
0452a50
 
 
 
 
 
 
 
 
a477044
0452a50
 
 
a477044
 
 
 
0452a50
 
da3f5f6
0452a50
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os, json
import httpx
from typing import AsyncGenerator


class LLMClient:
    """Small async client for Groq's OpenAI-compatible chat completions API.

    The API key and model come from the constructor arguments, falling back
    to the ``GROQ_API_KEY`` and ``GROQ_MODEL`` environment variables.

    The instance owns an ``httpx.AsyncClient`` used by :meth:`simple_call`.
    Release it with ``await client.aclose()`` or by using the instance as an
    async context manager (``async with LLMClient() as client: ...``).
    """

    API_URL = "https://api.groq.com/openai/v1/chat/completions"
    REQUEST_TIMEOUT = 30  # seconds, for non-streaming calls
    STREAM_TIMEOUT = 300.0  # seconds, for streaming calls

    def __init__(self, api_key=None, model=None):
        """Store credentials and create the shared HTTP client.

        Args:
            api_key: Groq API key; defaults to ``$GROQ_API_KEY``.
            model: Model name; defaults to ``$GROQ_MODEL`` or
                ``"llama-3.1-8b-instant"``.
        """
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        self.model = model or os.getenv("GROQ_MODEL", "llama-3.1-8b-instant")
        self.http = httpx.AsyncClient(timeout=self.REQUEST_TIMEOUT)

    async def aclose(self) -> None:
        """Close the shared HTTP client and its connection pool."""
        await self.http.aclose()

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        await self.aclose()

    def _require_config(self) -> None:
        """Raise RuntimeError if the API key or model is missing."""
        if not self.api_key:
            raise RuntimeError(
                "Groq API key not configured. Set GROQ_API_KEY environment variable. "
                "Get a free API key at https://console.groq.com"
            )
        if not self.model:
            raise RuntimeError("Groq model not configured. Set GROQ_MODEL environment variable.")

    def _headers(self) -> dict:
        """Build the request headers, including bearer authentication."""
        return {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def _payload(self, prompt: str, temperature: float, *, stream: bool) -> dict:
        """Build the chat-completions request body for a single user message."""
        return {
            "model": self.model,
            "messages": [
                {"role": "user", "content": prompt}
            ],
            "temperature": temperature,
            "stream": stream,
        }

    @staticmethod
    def _error_detail(response) -> str:
        """Extract a readable error message from a failed HTTP response.

        Prefers ``error.message`` from a JSON body, then the stringified JSON
        object, and falls back to the raw response text when the body is not
        JSON or does not have the expected shape.
        """
        try:
            body = response.json()
        except ValueError:  # invalid JSON or undecodable body
            return response.text
        if isinstance(body, dict):
            error = body.get("error", {})
            if isinstance(error, dict):
                return error.get("message", str(body))
        return response.text

    @staticmethod
    def _extract_token(data_str: str) -> str:
        """Return the content token carried by one SSE JSON payload.

        Returns ``""`` when the payload is not valid JSON or carries no text,
        e.g. a chunk with an empty ``choices`` list or a null ``delta``.
        """
        try:
            data = json.loads(data_str)
        except json.JSONDecodeError:
            return ""
        if not isinstance(data, dict):
            return ""
        choices = data.get("choices") or []
        if not choices or not isinstance(choices[0], dict):
            return ""
        delta = choices[0].get("delta") or {}
        if not isinstance(delta, dict):
            return ""
        return delta.get("content") or ""

    async def simple_call(self, prompt: str, temperature: float = 0.0) -> str:
        """Send one user prompt and return the complete response text.

        Args:
            prompt: Content of the single user message.
            temperature: Sampling temperature forwarded to the API.

        Returns:
            The ``content`` of the first choice's message.

        Raises:
            RuntimeError: If configuration is missing, the API returns an
                error status, or the request/response handling fails.
        """
        self._require_config()

        try:
            # Groq uses OpenAI-compatible API
            r = await self.http.post(
                self.API_URL,
                headers=self._headers(),
                json=self._payload(prompt, temperature, stream=False),
            )
            r.raise_for_status()
            response_data = r.json()
            return response_data["choices"][0]["message"]["content"]
        except httpx.HTTPStatusError as e:
            error_detail = self._error_detail(e.response)
            raise RuntimeError(
                f"Groq API error: HTTP {e.response.status_code} - {error_detail}"
            ) from e
        except Exception as e:
            raise RuntimeError(f"Groq API call failed: {str(e)}") from e

    async def stream_call(self, prompt: str, temperature: float = 0.0) -> AsyncGenerator[str, None]:
        """Stream LLM response token by token.

        Args:
            prompt: Content of the single user message.
            temperature: Sampling temperature forwarded to the API.

        Yields:
            Non-empty content fragments in the order they arrive.

        Raises:
            RuntimeError: If configuration is missing, the API returns an
                error status, or the stream fails.
        """
        self._require_config()

        try:
            async with httpx.AsyncClient(timeout=self.STREAM_TIMEOUT) as client:
                async with client.stream(
                    "POST",
                    self.API_URL,
                    headers=self._headers(),
                    json=self._payload(prompt, temperature, stream=True),
                ) as response:
                    if response.status_code >= 400:
                        # A streamed body must be read explicitly; otherwise
                        # .json()/.text in the error handler raise
                        # httpx.ResponseNotRead and mask the real API error.
                        await response.aread()
                    response.raise_for_status()
                    async for line in response.aiter_lines():
                        # Groq uses Server-Sent Events format
                        if not line.startswith("data:"):
                            continue
                        data_str = line[5:].strip()  # Remove "data:" prefix
                        if data_str == "[DONE]":
                            break
                        token = self._extract_token(data_str)
                        if token:
                            yield token
        except httpx.HTTPStatusError as e:
            error_detail = self._error_detail(e.response)
            raise RuntimeError(
                f"Groq API streaming error: HTTP {e.response.status_code} - {error_detail}"
            ) from e
        except Exception as e:
            raise RuntimeError(f"Groq API streaming failed: {str(e)}") from e