Spaces:
Running
Running
File size: 9,020 Bytes
b0b150b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 |
"""
MEXAR Core Engine - Groq API Client Wrapper
Provides a unified interface for all Groq API interactions.
"""
import os
import base64
from typing import Optional, List, Dict, Any
from groq import Groq
from dotenv import load_dotenv
# Load variables from a .env file (when one is found) into the process
# environment at import time, so GROQ_API_KEY is visible to os.getenv()
# when a GroqClient is constructed without an explicit api_key.
load_dotenv()
class GroqClient:
    """
    Unified Groq API client for MEXAR.

    Handles LLM (chat), Whisper (audio) and Vision (image) capabilities.
    Chat and vision requests go through the Groq SDK client; audio
    transcription is sent as a direct HTTP request.

    Attributes:
        api_key: API key used for the SDK client and for raw HTTP requests.
        client: The underlying ``Groq`` SDK client.
        models: Mapping of short model keys ("chat", "advanced", "fast",
            "vision", "whisper") to Groq model identifiers.
    """

    # Endpoint used by transcribe_audio for the direct HTTP upload.
    _TRANSCRIPTION_URL = "https://api.groq.com/openai/v1/audio/transcriptions"

    # File extension -> MIME type sent with uploaded audio.
    _AUDIO_MIME_TYPES = {
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
        ".m4a": "audio/mp4",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
        ".webm": "audio/webm",
    }

    # File extension -> MIME type embedded in the image data URL.
    _IMAGE_MIME_TYPES = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize Groq client with API key.

        Args:
            api_key: Groq API key. If not provided (or empty), reads from
                the GROQ_API_KEY environment variable.

        Raises:
            ValueError: If no key is passed and GROQ_API_KEY is unset/empty.
        """
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        if not self.api_key:
            raise ValueError("GROQ_API_KEY not found in environment variables")

        self.client = Groq(api_key=self.api_key)

        # Model configurations (using fast model for better conversational responses)
        self.models = {
            "chat": "llama-3.1-8b-instant",  # Primary LLM (fast & conversational)
            "advanced": "llama-3.3-70b-versatile",  # Advanced reasoning
            "fast": "llama-3.1-8b-instant",  # Fast responses
            "vision": "meta-llama/llama-4-scout-17b-16e-instruct",  # Llama 4 Scout, used for images
            "whisper": "whisper-large-v3",  # Audio transcription
        }

    @staticmethod
    def _message_text(response: Any) -> str:
        """Return the first choice's message text, or "" if the content is None."""
        content = response.choices[0].message.content
        return content if content is not None else ""

    def chat_completion(
        self,
        messages: List[Dict[str, str]],
        model: str = "chat",
        temperature: float = 0.7,
        max_tokens: int = 4096,
        json_mode: bool = False
    ) -> str:
        """
        Send a chat completion request.

        Args:
            messages: List of message dicts with 'role' and 'content'
            model: Model key from self.models; a value that is not a known
                key is passed through unchanged as the model identifier
            temperature: Sampling temperature (0-2)
            max_tokens: Maximum tokens in response
            json_mode: If True, force JSON output

        Returns:
            Generated text response ("" when the response has no content)
        """
        request: Dict[str, Any] = {
            "model": self.models.get(model, model),
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if json_mode:
            request["response_format"] = {"type": "json_object"}

        response = self.client.chat.completions.create(**request)
        return self._message_text(response)

    def analyze_with_system_prompt(
        self,
        system_prompt: str,
        user_message: str,
        model: str = "chat",
        json_mode: bool = False
    ) -> str:
        """
        Convenience method for system + user message pattern.

        Args:
            system_prompt: System instructions
            user_message: User query
            model: Model to use
            json_mode: If True, force JSON output

        Returns:
            Generated response
        """
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ]
        return self.chat_completion(messages, model=model, json_mode=json_mode)

    def transcribe_audio(self, audio_path: str, language: str = "en") -> str:
        """
        Transcribe audio file using Whisper via direct HTTP request.

        Args:
            audio_path: Path to audio file
            language: Language code (e.g., 'en', 'es')

        Returns:
            Transcribed text ("" if the response JSON has no "text" field)

        Raises:
            FileNotFoundError: If audio_path does not exist.
            RuntimeError: If the API answers with a non-200 status code.
        """
        from pathlib import Path

        audio_file_path = Path(audio_path)
        # Fail early with a clear message, before any network setup
        # (mirrors the existence check done in describe_image).
        if not audio_file_path.exists():
            raise FileNotFoundError(f"Audio file does not exist: {audio_path}")

        # Imported lazily so the rest of the client works without requests.
        import requests

        # Unknown extensions fall back to audio/mpeg.
        mime_type = self._AUDIO_MIME_TYPES.get(
            audio_file_path.suffix.lower(), "audio/mpeg"
        )

        with open(audio_file_path, "rb") as audio_file:
            response = requests.post(
                self._TRANSCRIPTION_URL,
                headers={"Authorization": f"Bearer {self.api_key}"},
                files={"file": (audio_file_path.name, audio_file, mime_type)},
                data={
                    # NOTE(review): this differs from self.models["whisper"]
                    # ("whisper-large-v3"); confirm which model is intended.
                    "model": "whisper-large-v3-turbo",
                    "language": language,
                },
                timeout=60,
            )

        if response.status_code != 200:
            raise RuntimeError(
                f"Groq Whisper API error: {response.status_code} - {response.text}"
            )
        return response.json().get("text", "")

    def describe_image(
        self,
        image_path: str,
        prompt: str = "Describe this image in detail.",
        max_tokens: int = 1024
    ) -> str:
        """
        Describe an image using Vision model.

        The image is read from disk, base64-encoded and sent inline as a
        data URL alongside the text prompt.

        Args:
            image_path: Path to image file
            prompt: Question about the image
            max_tokens: Maximum response tokens

        Returns:
            Image description ("" when the response has no content)

        Raises:
            FileNotFoundError: If image_path does not exist.
            Exception: Any error raised by the API call is logged and re-raised.
        """
        import logging

        logger = logging.getLogger(__name__)
        logger.info("[GROQ VISION] Starting image analysis for: %s", image_path)
        logger.info("[GROQ VISION] Prompt: %s...", prompt[:100])

        # Verify file exists
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image file does not exist: {image_path}")

        # Read and encode image
        with open(image_path, "rb") as img_file:
            image_bytes = img_file.read()
        logger.info("[GROQ VISION] Image file size: %s bytes", len(image_bytes))

        image_data = base64.b64encode(image_bytes).decode("utf-8")
        logger.info(
            "[GROQ VISION] Image encoded to base64, length: %s chars", len(image_data)
        )

        # Detect image type from extension; unknown extensions fall back to JPEG.
        ext = os.path.splitext(image_path)[1].lower()
        mime_type = self._IMAGE_MIME_TYPES.get(ext, "image/jpeg")
        logger.info("[GROQ VISION] Detected MIME type: %s", mime_type)

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_data}"
                        },
                    },
                ],
            }
        ]

        logger.info(
            "[GROQ VISION] Calling Groq API with model: %s", self.models["vision"]
        )
        # Only the API call is guarded, so a failure while handling the
        # response is not misreported as an API failure.
        try:
            response = self.client.chat.completions.create(
                model=self.models["vision"],
                messages=messages,
                max_tokens=max_tokens,
                temperature=0.7,
            )
        except Exception as e:
            logger.error(
                "[GROQ VISION] API call failed: %s: %s", type(e).__name__, e
            )
            raise

        result = self._message_text(response)
        logger.info("[GROQ VISION] Success! Response length: %s chars", len(result))
        logger.info("[GROQ VISION] Response preview: %s...", result[:200])
        return result

    def extract_json(self, text: str, schema_description: str) -> Dict[str, Any]:
        """
        Extract structured JSON from text.

        Uses the "fast" model in JSON mode and parses the reply.

        Args:
            text: Input text to analyze
            schema_description: Description of expected JSON structure

        Returns:
            Parsed JSON dictionary

        Raises:
            json.JSONDecodeError: If the model reply is not valid JSON.
        """
        import json

        system_prompt = (
            "You are a JSON extraction assistant.\n"
            "Extract structured data from the given text and return ONLY valid JSON.\n"
            f"Expected structure: {schema_description}\n"
            "Do not include any explanation, only the JSON object."
        )
        response = self.analyze_with_system_prompt(
            system_prompt=system_prompt,
            user_message=text,
            model="fast",
            json_mode=True,
        )
        return json.loads(response)
# Module-wide cache for the shared client; stays None until first requested.
_client_instance: Optional[GroqClient] = None


def get_groq_client() -> GroqClient:
    """Return the shared GroqClient, constructing it on the first call."""
    global _client_instance
    if _client_instance is not None:
        return _client_instance
    _client_instance = GroqClient()
    return _client_instance
|