# Mexar/backend/utils/groq_client.py
# Author: Devrajsinh Bharatsinh Gohil
# Initial commit of MEXAR Ultimate - Phase 2 cleanup complete (commit b0b150b)
"""
MEXAR Core Engine - Groq API Client Wrapper
Provides a unified interface for all Groq API interactions.
"""
import os
import base64
from typing import Optional, List, Dict, Any
from groq import Groq
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
class GroqClient:
    """
    Unified Groq API client for MEXAR.

    Handles LLM chat completions, Whisper (audio transcription) and
    Vision (image description) capabilities behind a single interface.
    """

    # MIME types accepted by the Whisper transcription endpoint,
    # keyed by file extension.
    _AUDIO_MIME_TYPES = {
        ".mp3": "audio/mpeg",
        ".wav": "audio/wav",
        ".m4a": "audio/mp4",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
        ".webm": "audio/webm",
    }

    # MIME types supported by the vision endpoint (sent as base64 data URLs),
    # keyed by file extension.
    _IMAGE_MIME_TYPES = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the Groq client with an API key.

        Args:
            api_key: Groq API key. If not provided, reads from the
                GROQ_API_KEY environment variable.

        Raises:
            ValueError: If no API key is available from either source.
        """
        self.api_key = api_key or os.getenv("GROQ_API_KEY")
        if not self.api_key:
            raise ValueError("GROQ_API_KEY not found in environment variables")
        self.client = Groq(api_key=self.api_key)
        # Model configurations (fast model as default for better
        # conversational responses). The "whisper" entry matches the model
        # actually sent to the transcription endpoint (previously the dict
        # said "whisper-large-v3" while the request hard-coded the turbo
        # variant — they now agree).
        self.models = {
            "chat": "llama-3.1-8b-instant",          # Primary LLM (fast & conversational)
            "advanced": "llama-3.3-70b-versatile",   # Advanced reasoning
            "fast": "llama-3.1-8b-instant",          # Fast responses
            "vision": "meta-llama/llama-4-scout-17b-16e-instruct",  # Llama 4 Vision model (Jan 2025)
            "whisper": "whisper-large-v3-turbo",     # Audio transcription
        }

    def chat_completion(
        self,
        messages: List[Dict[str, str]],
        model: str = "chat",
        temperature: float = 0.7,
        max_tokens: int = 4096,
        json_mode: bool = False
    ) -> str:
        """
        Send a chat completion request.

        Args:
            messages: List of message dicts with 'role' and 'content'.
            model: Model key from self.models, or a raw Groq model name
                (unknown keys are passed through unchanged).
            temperature: Sampling temperature (0-2).
            max_tokens: Maximum tokens in the response.
            json_mode: If True, force JSON output via response_format.

        Returns:
            Generated text response.
        """
        # Resolve a friendly key ("chat", "fast", ...) to the real model
        # name; unknown keys are assumed to already be model names.
        model_name = self.models.get(model, model)
        kwargs = {
            "model": model_name,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        if json_mode:
            kwargs["response_format"] = {"type": "json_object"}
        response = self.client.chat.completions.create(**kwargs)
        return response.choices[0].message.content

    def analyze_with_system_prompt(
        self,
        system_prompt: str,
        user_message: str,
        model: str = "chat",
        json_mode: bool = False
    ) -> str:
        """
        Convenience method for the system + user message pattern.

        Args:
            system_prompt: System instructions.
            user_message: User query.
            model: Model key to use (see chat_completion).
            json_mode: If True, force JSON output.

        Returns:
            Generated response text.
        """
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message},
        ]
        return self.chat_completion(messages, model=model, json_mode=json_mode)

    def transcribe_audio(self, audio_path: str, language: str = "en") -> str:
        """
        Transcribe an audio file using Whisper via a direct HTTP request.

        Args:
            audio_path: Path to the audio file.
            language: Language code (e.g. 'en', 'es').

        Returns:
            Transcribed text ("" if the API response has no 'text' field).

        Raises:
            RuntimeError: If the API responds with a non-200 status.
        """
        import requests  # local import keeps module import cost low
        from pathlib import Path

        url = "https://api.groq.com/openai/v1/audio/transcriptions"
        headers = {
            "Authorization": f"Bearer {self.api_key}"
        }
        audio_file_path = Path(audio_path)
        # Determine the correct MIME type; fall back to audio/mpeg for
        # unknown extensions.
        mime_type = self._AUDIO_MIME_TYPES.get(
            audio_file_path.suffix.lower(), "audio/mpeg"
        )
        with open(audio_path, "rb") as audio_file:
            files = {
                "file": (audio_file_path.name, audio_file, mime_type)
            }
            data = {
                # Use the configured whisper model so config and behavior
                # cannot drift apart again.
                "model": self.models["whisper"],
                "language": language,
            }
            response = requests.post(
                url, headers=headers, files=files, data=data, timeout=60
            )
        if response.status_code == 200:
            return response.json().get("text", "")
        # RuntimeError subclasses Exception, so existing `except Exception`
        # handlers in callers keep working.
        raise RuntimeError(
            f"Groq Whisper API error: {response.status_code} - {response.text}"
        )

    def describe_image(
        self,
        image_path: str,
        prompt: str = "Describe this image in detail.",
        max_tokens: int = 1024
    ) -> str:
        """
        Describe an image using the Vision model.

        Args:
            image_path: Path to the image file.
            prompt: Question about the image.
            max_tokens: Maximum response tokens.

        Returns:
            Image description ("" if the model returned empty content).

        Raises:
            FileNotFoundError: If image_path does not exist.
        """
        import logging
        logger = logging.getLogger(__name__)
        # Lazy %-style args avoid formatting cost when the level is disabled.
        logger.info("[GROQ VISION] Starting image analysis for: %s", image_path)
        logger.info("[GROQ VISION] Prompt: %s...", prompt[:100])

        # Verify the file exists before touching the network.
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image file does not exist: {image_path}")

        file_size = os.path.getsize(image_path)
        logger.info("[GROQ VISION] Image file size: %d bytes", file_size)

        # Read and base64-encode the image for the data-URL payload.
        with open(image_path, "rb") as img_file:
            image_data = base64.b64encode(img_file.read()).decode("utf-8")
        logger.info(
            "[GROQ VISION] Image encoded to base64, length: %d chars",
            len(image_data),
        )

        # Detect image type from the extension; default to JPEG.
        ext = os.path.splitext(image_path)[1].lower()
        mime_type = self._IMAGE_MIME_TYPES.get(ext, "image/jpeg")
        logger.info("[GROQ VISION] Detected MIME type: %s", mime_type)

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_data}"
                        }
                    }
                ]
            }
        ]
        logger.info(
            "[GROQ VISION] Calling Groq API with model: %s",
            self.models["vision"],
        )
        try:
            response = self.client.chat.completions.create(
                model=self.models["vision"],
                messages=messages,
                max_tokens=max_tokens,
                temperature=0.7
            )
            # message.content may be None (e.g. refusals); normalize to ""
            # so the len()/slice logging below cannot raise.
            result = response.choices[0].message.content or ""
            logger.info(
                "[GROQ VISION] Success! Response length: %d chars", len(result)
            )
            logger.info("[GROQ VISION] Response preview: %s...", result[:200])
            return result
        except Exception as e:
            logger.error(
                "[GROQ VISION] API call failed: %s: %s", type(e).__name__, e
            )
            raise

    def extract_json(self, text: str, schema_description: str) -> Dict[str, Any]:
        """
        Extract structured JSON from text.

        Args:
            text: Input text to analyze.
            schema_description: Description of the expected JSON structure.

        Returns:
            Parsed JSON dictionary.

        Raises:
            json.JSONDecodeError: If the model output is not valid JSON.
        """
        import json
        system_prompt = f"""You are a JSON extraction assistant.
Extract structured data from the given text and return ONLY valid JSON.
Expected structure: {schema_description}
Do not include any explanation, only the JSON object."""
        response = self.analyze_with_system_prompt(
            system_prompt=system_prompt,
            user_message=text,
            model="fast",
            json_mode=True
        )
        return json.loads(response)
# Module-level singleton holder so every importer shares one client.
_client_instance: Optional[GroqClient] = None


def get_groq_client() -> GroqClient:
    """Return the shared GroqClient, constructing it lazily on first use."""
    global _client_instance
    if _client_instance is not None:
        return _client_instance
    _client_instance = GroqClient()
    return _client_instance