# llm_handler.py (Refactored)
import json
import os
from typing import Iterator, Optional

import requests
# A simple registry to define providers and their models.
# NOTE: previously the "anthropic" entry was not closed after its "models"
# dict, so "openrouter" and "huggingface" were accidentally nested inside it;
# all three are now siblings at the top level as intended.
PROVIDER_CONFIG = {
    "anthropic": {
        "models": {
            "claude-3-5-sonnet-20241022": {"provider": "anthropic", "api_url": "https://api.anthropic.com/v1/messages"},
            "claude-3-haiku-20240307": {"provider": "anthropic", "api_url": "https://api.anthropic.com/v1/messages"},
        },
    },
    "openrouter": {
        "models": {
            "anthropic/claude-3-opus-20240229": {"provider": "anthropic", "api_url": "https://api.anthropic.com/v1/messages"},
            "google/gemini-2.0-flash-exp": {"provider": "google", "api_url": "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent"},
        },
    },
    "huggingface": {
        "models": {
            "mistralai/Mixtral-8x7B": {"provider": "huggingface", "api_url": "https://api-inference.huggingface.co/models/mistralai/Mixtral-8x7B"},
            "meta-llama/Meta-Llama-3.1-8B-Instruct": {"provider": "huggingface", "api_url": "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3.1-8B-Instruct"},
        },
    },
}
class LLMHandler:
    """
    Handle LLM interactions via OpenRouter.

    - Uses OPENROUTER_API_KEY (required).
    - Default model from PREFERRED_MODEL or google/gemini-2.0-flash-exp.
    - Supports dynamic override from UI (model_override).

    The direct Anthropic / Hugging Face streaming helpers are kept for
    callers that invoke them explicitly; their credentials
    (ANTHROPIC_API_KEY, HF_TOKEN) are optional and read at construction.
    """

    def __init__(self, model_override: Optional[str] = None):
        """
        Args:
            model_override: Model id to use instead of the environment default.

        Raises:
            ValueError: If OPENROUTER_API_KEY is not set.
        """
        self.openrouter_key = os.getenv("OPENROUTER_API_KEY")
        if not self.openrouter_key:
            raise ValueError(
                "OPENROUTER_API_KEY is not set. Configure it in your Space secrets."
            )
        # BUGFIX: these attributes were read by _call_anthropic_streaming /
        # _call_huggingface_streaming but never initialized, raising
        # AttributeError whenever those helpers were called. They may be
        # None when the corresponding secrets are not configured.
        self.anthropic_key = os.getenv("ANTHROPIC_API_KEY")
        self.hf_token = os.getenv("HF_TOKEN")
        default_model = os.getenv("PREFERRED_MODEL", "google/gemini-2.0-flash-exp")
        self.model_id = model_override or default_model

    def set_model(self, model_name: str):
        """Update active model at runtime; empty/None values are ignored."""
        if model_name:
            self.model_id = model_name

    def generate_streaming(self, prompt: str, model: Optional[str] = None) -> Iterator[str]:
        """
        Generate a streaming response using OpenRouter chat completions.

        Yields text chunks as they arrive. On any failure, yields a single
        human-readable error message instead of raising (UI boundary).
        """
        model_to_use = model or self.model_id
        print(f"[LLMHandler] Using OpenRouter model: {model_to_use}")
        try:
            yield from self._call_openrouter_streaming(prompt, model_to_use)
        except Exception as e:
            error_msg = f"Error during generation with OpenRouter: {str(e)}"
            print(error_msg)
            yield error_msg

    def _call_anthropic_streaming(self, prompt: str, api_url: str) -> Iterator[str]:
        """Stream text deltas from the Anthropic Messages API over SSE.

        Raises:
            ValueError: If ANTHROPIC_API_KEY is not configured.
            requests.HTTPError: On a non-2xx response.
        """
        if not self.anthropic_key:
            raise ValueError("ANTHROPIC_API_KEY is not set.")
        headers = {
            "x-api-key": self.anthropic_key,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
        }
        data = {
            "model": self.model_id,
            "max_tokens": 2000,
            "stream": True,
            "messages": [{"role": "user", "content": prompt}],
        }
        response = requests.post(api_url, headers=headers, json=data, stream=True, timeout=60)
        response.raise_for_status()
        for raw_line in response.iter_lines():
            line = raw_line.decode('utf-8')
            if not line.startswith("data: "):
                continue
            payload = line[6:]
            if payload == "[DONE]":
                break
            try:
                chunk = json.loads(payload)
            except json.JSONDecodeError:
                # Skip non-JSON keep-alive / malformed SSE lines.
                continue
            # Only content_block_delta events carry incremental text.
            if chunk.get("type") == "content_block_delta" and chunk.get("delta", {}).get("text"):
                yield chunk["delta"]["text"]

    def _call_huggingface_streaming(self, prompt: str, api_url: str) -> Iterator[str]:
        """Stream generated tokens from the Hugging Face Inference API.

        Raises:
            ValueError: If HF_TOKEN is not configured.
            requests.HTTPError: On a non-2xx, non-503 response.
        """
        if not self.hf_token:
            raise ValueError("HF_TOKEN is not set.")
        headers = {"Authorization": f"Bearer {self.hf_token}", "Content-Type": "application/json"}
        data = {"inputs": prompt, "parameters": {"max_new_tokens": 2000, "stream": True}}
        response = requests.post(api_url, headers=headers, json=data, stream=True, timeout=60)
        if response.status_code == 503:  # Model still loading on HF's side
            yield "Model is loading, please try again in a few moments..."
            return
        response.raise_for_status()
        for line in response.iter_lines():
            if not line:
                continue
            try:
                chunk = json.loads(line.decode('utf-8'))
            except json.JSONDecodeError:
                continue
            if 'token' in chunk:
                text = chunk.get('token', {}).get('text', '')
                if text:
                    yield text

    def _call_openrouter_streaming(self, prompt: str, model_id: str) -> Iterator[str]:
        """
        Stream completions from OpenRouter using OpenAI-compatible SSE.

        Raises:
            requests.HTTPError: On a non-2xx response.
        """
        api_url = "https://openrouter.ai/api/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.openrouter_key}",
            "Content-Type": "application/json",
            # Attribution headers recommended by OpenRouter.
            "HTTP-Referer": "https://huggingface.co/spaces/jblast94/my-voice-agent",
            "X-Title": "my-voice-agent",
        }
        data = {
            "model": model_id,
            "stream": True,
            "messages": [
                {"role": "system", "content": "You are a helpful, friendly AI assistant."},
                {"role": "user", "content": prompt},
            ],
        }
        with requests.post(api_url, headers=headers, json=data, stream=True, timeout=60) as response:
            response.raise_for_status()
            for raw_line in response.iter_lines():
                if not raw_line:
                    continue
                if raw_line.startswith(b"data: "):
                    payload = raw_line[6:]
                    if payload == b"[DONE]":
                        break
                    try:
                        chunk = json.loads(payload.decode("utf-8"))
                    except Exception:
                        # Tolerate malformed / partial SSE frames.
                        continue
                    choices = chunk.get("choices") or []
                    if not choices:
                        continue
                    delta = choices[0].get("delta") or {}
                    content_piece = delta.get("content")
                    if content_piece:
                        yield content_piece

    def get_provider_info(self) -> dict:
        """Get information about the current provider configuration."""
        return {
            "provider": "openrouter",
            "model": self.model_id,
            "requires": ["OPENROUTER_API_KEY"],
        }