File size: 4,082 Bytes
15503f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
599c9bd
15503f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
599c9bd
 
 
 
 
 
15503f9
 
599c9bd
 
 
 
 
 
 
 
 
15503f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
LLM abstraction layer using LiteLLM.

Supports any model LiteLLM supports — switch with a single string:
  - OpenAI:     "gpt-4o", "gpt-5.4", "o3-pro"
  - Anthropic:  "claude-opus-4-6", "claude-sonnet-4-6"
  - Local:      "ollama/llama3", "ollama/mistral"
  - And 100+ more providers

API keys are read from environment variables (loaded from root .env):
  OPENAI_API_KEY, ANTHROPIC_API_KEY, etc.

Usage:
    from agent.llm import LLMClient

    llm = LLMClient(model="gpt-4o")
    response = llm.chat(
        messages=[{"role": "user", "content": "Hello"}],
        tools=[...],
    )
"""

import json
import logging
from typing import Any, Dict, List, Optional

import litellm

logger = logging.getLogger(__name__)


class LLMClient:
    """
    Thin wrapper around LiteLLM for consistent tool-calling across providers.

    The same code works whether you're hitting GPT-4o, Claude, or a local
    Ollama model — LiteLLM handles the translation.
    """

    # Models that reject custom sampling: they require temperature=1.0 and
    # benefit from a larger completion budget (see __init__).
    _REASONING_MODELS = {"o3-pro", "o3-mini", "o3", "o1", "o1-mini", "o1-pro", "gpt-5"}

    def __init__(
        self,
        model: str,
        temperature: float = 0.0,
        max_tokens: int = 1024,
    ):
        """
        Args:
            model: Any LiteLLM model string (e.g. "gpt-4o", "ollama/llama3").
            temperature: Sampling temperature. Forced to 1.0 for reasoning
                models, which do not accept other values.
            max_tokens: Completion token cap. Raised to at least 4096 for
                reasoning models so chains of thought aren't truncated.
        """
        self.model = model
        # One entry appended per chat() call that reported usage; aggregated
        # by the total_usage property.
        self.usage_log: List[Dict[str, int]] = []

        # Strip an optional provider prefix ("openai/o3" -> "o3") so the
        # reasoning-model check also matches prefixed LiteLLM model strings.
        base_model = model.split("/")[-1]
        if base_model in self._REASONING_MODELS:
            self.temperature = 1.0
            self.max_tokens = max(max_tokens, 4096)
            if temperature != 1.0:
                # Lazy %-args: the message is only formatted if emitted.
                logger.info(
                    "Model %s requires temperature=1.0, overriding from %s",
                    model,
                    temperature,
                )
        else:
            self.temperature = temperature
            self.max_tokens = max_tokens

    def chat(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
    ) -> Any:
        """
        Send messages to the LLM and get a response.

        Args:
            messages: Conversation history in OpenAI format
            tools: Optional list of tools in OpenAI function-calling format

        Returns:
            LiteLLM ModelResponse (same shape as OpenAI ChatCompletion).
        """
        kwargs: Dict[str, Any] = {
            "model": self.model,
            "messages": messages,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
        }

        if tools:
            kwargs["tools"] = tools
            # Let the model decide whether to call a tool or answer in text.
            kwargs["tool_choice"] = "auto"

        logger.debug(
            "LLM request: model=%s, messages=%d, tools=%d",
            self.model,
            len(messages),
            len(tools or []),
        )
        response = litellm.completion(**kwargs)
        logger.debug(
            "LLM response: finish_reason=%s", response.choices[0].finish_reason
        )

        # Some providers omit usage; record only when present, defaulting
        # missing/None fields to 0 so total_usage never sees bad data.
        if hasattr(response, "usage") and response.usage:
            self.usage_log.append({
                "prompt_tokens": getattr(response.usage, "prompt_tokens", 0) or 0,
                "completion_tokens": getattr(response.usage, "completion_tokens", 0) or 0,
            })

        return response

    @property
    def total_usage(self) -> Dict[str, int]:
        """Aggregate token usage across all calls."""
        return {
            "prompt_tokens": sum(u["prompt_tokens"] for u in self.usage_log),
            "completion_tokens": sum(u["completion_tokens"] for u in self.usage_log),
            "total_calls": len(self.usage_log),
        }

    @staticmethod
    def extract_tool_calls(response) -> List[Dict[str, Any]]:
        """
        Extract tool calls from an LLM response.

        Returns:
            A list of dicts with "id", "name", and "arguments" (parsed to a
            dict when the provider returned a JSON string); empty list if the
            response contains no tool calls.
        """
        choice = response.choices[0]
        if not choice.message.tool_calls:
            return []

        calls = []
        for tc in choice.message.tool_calls:
            args = tc.function.arguments
            # Providers may return arguments as a JSON string or a dict.
            if isinstance(args, str):
                args = json.loads(args)
            calls.append({
                "id": tc.id,
                "name": tc.function.name,
                "arguments": args,
            })
        return calls

    @staticmethod
    def get_text_response(response) -> Optional[str]:
        """Extract plain text content from an LLM response (None if absent)."""
        choice = response.choices[0]
        return choice.message.content