Spaces:
Sleeping
Sleeping
| """ | |
| LLM abstraction layer using LiteLLM. | |
| Supports any model LiteLLM supports — switch with a single string: | |
| - OpenAI: "gpt-4o", "gpt-5.4", "o3-pro" | |
| - Anthropic: "claude-opus-4-6", "claude-sonnet-4-6" | |
| - Local: "ollama/llama3", "ollama/mistral" | |
| - And 100+ more providers | |
| API keys are read from environment variables (loaded from root .env): | |
| OPENAI_API_KEY, ANTHROPIC_API_KEY, etc. | |
| Usage: | |
| from agent.llm import LLMClient | |
| llm = LLMClient(model="gpt-4o") | |
| response = llm.chat( | |
| messages=[{"role": "user", "content": "Hello"}], | |
| tools=[...], | |
| ) | |
| """ | |
| import json | |
| import logging | |
| from typing import Any, Dict, List, Optional | |
| import litellm | |
| logger = logging.getLogger(__name__) | |
class LLMClient:
    """
    Thin wrapper around LiteLLM for consistent tool-calling across providers.

    The same code works whether you're hitting GPT-4o, Claude, or a local
    Ollama model — LiteLLM handles the translation.
    """

    # Models that only accept temperature=1.0 and need a larger completion
    # budget because they spend tokens on internal reasoning.
    _REASONING_MODELS = {"o3-pro", "o3-mini", "o3", "o1", "o1-mini", "o1-pro", "gpt-5"}

    def __init__(
        self,
        model: str,
        temperature: float = 0.0,
        max_tokens: int = 1024,
    ):
        """
        Args:
            model: Any LiteLLM model string, e.g. "gpt-4o" or "ollama/llama3".
            temperature: Sampling temperature. Overridden to 1.0 for
                reasoning models, which require that value.
            max_tokens: Completion token cap. Raised to at least 4096 for
                reasoning models.
        """
        self.model = model
        # One entry per chat() call that reported usage; see total_usage().
        self.usage_log: List[Dict[str, int]] = []
        if model in self._REASONING_MODELS:
            self.temperature = 1.0
            self.max_tokens = max(max_tokens, 4096)
            if temperature != 1.0:
                # Lazy %-style args: the message is only formatted when
                # INFO logging is actually enabled.
                logger.info(
                    "Model %s requires temperature=1.0, overriding from %s",
                    model,
                    temperature,
                )
        else:
            self.temperature = temperature
            self.max_tokens = max_tokens

    def chat(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
    ) -> Any:
        """
        Send messages to the LLM and get a response.

        Args:
            messages: Conversation history in OpenAI format
            tools: Optional list of tools in OpenAI function-calling format

        Returns:
            LiteLLM ModelResponse (same shape as OpenAI ChatCompletion).
        """
        kwargs: Dict[str, Any] = {
            "model": self.model,
            "messages": messages,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
        }
        if tools:
            kwargs["tools"] = tools
            kwargs["tool_choice"] = "auto"
        # Lazy %-style args keep these debug lines essentially free when
        # DEBUG logging is off.
        logger.debug(
            "LLM request: model=%s, messages=%d, tools=%d",
            self.model,
            len(messages),
            len(tools or []),
        )
        response = litellm.completion(**kwargs)
        logger.debug(
            "LLM response: finish_reason=%s", response.choices[0].finish_reason
        )
        # Some providers omit usage; record it only when present.
        if hasattr(response, "usage") and response.usage:
            self.usage_log.append({
                "prompt_tokens": getattr(response.usage, "prompt_tokens", 0) or 0,
                "completion_tokens": getattr(response.usage, "completion_tokens", 0) or 0,
            })
        return response

    def total_usage(self) -> Dict[str, int]:
        """Aggregate token usage across all calls."""
        return {
            "prompt_tokens": sum(u["prompt_tokens"] for u in self.usage_log),
            "completion_tokens": sum(u["completion_tokens"] for u in self.usage_log),
            "total_calls": len(self.usage_log),
        }
def extract_tool_calls(response) -> List[Dict[str, Any]]:
    """Extract tool calls from an LLM response.

    Returns a list of {"id", "name", "arguments"} dicts. Arguments that
    arrive as a JSON string are decoded; an empty list means the model
    made no tool calls.
    """
    message = response.choices[0].message
    if not message.tool_calls:
        return []
    extracted = []
    for call in message.tool_calls:
        raw_args = call.function.arguments
        parsed_args = json.loads(raw_args) if isinstance(raw_args, str) else raw_args
        extracted.append(
            {
                "id": call.id,
                "name": call.function.name,
                "arguments": parsed_args,
            }
        )
    return extracted
def get_text_response(response) -> Optional[str]:
    """Return the plain-text content of an LLM response, or None if absent."""
    message = response.choices[0].message
    return message.content