# NOTE: "Spaces: Running" status lines below are HuggingFace Spaces page
# artifacts from scraping, not part of the source file.
"""
LLM Service - Chat completions via HuggingFace.
"""
import logging
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

from huggingface_hub import InferenceClient

from config.settings import Settings

# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)
@dataclass
class LLMConfig:
    """LLM configuration.

    Bug fix: the ``@dataclass`` decorator was missing, but ``LLMService``
    constructs this class with keyword arguments (``LLMConfig(api_key=...,
    ...)``) — without the decorator that call raises ``TypeError``.

    Attributes:
        api_key: HuggingFace API token used to authenticate requests.
        model_name: Model name/ID passed to the inference endpoint.
        temperature: Sampling temperature; 0.01 keeps output near-deterministic.
        max_tokens: Maximum number of new tokens to generate.
    """
    api_key: str
    model_name: str
    temperature: float = 0.01  # near-greedy default
    max_tokens: int = 512
class LLMService:
    """
    LLM service using HuggingFace InferenceClient.

    Wraps a configured ``InferenceClient`` and exposes a blocking and a
    streaming chat-completion helper, plus a builder for the messages array.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        model_name: Optional[str] = None,
    ):
        """
        Initialize LLM service.

        Args:
            api_key: HuggingFace API key; falls back to ``settings.hf_token``.
            model_name: Model name/ID; falls back to
                ``settings.effective_model_name``.
        """
        settings = Settings()
        self.config = LLMConfig(
            api_key=api_key or settings.hf_token,
            model_name=model_name or settings.effective_model_name,
            temperature=settings.hf_temperature,
            max_tokens=settings.hf_max_new_tokens,
        )
        self.client = InferenceClient(token=self.config.api_key)
        # Lazy %-style args: formatting is skipped when INFO is disabled.
        logger.info("LLMService initialized with model: %s", self.config.model_name)

    async def get_chat_completion(
        self,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> str:
        """
        Get chat completion from the model.

        Args:
            messages: List of message dicts with 'role' and 'content'
            temperature: Override temperature (an explicit 0.0 is honored)
            max_tokens: Override max tokens (an explicit 0 is honored)

        Returns:
            Assistant response text

        Raises:
            RuntimeError: If the completion request fails (original error
                attached as ``__cause__``).
        """
        logger.debug("Chat completion request with model: %s", self.config.model_name)
        # Use `is not None`, not `or`: `temperature=0.0` is a meaningful
        # override that a truthiness check would silently discard.
        eff_temperature = temperature if temperature is not None else self.config.temperature
        eff_max_tokens = max_tokens if max_tokens is not None else self.config.max_tokens
        try:
            # NOTE(review): client call is synchronous and will block the event
            # loop despite the async signature — consider run_in_executor.
            response = self.client.chat_completion(
                messages=messages,
                model=self.config.model_name,
                max_tokens=eff_max_tokens,
                temperature=eff_temperature,
            )
            content = response.choices[0].message.content
            logger.debug("Chat completion response: %s...", content[:200])
            return content
        except Exception as e:
            logger.exception("Chat completion error")
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"LLM completion error: {e}") from e

    async def get_streaming_completion(
        self,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ):
        """
        Get streaming chat completion.

        Args:
            messages: List of message dicts with 'role' and 'content'
            temperature: Override temperature (an explicit 0.0 is honored)
            max_tokens: Override max tokens (an explicit 0 is honored)

        Yields:
            Text chunks as they're generated

        Raises:
            RuntimeError: If the streaming request fails.
        """
        logger.debug("Streaming completion request with model: %s", self.config.model_name)
        # Same `is not None` rationale as get_chat_completion.
        eff_temperature = temperature if temperature is not None else self.config.temperature
        eff_max_tokens = max_tokens if max_tokens is not None else self.config.max_tokens
        try:
            stream = self.client.chat_completion(
                messages=messages,
                model=self.config.model_name,
                max_tokens=eff_max_tokens,
                temperature=eff_temperature,
                stream=True,
            )
            for chunk in stream:
                # Skip keep-alive/empty delta chunks.
                if chunk.choices and chunk.choices[0].delta.content:
                    yield chunk.choices[0].delta.content
        except Exception as e:
            logger.exception("Streaming completion error")
            raise RuntimeError(f"LLM streaming error: {e}") from e

    def build_messages_with_tools(
        self,
        system_prompt: str,
        user_input: str,
        tools_description: str = "",
        conversation_history: Optional[List[Dict[str, str]]] = None,
        tool_results: Optional[str] = None,
    ) -> List[Dict[str, str]]:
        """
        Build messages array with tools and context.

        Args:
            system_prompt: System instruction
            user_input: User's message
            tools_description: Available tools description
            conversation_history: Previous messages
            tool_results: Results from tool execution

        Returns:
            Messages array for chat completion
        """
        messages: List[Dict[str, str]] = [{"role": "system", "content": system_prompt}]

        if tools_description:
            messages.append({
                "role": "system",
                "content": f"Available tools:\n{tools_description}"
            })

        # Add conversation history, capped at the last 10 messages and
        # restricted to user/assistant turns (stray system entries dropped).
        if conversation_history:
            messages.extend(
                msg for msg in conversation_history[-10:]
                if msg.get("role") in ["user", "assistant"]
            )

        # Add current user input
        messages.append({"role": "user", "content": user_input})

        # Add tool results if present (injected as an assistant turn).
        if tool_results:
            messages.append({"role": "assistant", "content": tool_results})

        return messages