"""
LLM Service - Chat completions via HuggingFace.
"""
import logging
from dataclasses import dataclass
from typing import Dict, List, Optional

from huggingface_hub import InferenceClient

from config.settings import Settings

logger = logging.getLogger(__name__)

@dataclass
class LLMConfig:
    """LLM configuration."""

    api_key: str
    model_name: str
    temperature: float = 0.01
    max_tokens: int = 512

class LLMService:
    """
    LLM service using HuggingFace InferenceClient.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        model_name: Optional[str] = None,
    ):
        """
        Initialize LLM service.

        Args:
            api_key: HuggingFace API key
            model_name: Model name/ID
        """
        settings = Settings()
        key = api_key or settings.hf_token
        name = model_name or settings.effective_model_name
        self.config = LLMConfig(
            api_key=key,
            model_name=name,
            temperature=settings.hf_temperature,
            max_tokens=settings.hf_max_new_tokens,
        )
        self.client = InferenceClient(token=self.config.api_key)
        logger.info(f"LLMService initialized with model: {self.config.model_name}")
    async def get_chat_completion(
        self,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> str:
        """
        Get chat completion from the model.

        Args:
            messages: List of message dicts with 'role' and 'content'
            temperature: Override temperature
            max_tokens: Override max tokens

        Returns:
            Assistant response text
        """
        logger.debug(f"Chat completion request with model: {self.config.model_name}")
        try:
            # Note: InferenceClient is synchronous, so this call blocks the
            # event loop for the duration of the request.
            response = self.client.chat_completion(
                messages=messages,
                model=self.config.model_name,
                # Compare against None so an explicit override of 0 is not
                # silently replaced by the configured default.
                max_tokens=max_tokens if max_tokens is not None else self.config.max_tokens,
                temperature=temperature if temperature is not None else self.config.temperature,
            )
            content = response.choices[0].message.content
            logger.debug(f"Chat completion response: {content[:200]}...")
            return content
        except Exception as e:
            logger.error(f"Chat completion error: {e}")
            raise RuntimeError(f"LLM completion error: {e}") from e
    async def get_streaming_completion(
        self,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ):
        """
        Get streaming chat completion.

        Yields:
            Text chunks as they're generated
        """
        logger.debug(f"Streaming completion request with model: {self.config.model_name}")
        try:
            stream = self.client.chat_completion(
                messages=messages,
                model=self.config.model_name,
                max_tokens=max_tokens if max_tokens is not None else self.config.max_tokens,
                temperature=temperature if temperature is not None else self.config.temperature,
                stream=True,
            )
            # Yield only non-empty content deltas; role-only or empty
            # chunks are skipped.
            for chunk in stream:
                if chunk.choices and chunk.choices[0].delta.content:
                    yield chunk.choices[0].delta.content
        except Exception as e:
            logger.error(f"Streaming completion error: {e}")
            raise RuntimeError(f"LLM streaming error: {e}") from e
    def build_messages_with_tools(
        self,
        system_prompt: str,
        user_input: str,
        tools_description: str = "",
        conversation_history: Optional[List[Dict[str, str]]] = None,
        tool_results: Optional[str] = None,
    ) -> List[Dict[str, str]]:
        """
        Build messages array with tools and context.

        Args:
            system_prompt: System instruction
            user_input: User's message
            tools_description: Available tools description
            conversation_history: Previous messages
            tool_results: Results from tool execution

        Returns:
            Messages array for chat completion
        """
        messages = [{"role": "system", "content": system_prompt}]
        if tools_description:
            messages.append({
                "role": "system",
                "content": f"Available tools:\n{tools_description}"
            })
        # Add conversation history (last 10 user/assistant messages only)
        if conversation_history:
            for msg in conversation_history[-10:]:
                if msg.get("role") in ["user", "assistant"]:
                    messages.append(msg)
        # Add current user input
        messages.append({"role": "user", "content": user_input})
        # Add tool results if present
        if tool_results:
            messages.append({"role": "assistant", "content": tool_results})
        return messages
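

# Minimal usage sketch (illustrative only, not part of the original module).
# It assumes Settings can supply a valid HuggingFace token and model name,
# e.g. from environment variables; the tool description shown is hypothetical.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        service = LLMService()
        # Build a prompt that advertises a (hypothetical) search tool.
        messages = service.build_messages_with_tools(
            system_prompt="You are a helpful assistant.",
            user_input="What can you do?",
            tools_description="- search(query): look up documents",
        )
        reply = await service.get_chat_completion(messages)
        print(reply)

    asyncio.run(_demo())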