# NOTE: removed non-code scraper residue (page status text, file size, commit
# hash, and a line-number gutter dump) that was prepended to this file.
"""
LLM Service - Chat completions via HuggingFace.
"""
import logging
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
from huggingface_hub import InferenceClient
from config.settings import Settings
logger = logging.getLogger(__name__)
@dataclass
class LLMConfig:
    """Configuration bundle for the LLM service.

    Attributes:
        api_key: HuggingFace API key/token.
        model_name: Model name or hub ID to query.
        temperature: Sampling temperature (near-greedy by default).
        max_tokens: Maximum number of tokens to generate per completion.
    """
    api_key: str
    model_name: str
    temperature: float = 0.01
    max_tokens: int = 512
class LLMService:
    """
    LLM service using HuggingFace InferenceClient.

    Resolves configuration (API key, model, sampling defaults) from
    constructor arguments with fallback to ``Settings``, and exposes
    blocking and streaming chat-completion helpers.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        model_name: Optional[str] = None,
    ):
        """
        Initialize LLM service.

        Args:
            api_key: HuggingFace API key. Falls back to ``Settings.hf_token``.
            model_name: Model name/ID. Falls back to
                ``Settings.effective_model_name``.
        """
        settings = Settings()
        self.config = LLMConfig(
            api_key=api_key or settings.hf_token,
            model_name=model_name or settings.effective_model_name,
            temperature=settings.hf_temperature,
            max_tokens=settings.hf_max_new_tokens,
        )
        self.client = InferenceClient(token=self.config.api_key)
        # Lazy %-args: the message is only formatted if INFO is enabled.
        logger.info("LLMService initialized with model: %s", self.config.model_name)

    async def get_chat_completion(
        self,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> str:
        """
        Get chat completion from the model.

        NOTE(review): ``InferenceClient.chat_completion`` is a synchronous
        call, so this coroutine blocks the event loop while waiting —
        consider ``asyncio.to_thread`` if concurrency matters to callers.

        Args:
            messages: List of message dicts with 'role' and 'content'.
            temperature: Override temperature. ``None`` uses the configured
                default; an explicit ``0.0`` is honored.
            max_tokens: Override max tokens. ``None`` uses the configured
                default; an explicit ``0`` is honored.

        Returns:
            Assistant response text.

        Raises:
            Exception: Wrapping any client error; the original exception is
                preserved as ``__cause__``.
        """
        logger.debug("Chat completion request with model: %s", self.config.model_name)
        try:
            response = self.client.chat_completion(
                messages=messages,
                model=self.config.model_name,
                # `is not None` checks (not truthiness) so explicit 0 / 0.0
                # overrides are not silently replaced by the defaults.
                max_tokens=max_tokens if max_tokens is not None else self.config.max_tokens,
                temperature=temperature if temperature is not None else self.config.temperature,
            )
            content = response.choices[0].message.content
            # Guard with `or ""` so a None content can't make the log slice raise.
            logger.debug("Chat completion response: %s...", (content or "")[:200])
            return content
        except Exception as e:
            logger.error("Chat completion error: %s", e)
            # Chain the cause so the underlying client error isn't lost.
            raise Exception(f"LLM completion error: {e}") from e

    async def get_streaming_completion(
        self,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ):
        """
        Get streaming chat completion.

        NOTE(review): the stream is consumed with a synchronous ``for`` loop,
        which blocks the event loop between chunks — confirm acceptable.

        Args:
            messages: List of message dicts with 'role' and 'content'.
            temperature: Override temperature (``None`` → configured default).
            max_tokens: Override max tokens (``None`` → configured default).

        Yields:
            Text chunks as they're generated.

        Raises:
            Exception: Wrapping any client error; original preserved as
                ``__cause__``.
        """
        logger.debug("Streaming completion request with model: %s", self.config.model_name)
        try:
            stream = self.client.chat_completion(
                messages=messages,
                model=self.config.model_name,
                # Same `is not None` handling as get_chat_completion.
                max_tokens=max_tokens if max_tokens is not None else self.config.max_tokens,
                temperature=temperature if temperature is not None else self.config.temperature,
                stream=True,
            )
            for chunk in stream:
                # Skip keep-alive / empty-delta chunks.
                if chunk.choices and chunk.choices[0].delta.content:
                    yield chunk.choices[0].delta.content
        except Exception as e:
            logger.error("Streaming completion error: %s", e)
            raise Exception(f"LLM streaming error: {e}") from e

    def build_messages_with_tools(
        self,
        system_prompt: str,
        user_input: str,
        tools_description: str = "",
        conversation_history: Optional[List[Dict[str, str]]] = None,
        tool_results: Optional[str] = None,
    ) -> List[Dict[str, str]]:
        """
        Build messages array with tools and context.

        Args:
            system_prompt: System instruction.
            user_input: User's message.
            tools_description: Available tools description (skipped if empty).
            conversation_history: Previous messages; only the last 10
                'user'/'assistant' entries are included.
            tool_results: Results from tool execution, appended as an
                assistant message after the user input.

        Returns:
            Messages array for chat completion.
        """
        messages: List[Dict[str, str]] = [{"role": "system", "content": system_prompt}]
        if tools_description:
            messages.append({
                "role": "system",
                "content": f"Available tools:\n{tools_description}"
            })
        # Add conversation history (bounded to keep the prompt small).
        if conversation_history:
            for msg in conversation_history[-10:]:  # Last 10 messages
                if msg.get("role") in ["user", "assistant"]:
                    messages.append(msg)
        # Add current user input
        messages.append({"role": "user", "content": user_input})
        # Add tool results if present
        if tool_results:
            messages.append({"role": "assistant", "content": tool_results})
        return messages
# NOTE: removed trailing scraper residue ("|") that followed the code.