NeilDriscoll's picture
Upload 14 files
661743e verified
"""LLM Client with dual-provider routing (HuggingFace + Anthropic)."""
import logging
import time
from typing import Optional
from config import (
LLM_PROVIDER, LLM_MODEL, HF_TOKEN, ANTHROPIC_API_KEY,
MAX_RETRIES, RETRY_DELAY, REQUEST_TIMEOUT,
)
logger = logging.getLogger(__name__)
class LLMClient:
    """Dual-provider LLM client. Routes to HuggingFace or Anthropic based on LLM_PROVIDER.

    ``LLM_PROVIDER`` is lower-cased and stripped before comparison. The value
    ``"anthropic"`` selects the Anthropic SDK; every other value falls back to
    the HuggingFace ``InferenceClient``.

    A missing credential, a missing package, or a failing SDK constructor is
    logged rather than raised. In that case ``is_configured()`` returns
    ``False`` and ``generate()`` returns ``None``.
    """

    def __init__(self):
        self.provider = LLM_PROVIDER.lower().strip()
        self.model = LLM_MODEL
        # Remains None when provider initialisation fails.
        self._client = None
        self._init_provider()

    def _init_provider(self):
        """Create the SDK client for the selected provider.

        On any failure the problem is logged and ``self._client`` stays
        ``None``; nothing is raised to the caller.
        """
        if self.provider == "anthropic":
            if not ANTHROPIC_API_KEY:
                logger.warning("ANTHROPIC_API_KEY not set. LLM calls will fail.")
                return
            try:
                import anthropic

                # Apply the configured request timeout here as well, matching
                # the HuggingFace branch below.
                self._client = anthropic.Anthropic(
                    api_key=ANTHROPIC_API_KEY,
                    timeout=REQUEST_TIMEOUT,
                )
                logger.info("LLMClient initialized: Anthropic / %s", self.model)
            except ImportError:
                logger.error("anthropic package not installed. Run: pip install anthropic")
            except Exception as e:
                logger.error("Failed to initialize Anthropic client: %s", e)
        else:  # default: huggingface
            if not HF_TOKEN:
                logger.warning("HF_TOKEN not set. LLM calls will fail.")
                return
            try:
                from huggingface_hub import InferenceClient

                self._client = InferenceClient(token=HF_TOKEN, timeout=REQUEST_TIMEOUT)
                logger.info("LLMClient initialized: HuggingFace / %s", self.model)
            except ImportError:
                logger.error("huggingface_hub not installed. Run: pip install huggingface_hub")
            except Exception as e:
                logger.error("Failed to initialize HF InferenceClient: %s", e)

    def is_configured(self) -> bool:
        """Return True when a provider client was created successfully."""
        return self._client is not None

    def generate(self, prompt: str, max_new_tokens: int = 1024) -> Optional[str]:
        """Generate a response with retry logic. Returns None on failure.

        Args:
            prompt: Text sent as a single user message.
            max_new_tokens: Upper bound on generated tokens passed to the provider.

        Returns:
            The stripped response text, or ``None`` when the client is not
            configured, every attempt failed, or every response was empty.

        Up to ``MAX_RETRIES`` attempts are made, sleeping ``RETRY_DELAY``
        between them. Errors whose message contains "401", "unauthorized" or
        "invalid" stop the loop immediately.
        """
        if not self.is_configured():
            return None

        if self.provider == "anthropic":
            call = self._call_anthropic
        else:
            call = self._call_huggingface

        last_err = None
        attempts_made = 0  # reported in the final log line; may be < MAX_RETRIES
        for attempt in range(MAX_RETRIES):
            attempts_made = attempt + 1
            try:
                result = call(prompt, max_new_tokens)
                if result:
                    return result
                logger.warning("Empty response from LLM")
            except Exception as e:
                last_err = e
                msg = str(e).lower()
                logger.warning(
                    "LLM call failed (attempt %s/%s): %s", attempt + 1, MAX_RETRIES, e
                )
                if "401" in msg or "unauthorized" in msg or "invalid" in msg:
                    break  # don't retry auth errors
            # No point sleeping after the final attempt.
            if attempt < MAX_RETRIES - 1:
                time.sleep(RETRY_DELAY)

        logger.error(
            "LLM generation failed after %s attempts. Last error: %s",
            attempts_made,
            last_err,
        )
        return None

    def _call_huggingface(self, prompt: str, max_new_tokens: int) -> Optional[str]:
        """Send one chat-completion request to HuggingFace.

        Returns the stripped content of the first choice, or ``None`` when the
        response has no choices or the content is empty. Exceptions from the
        SDK propagate to the caller.
        """
        response = self._client.chat_completion(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
        )
        choices = getattr(response, "choices", None)
        if not choices:
            return None
        content = choices[0].message.content
        return content.strip() if content else None

    def _call_anthropic(self, prompt: str, max_new_tokens: int) -> Optional[str]:
        """Send one messages request to Anthropic.

        Returns the stripped, concatenated text of every content block that
        carries a string ``text`` attribute, or ``None`` when there is no such
        block. Exceptions from the SDK propagate to the caller.
        """
        response = self._client.messages.create(
            model=self.model,
            max_tokens=max_new_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        # Blocks without a ``text`` attribute are skipped, so a non-text
        # leading block no longer raises AttributeError.
        parts = []
        for block in response.content or ():
            text = getattr(block, "text", None)
            if isinstance(text, str):
                parts.append(text)
        if not parts:
            return None
        return "".join(parts).strip()
# Shared instance, created lazily by get_llm_client().
_llm_client: Optional[LLMClient] = None


def get_llm_client() -> LLMClient:
    """Return the shared module-level LLMClient, building it on first use."""
    global _llm_client
    if _llm_client is not None:
        return _llm_client
    _llm_client = LLMClient()
    return _llm_client