"""LLM Client with dual-provider routing (HuggingFace + Anthropic).""" import logging import time from typing import Optional from config import ( LLM_PROVIDER, LLM_MODEL, HF_TOKEN, ANTHROPIC_API_KEY, MAX_RETRIES, RETRY_DELAY, REQUEST_TIMEOUT, ) logger = logging.getLogger(__name__) class LLMClient: """Dual-provider LLM client. Routes to HuggingFace or Anthropic based on LLM_PROVIDER.""" def __init__(self): self.provider = LLM_PROVIDER.lower().strip() self.model = LLM_MODEL self._client = None self._init_provider() def _init_provider(self): if self.provider == "anthropic": if not ANTHROPIC_API_KEY: logger.warning("ANTHROPIC_API_KEY not set. LLM calls will fail.") return try: import anthropic self._client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY) logger.info(f"LLMClient initialized: Anthropic / {self.model}") except ImportError: logger.error("anthropic package not installed. Run: pip install anthropic") except Exception as e: logger.error(f"Failed to initialize Anthropic client: {e}") else: # default: huggingface if not HF_TOKEN: logger.warning("HF_TOKEN not set. LLM calls will fail.") return try: from huggingface_hub import InferenceClient self._client = InferenceClient(token=HF_TOKEN, timeout=REQUEST_TIMEOUT) logger.info(f"LLMClient initialized: HuggingFace / {self.model}") except ImportError: logger.error("huggingface_hub not installed. Run: pip install huggingface_hub") except Exception as e: logger.error(f"Failed to initialize HF InferenceClient: {e}") def is_configured(self) -> bool: return self._client is not None def generate(self, prompt: str, max_new_tokens: int = 1024) -> Optional[str]: """Generate a response with retry logic. Returns None on failure.""" if not self._client: return None last_err = None for attempt in range(MAX_RETRIES): try: if self.provider == "anthropic": result = self._call_anthropic(prompt, max_new_tokens) else: result = self._call_huggingface(prompt, max_new_tokens) if result: return result logger.warning("Empty response from LLM") except Exception as e: last_err = e msg = str(e).lower() logger.warning(f"LLM call failed (attempt {attempt + 1}/{MAX_RETRIES}): {e}") if "401" in msg or "unauthorized" in msg or "invalid" in msg: break # don't retry auth errors if attempt < MAX_RETRIES - 1: time.sleep(RETRY_DELAY) logger.error(f"LLM generation failed after {MAX_RETRIES} attempts. Last error: {last_err}") return None def _call_huggingface(self, prompt: str, max_new_tokens: int) -> Optional[str]: response = self._client.chat_completion( model=self.model, messages=[{"role": "user", "content": prompt}], max_tokens=max_new_tokens, temperature=0.7, top_p=0.9, ) if hasattr(response, "choices") and response.choices: content = response.choices[0].message.content return content.strip() if content else None return None def _call_anthropic(self, prompt: str, max_new_tokens: int) -> Optional[str]: response = self._client.messages.create( model=self.model, max_tokens=max_new_tokens, messages=[{"role": "user", "content": prompt}], ) if response.content and len(response.content) > 0: return response.content[0].text.strip() return None _llm_client: Optional[LLMClient] = None def get_llm_client() -> LLMClient: global _llm_client if _llm_client is None: _llm_client = LLMClient() return _llm_client