| """LLM Client with dual-provider routing (HuggingFace + Anthropic).""" |
|
|
| import logging |
| import time |
| from typing import Optional |
|
|
| from config import ( |
| LLM_PROVIDER, LLM_MODEL, HF_TOKEN, ANTHROPIC_API_KEY, |
| MAX_RETRIES, RETRY_DELAY, REQUEST_TIMEOUT, |
| ) |
|
|
# Module-level logger named after this module; LLMClient uses it to report
# provider initialisation problems and per-attempt generation failures.
logger = logging.getLogger(__name__)
|
|
|
|
class LLMClient:
    """Dual-provider LLM client.

    Routes to Anthropic when ``LLM_PROVIDER`` (lower-cased, stripped) equals
    ``"anthropic"``; every other value routes to the HuggingFace Inference
    API.

    Construction never raises on a missing credential or package: the problem
    is logged, the instance stays unconfigured, and :meth:`generate` returns
    ``None``. Use :meth:`is_configured` to detect that state.
    """

    # Substrings of a lower-cased error message that mark a failure as not
    # worth retrying (bad credentials / rejected request).
    _NON_RETRYABLE_MARKERS = ("401", "unauthorized", "invalid")

    def __init__(self):
        """Read provider and model from config and try to build the SDK client."""
        # ``or ""`` keeps an unset provider from raising AttributeError here;
        # an empty provider then falls through to the HuggingFace branch.
        self.provider = (LLM_PROVIDER or "").lower().strip()
        self.model = LLM_MODEL
        self._client = None
        self._init_provider()

    def _init_provider(self):
        """Create the provider SDK client, logging (not raising) on failure.

        Leaves ``self._client`` as ``None`` when the credential is missing,
        the SDK package is not installed, or the SDK constructor fails.
        """
        if self.provider == "anthropic":
            if not ANTHROPIC_API_KEY:
                logger.warning("ANTHROPIC_API_KEY not set. LLM calls will fail.")
                return
            try:
                # Imported lazily so the package is only required when this
                # provider is actually selected.
                import anthropic

                # Pass the same request timeout the HuggingFace client gets,
                # so both providers honour REQUEST_TIMEOUT instead of one of
                # them silently using the SDK default.
                self._client = anthropic.Anthropic(
                    api_key=ANTHROPIC_API_KEY,
                    timeout=REQUEST_TIMEOUT,
                )
                logger.info("LLMClient initialized: Anthropic / %s", self.model)
            except ImportError:
                logger.error("anthropic package not installed. Run: pip install anthropic")
            except Exception as e:
                # Deliberate best-effort: a broken client must not crash import
                # of the application; callers see is_configured() == False.
                logger.error("Failed to initialize Anthropic client: %s", e)
            return

        if not HF_TOKEN:
            logger.warning("HF_TOKEN not set. LLM calls will fail.")
            return
        try:
            from huggingface_hub import InferenceClient

            self._client = InferenceClient(token=HF_TOKEN, timeout=REQUEST_TIMEOUT)
            logger.info("LLMClient initialized: HuggingFace / %s", self.model)
        except ImportError:
            logger.error("huggingface_hub not installed. Run: pip install huggingface_hub")
        except Exception as e:
            logger.error("Failed to initialize HF InferenceClient: %s", e)

    def is_configured(self) -> bool:
        """Return True when a provider client was successfully created."""
        return self._client is not None

    def generate(self, prompt: str, max_new_tokens: int = 1024) -> Optional[str]:
        """Generate a completion for ``prompt``, retrying on failure.

        Makes up to ``MAX_RETRIES`` attempts, sleeping ``RETRY_DELAY`` seconds
        between them. An empty response counts as a failed attempt. Errors
        whose message looks like an authentication/validation failure stop
        the retries immediately.

        Args:
            prompt: User message sent as the single chat turn.
            max_new_tokens: Upper bound on generated tokens.

        Returns:
            The stripped response text, or ``None`` if the client is not
            configured or every attempt failed. Provider exceptions are
            logged, never propagated.
        """
        if self._client is None:
            return None

        if self.provider == "anthropic":
            call = self._call_anthropic
        else:
            call = self._call_huggingface

        last_failure = None
        attempts_made = 0
        for attempt in range(1, MAX_RETRIES + 1):
            attempts_made = attempt
            try:
                result = call(prompt, max_new_tokens)
            except Exception as e:
                # Retry boundary: each SDK raises its own exception types, so
                # the message text is the only provider-neutral signal.
                last_failure = e
                logger.warning(
                    "LLM call failed (attempt %d/%d): %s", attempt, MAX_RETRIES, e
                )
                if self._is_non_retryable(e):
                    break
            else:
                if result:
                    return result
                last_failure = "empty response"
                logger.warning("Empty response from LLM")
            if attempt < MAX_RETRIES:
                time.sleep(RETRY_DELAY)

        # Report the attempts actually made: an early break on an auth error
        # must not be logged as if every retry had been used.
        logger.error(
            "LLM generation failed after %d attempt(s). Last error: %s",
            attempts_made,
            last_failure,
        )
        return None

    @classmethod
    def _is_non_retryable(cls, error: Exception) -> bool:
        """Return True when the error message contains a non-retryable marker."""
        message = str(error).lower()
        return any(marker in message for marker in cls._NON_RETRYABLE_MARKERS)

    def _call_huggingface(self, prompt: str, max_new_tokens: int) -> Optional[str]:
        """Send one chat-completion request to HuggingFace.

        Returns the stripped content of the first choice, or ``None`` when
        the response has no choices or the content is empty. Exceptions from
        the SDK propagate to the caller.
        """
        response = self._client.chat_completion(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_new_tokens,
            temperature=0.7,
            top_p=0.9,
        )
        choices = getattr(response, "choices", None)
        if not choices:
            return None
        content = choices[0].message.content
        return content.strip() if content else None

    def _call_anthropic(self, prompt: str, max_new_tokens: int) -> Optional[str]:
        """Send one messages request to Anthropic.

        Returns the concatenated, stripped text of every content block that
        carries text, or ``None`` when there is none. Exceptions from the SDK
        propagate to the caller.
        """
        response = self._client.messages.create(
            model=self.model,
            max_tokens=max_new_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        # The first content block is not guaranteed to be a text block, so
        # collect text from whichever blocks have it instead of indexing [0].
        pieces = []
        for block in response.content or ():
            text = getattr(block, "text", None)
            if isinstance(text, str):
                pieces.append(text)
        joined = "".join(pieces).strip()
        return joined or None
|
|
|
|
# Process-wide client instance; stays None until get_llm_client() is first called.
_llm_client: Optional[LLMClient] = None


def get_llm_client() -> LLMClient:
    """Return the shared LLMClient, constructing it on the first call."""
    global _llm_client
    if _llm_client is not None:
        return _llm_client
    _llm_client = LLMClient()
    return _llm_client
|
|