from typing import Tuple

from huggingface_hub import InferenceClient

from config import settings


class ModelClient:
    """Chat-completion client that tries a primary model, then a fallback.

    Model names and generation parameters are read from ``config.settings``
    when the instance is created.
    """

    def __init__(self) -> None:
        """Copy model names and generation parameters from ``settings``."""
        self.primary_model = settings.PRIMARY_CODE_MODEL
        self.fallback_model = settings.FALLBACK_CODE_MODEL
        self.timeout = settings.MODEL_TIMEOUT_SECONDS
        self.temperature = settings.DEFAULT_TEMPERATURE
        self.top_p = settings.DEFAULT_TOP_P
        self.max_tokens = settings.MAX_OUTPUT_TOKENS
        # The token is optional: a missing setting is treated as "no token".
        self.hf_token = getattr(settings, "HUGGINGFACE_API_TOKEN", "")

    def _create_client(self) -> InferenceClient:
        """Build a new ``InferenceClient`` using the configured token and timeout."""
        return InferenceClient(
            # An empty token is passed as None rather than as an empty string.
            api_key=self.hf_token if self.hf_token else None,
            timeout=self.timeout,
        )

    @staticmethod
    def _clean_prompt(prompt) -> str:
        """Return *prompt* as a stripped string.

        Raises:
            RuntimeError: If the prompt is falsy or contains only whitespace.
        """
        cleaned_prompt = str(prompt or "").strip()
        if not cleaned_prompt:
            raise RuntimeError("Prompt is empty.")
        return cleaned_prompt

    def _extract_content(self, response) -> str:
        """Return the stripped text of the first choice in *response*.

        Raises:
            RuntimeError: If the response has no choices, the first choice has
                no message, or the message content is missing or blank.
        """
        if not response or not getattr(response, "choices", None):
            raise RuntimeError("Empty response from model.")

        first_choice = response.choices[0]
        if not first_choice or not getattr(first_choice, "message", None):
            raise RuntimeError("Model returned an invalid response structure.")

        content = getattr(first_choice.message, "content", None)
        if content is None:
            raise RuntimeError("Model returned no content.")

        cleaned = str(content).strip()
        if not cleaned:
            raise RuntimeError("Model returned empty content.")
        return cleaned

    def _call_model(self, prompt: str, model_name: str) -> str:
        """Send *prompt* as a single user message to *model_name*.

        Args:
            prompt: Text to send; it is stripped before use.
            model_name: Model identifier passed to the chat-completions call.

        Returns:
            The stripped text content of the first returned choice.

        Raises:
            RuntimeError: If the prompt is empty or the response carries no
                usable content. Errors raised by the client call propagate
                unchanged.
        """
        cleaned_prompt = self._clean_prompt(prompt)

        client = self._create_client()
        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {
                    "role": "user",
                    "content": cleaned_prompt,
                }
            ],
            temperature=self.temperature,
            top_p=self.top_p,
            max_tokens=self.max_tokens,
        )
        return self._extract_content(response)

    def generate(self, prompt: str) -> Tuple[str, str, bool]:
        """Generate text with the primary model, retrying on the fallback.

        Args:
            prompt: Text to send to the model.

        Returns:
            A ``(output, model_name, used_fallback)`` tuple, where
            ``model_name`` is the model that produced ``output`` and
            ``used_fallback`` is ``True`` only when the fallback model did.

        Raises:
            RuntimeError: If the prompt is empty, or if both the primary and
                the fallback model calls fail.
        """
        # Validate once up front: an empty prompt is a caller error, so it
        # should not be reported as two model failures.
        cleaned_prompt = self._clean_prompt(prompt)

        try:
            output = self._call_model(cleaned_prompt, self.primary_model)
            return output, self.primary_model, False
        except Exception as primary_error:
            # Deliberately broad: any primary failure triggers the fallback.
            print(f"Primary model failed: {primary_error}", flush=True)

            try:
                output = self._call_model(cleaned_prompt, self.fallback_model)
                return output, self.fallback_model, True
            except Exception as fallback_error:
                print(f"Fallback model failed: {fallback_error}", flush=True)
                raise RuntimeError(
                    "Both primary and fallback models failed."
                ) from fallback_error


# Shared module-level instance, created when this module is imported.
model_client = ModelClient()