# File size: 2,777 Bytes
from typing import Tuple
from huggingface_hub import InferenceClient
from config import settings
class ModelClient:
    """Chat-completion client that tries a primary model, then a fallback.

    Model names and generation parameters are read from ``settings`` once,
    when the instance is constructed.
    """

    def __init__(self):
        """Copy model names and generation parameters from ``settings``."""
        self.primary_model = settings.PRIMARY_CODE_MODEL
        self.fallback_model = settings.FALLBACK_CODE_MODEL
        self.timeout = settings.MODEL_TIMEOUT_SECONDS
        self.temperature = settings.DEFAULT_TEMPERATURE
        self.top_p = settings.DEFAULT_TOP_P
        self.max_tokens = settings.MAX_OUTPUT_TOKENS
        # The token is optional: a missing settings attribute becomes "".
        self.hf_token = getattr(settings, "HUGGINGFACE_API_TOKEN", "")

    def _create_client(self) -> "InferenceClient":
        """Build a new ``InferenceClient`` from the stored token and timeout."""
        return InferenceClient(
            # A falsy (empty) token is passed as None rather than as "".
            api_key=self.hf_token if self.hf_token else None,
            timeout=self.timeout,
        )

    def _extract_content(self, response) -> str:
        """Return the stripped text of the first choice in ``response``.

        Raises:
            RuntimeError: If the response is falsy or has no choices, the
                first choice has no message, or the message content is
                missing or blank after stripping.
        """
        if not response or not getattr(response, "choices", None):
            raise RuntimeError("Empty response from model.")

        first_choice = response.choices[0]
        if not first_choice or not getattr(first_choice, "message", None):
            raise RuntimeError("Model returned an invalid response structure.")

        content = getattr(first_choice.message, "content", None)
        if content is None:
            raise RuntimeError("Model returned no content.")

        cleaned = str(content).strip()
        if not cleaned:
            raise RuntimeError("Model returned empty content.")
        return cleaned

    def _call_model(self, prompt: str, model_name: str) -> str:
        """Send ``prompt`` as a single user message to ``model_name``.

        Returns:
            The stripped reply text extracted by ``_extract_content``.

        Raises:
            RuntimeError: If the prompt is empty after stripping, or the
                response carries no usable content. Errors raised by the
                client call itself propagate unchanged.
        """
        cleaned_prompt = str(prompt or "").strip()
        if not cleaned_prompt:
            # Checked before a client is built, so no request is attempted.
            raise RuntimeError("Prompt is empty.")

        client = self._create_client()
        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {
                    "role": "user",
                    "content": cleaned_prompt,
                }
            ],
            temperature=self.temperature,
            top_p=self.top_p,
            max_tokens=self.max_tokens,
        )
        return self._extract_content(response)

    def generate(self, prompt: str) -> Tuple[str, str, bool]:
        """Generate text with the primary model, falling back on any failure.

        Returns:
            A ``(output, model_name, used_fallback)`` tuple, where
            ``used_fallback`` is ``True`` only when the fallback model
            produced the output.

        Raises:
            RuntimeError: If both attempts fail. The fallback failure is
                attached as ``__cause__`` so the underlying error is not lost.
        """
        try:
            output = self._call_model(prompt, self.primary_model)
            return output, self.primary_model, False
        except Exception as primary_error:
            print(f"Primary model failed: {primary_error}", flush=True)

        try:
            output = self._call_model(prompt, self.fallback_model)
            return output, self.fallback_model, True
        except Exception as fallback_error:
            print(f"Fallback model failed: {fallback_error}", flush=True)
            # Chain explicitly so callers and tracebacks see the real cause.
            raise RuntimeError(
                "Both primary and fallback models failed."
            ) from fallback_error
# Module-level instance, created when this module is imported.
model_client = ModelClient()