File size: 2,777 Bytes
f8eaac1
 
 
 
895ed04
 
f8eaac1
895ed04
 
 
 
 
 
 
a5dbf47
f8eaac1
895ed04
f8eaac1
 
 
 
 
895ed04
a5dbf47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8eaac1
a5dbf47
 
 
 
f8eaac1
 
 
 
 
 
 
a5dbf47
f8eaac1
 
 
 
a5dbf47
f8eaac1
895ed04
a5dbf47
895ed04
 
 
f8eaac1
895ed04
a5dbf47
895ed04
a5dbf47
f8eaac1
895ed04
f8eaac1
895ed04
a5dbf47
895ed04
a5dbf47
f8eaac1
 
895ed04
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from typing import Tuple

from huggingface_hub import InferenceClient

from config import settings


class ModelClient:
    """Chat-completion client that tries a primary model, then a fallback.

    Model names, timeout and sampling parameters are copied from
    ``settings`` when the instance is constructed.
    """

    def __init__(self):
        """Copy model names and generation parameters from ``settings``."""
        self.primary_model = settings.PRIMARY_CODE_MODEL
        self.fallback_model = settings.FALLBACK_CODE_MODEL
        self.timeout = settings.MODEL_TIMEOUT_SECONDS
        self.temperature = settings.DEFAULT_TEMPERATURE
        self.top_p = settings.DEFAULT_TOP_P
        self.max_tokens = settings.MAX_OUTPUT_TOKENS
        # The token setting is optional; when absent it defaults to "".
        self.hf_token = getattr(settings, "HUGGINGFACE_API_TOKEN", "")

    @staticmethod
    def _normalize_prompt(prompt) -> str:
        """Return *prompt* as a stripped string.

        Raises:
            RuntimeError: If the prompt is falsy or only whitespace.
        """
        cleaned_prompt = str(prompt or "").strip()
        if not cleaned_prompt:
            raise RuntimeError("Prompt is empty.")
        return cleaned_prompt

    def _create_client(self) -> "InferenceClient":
        """Build a new ``InferenceClient`` using the stored token and timeout.

        An empty token is passed as ``None`` rather than as an empty string.
        """
        return InferenceClient(
            api_key=self.hf_token or None,
            timeout=self.timeout,
        )

    def _extract_content(self, response) -> str:
        """Return the stripped text of the first choice in *response*.

        Raises:
            RuntimeError: If the response has no choices, the first choice
                has no message, or the message content is missing or blank.
        """
        if not response or not getattr(response, "choices", None):
            raise RuntimeError("Empty response from model.")

        first_choice = response.choices[0]
        if not first_choice or not getattr(first_choice, "message", None):
            raise RuntimeError("Model returned an invalid response structure.")

        content = getattr(first_choice.message, "content", None)
        if content is None:
            raise RuntimeError("Model returned no content.")

        cleaned = str(content).strip()
        if not cleaned:
            raise RuntimeError("Model returned empty content.")

        return cleaned

    def _call_model(self, prompt: str, model_name: str) -> str:
        """Send *prompt* as a single user message to *model_name*.

        Returns:
            The stripped text content of the model's first choice.

        Raises:
            RuntimeError: If the prompt is empty or the response carries no
                usable content. Errors raised by the client propagate as-is.
        """
        cleaned_prompt = self._normalize_prompt(prompt)

        # A fresh client is created for every call.
        client = self._create_client()

        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {
                    "role": "user",
                    "content": cleaned_prompt,
                }
            ],
            temperature=self.temperature,
            top_p=self.top_p,
            max_tokens=self.max_tokens,
        )

        return self._extract_content(response)

    def generate(self, prompt: str) -> Tuple[str, str, bool]:
        """Generate text for *prompt*, falling back if the primary model fails.

        Returns:
            A ``(output, model_name, used_fallback)`` tuple, where
            ``used_fallback`` is ``True`` only when the fallback model
            produced the output.

        Raises:
            RuntimeError: If the prompt is empty, or if both the primary and
                the fallback model fail.
        """
        # Reject an unusable prompt before touching any model: retrying it on
        # the fallback cannot succeed and would only report a misleading
        # "both models failed" error.
        self._normalize_prompt(prompt)

        try:
            output = self._call_model(prompt, self.primary_model)
            return output, self.primary_model, False

        except Exception as primary_error:
            print(f"Primary model failed: {primary_error}", flush=True)

            # The fallback runs inside this handler so that the primary
            # error stays attached as context of any fallback error.
            try:
                output = self._call_model(prompt, self.fallback_model)
                return output, self.fallback_model, True

            except Exception as fallback_error:
                print(f"Fallback model failed: {fallback_error}", flush=True)
                raise RuntimeError(
                    "Both primary and fallback models failed."
                ) from fallback_error


# Module-level instance created at import time; constructing it reads its
# configuration values from `settings`.
model_client = ModelClient()