File size: 2,636 Bytes
0084562
 
f664bab
0084562
 
f664bab
0084562
f664bab
 
 
 
 
0084562
f664bab
 
 
 
 
 
0084562
 
 
 
f664bab
 
 
 
 
 
0084562
 
 
f664bab
 
 
 
0084562
 
 
 
 
 
 
 
 
 
 
f664bab
0084562
 
f664bab
 
0084562
 
 
f664bab
 
0084562
 
 
 
 
 
 
 
 
f664bab
0084562
 
 
 
2d0a6e3
0084562
 
2d0a6e3
0084562
 
1e22ea2
0084562
 
 
 
 
 
 
 
 
 
2d0a6e3
0084562
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from __future__ import annotations

import os
from typing import Any

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load variables from a local .env file (if present) into the process
# environment so the HF_* settings below are visible via os.getenv before
# HFLLMClient is constructed.
load_dotenv()


class HFLLMClient:
    """Thin client for the Hugging Face Inference API.

    Configuration is read entirely from environment variables:
    ``HF_TOKEN`` (required), ``HF_MODEL``, ``HF_MAX_TOKENS`` and
    ``HF_TEMPERATURE`` (all optional, with defaults). Designed for
    benchmark-style prompts that expect a short, exact answer.
    """

    def __init__(self) -> None:
        token = os.getenv("HF_TOKEN")
        print("HF token present:", bool(token))

        # Fail fast: every call needs an API token.
        if not token:
            raise ValueError("HF_TOKEN is not set")

        self.api_key = token
        self.model = os.getenv("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct")
        self.max_tokens = int(os.getenv("HF_MAX_TOKENS", "128"))
        self.temperature = float(os.getenv("HF_TEMPERATURE", "0.1"))

        # "auto" lets huggingface_hub pick an available inference provider.
        self.client = InferenceClient(provider="auto", api_key=self.api_key)

    def generate(self, prompt: str) -> str:
        """Return a short, deterministic answer for *prompt*.

        The model is steered by a system message to emit only the final
        answer, suitable for exact-match benchmark submission.

        Raises:
            ValueError: if the underlying inference call fails, chaining
                the original exception.
        """
        system_instruction = (
            "You are an exact-match benchmark solver. "
            "Return only the final answer with no explanation."
        )
        conversation = [
            {"role": "system", "content": system_instruction},
            {"role": "user", "content": prompt},
        ]

        try:
            response = self.client.chat_completion(
                model=self.model,
                messages=conversation,
                max_tokens=self.max_tokens,
                temperature=self.temperature,
            )
            answer = self._extract_text(response)
            print("LLM response preview:", answer[:300])
            return answer
        except Exception as e:
            # Normalize any provider/transport failure into ValueError,
            # preserving the cause chain for debugging.
            raise ValueError(f"Inference call failed: {e}") from e

    @staticmethod
    def _extract_text(output: Any) -> str:
        """Extract assistant text from a chat-completion response.

        Tolerates ``None``, malformed payloads, and content delivered
        either as a plain string or a list of parts; always returns a
        (possibly empty) stripped string and never raises.
        """
        if output is None:
            return ""

        try:
            content = output.choices[0].message.content
        except Exception:
            # Missing/odd response shape — treat as empty rather than fail.
            return ""

        if content is None:
            return ""

        if isinstance(content, str):
            return content.strip()

        if isinstance(content, list):
            # Multi-part content: dict parts contribute only when they carry
            # truthy text; any other non-None part is stringified as-is.
            collected: list[str] = []
            for part in content:
                if isinstance(part, dict):
                    value = part.get("text") or part.get("content") or ""
                    if value:
                        collected.append(str(value))
                elif part is not None:
                    collected.append(str(part))
            return " ".join(collected).strip()

        # Fallback for scalar non-string content (ints, custom objects, ...).
        return str(content).strip()