from __future__ import annotations

import os
from typing import Any

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load variables from a local .env file into the process environment.
load_dotenv()
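
# A minimal example .env for local runs (placeholder values, not real
# credentials). HF_MODEL, HF_MAX_TOKENS, and HF_TEMPERATURE are optional and
# fall back to the defaults read in HFLLMClient.__init__ below:
#
#   HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
#   HF_MODEL=Qwen/Qwen2.5-7B-Instruct
#   HF_MAX_TOKENS=128
#   HF_TEMPERATURE=0.1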

class HFLLMClient:
    """Thin wrapper around huggingface_hub.InferenceClient for chat completion."""

    def __init__(self) -> None:
        # Read the API token from the environment; fail fast if it is missing.
        self.api_key = os.getenv("HF_TOKEN")
        print("HF token present:", bool(self.api_key))
        if not self.api_key:
            raise ValueError("HF_TOKEN is not set")

        # Model and sampling settings are overridable via environment variables.
        self.model = os.getenv("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct")
        self.max_tokens = int(os.getenv("HF_MAX_TOKENS", "128"))
        self.temperature = float(os.getenv("HF_TEMPERATURE", "0.1"))

        # provider="auto" lets huggingface_hub route to an available provider.
        self.client = InferenceClient(
            provider="auto",
            api_key=self.api_key,
        )

    def generate(self, prompt: str) -> str:
        """
        Generate a short, low-temperature (near-deterministic) answer for
        benchmark submission tasks.
        """
        try:
            output = self.client.chat_completion(
                model=self.model,
                messages=[
                    {
                        # Pin the model to terse, exact-match answers.
                        "role": "system",
                        "content": (
                            "You are an exact-match benchmark solver. "
                            "Return only the final answer with no explanation."
                        ),
                    },
                    {
                        "role": "user",
                        "content": prompt,
                    },
                ],
                max_tokens=self.max_tokens,
                temperature=self.temperature,
            )
            text = self._extract_text(output)
            print("LLM response preview:", text[:300])
            return text
        except Exception as e:
            # Surface provider/network failures as a single descriptive error.
            raise ValueError(f"Inference call failed: {e}") from e

    @staticmethod
    def _extract_text(output: Any) -> str:
        """
        Safely extract text from HF chat completion responses.

        Handles plain-string content, the list-of-parts shape some providers
        return, and malformed responses (which yield an empty string).
        """
        if output is None:
            return ""
        try:
            text = output.choices[0].message.content
        except Exception:
            # Unexpected response shape; treat it as an empty answer.
            return ""
        if text is None:
            return ""
        if isinstance(text, str):
            return text.strip()
        if isinstance(text, list):
            # Join the text pieces of a multi-part content list.
            parts = []
            for item in text:
                if isinstance(item, dict):
                    piece = item.get("text") or item.get("content") or ""
                    if piece:
                        parts.append(str(piece))
                elif item is not None:
                    parts.append(str(item))
            return " ".join(parts).strip()
        return str(text).strip()
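

# A minimal usage sketch (assumes HF_TOKEN is available via the environment or
# a local .env file; the prompt below is illustrative, not part of the module):
if __name__ == "__main__":
    client = HFLLMClient()
    answer = client.generate("What is the capital of France? Answer with one word.")
    print("Answer:", answer)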