# Siggmoid's picture
# Fix AI feedback: use HF Inference chat API via LangChain
# 51e25cb
import logging
import os
from functools import lru_cache
from typing import Any, List, Optional
from huggingface_hub import InferenceClient
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
from langchain_core.outputs import ChatGeneration, ChatResult
from pydantic import Field
# Module-level logger; invoke_chat uses it to report per-model failures.
logger = logging.getLogger(__name__)
# Default model id for HuggingFaceInferenceChat, get_llm and invoke_chat.
MODEL_ID = "allenai/Olmo-3-7B-Instruct"
# Second model id that invoke_chat tries after the requested one.
FALLBACK_MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
def get_hf_token() -> str:
    """Return the Hugging Face API token found in the environment.

    The variables ``HF_TOKEN``, ``HUGGINGFACEHUB_API_TOKEN`` and
    ``HUGGING_FACE_HUB_TOKEN`` are checked in that order. Surrounding
    whitespace is stripped, and a blank value is treated as unset so that a
    secret pasted with a trailing newline or spaces still works and a blank
    earlier variable does not hide a valid later one.

    As a side effect, ``HUGGINGFACEHUB_API_TOKEN`` is populated with the
    token when it is missing or blank.

    Returns:
        The first non-blank token, stripped of surrounding whitespace.

    Raises:
        ValueError: If none of the variables holds a non-blank value.
    """
    for name in ("HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
        token = os.environ.get(name, "").strip()
        if token:
            break
    else:
        raise ValueError(
            "HF_TOKEN is not set. Add it as a Space secret (Settings → Repository secrets)."
        )
    # LangChain / huggingface_hub also read this name
    if not os.environ.get("HUGGINGFACEHUB_API_TOKEN", "").strip():
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = token
    return token
@lru_cache(maxsize=1)
def get_inference_client() -> InferenceClient:
    """Build the Hugging Face ``InferenceClient`` once and reuse it.

    The token is read through ``get_hf_token()`` on the first call only;
    ``lru_cache`` hands back the same client object on later calls.

    Returns:
        The cached ``InferenceClient`` instance.

    Raises:
        ValueError: Propagated from ``get_hf_token()`` when no token is set.
    """
    api_key = get_hf_token()
    client = InferenceClient(api_key=api_key)
    return client
class HuggingFaceInferenceChat(BaseChatModel):
    """LangChain chat model using Hugging Face Inference API chat.completions.

    Each generation converts the LangChain messages into role/content dicts
    and sends them through the client returned by ``get_inference_client()``.

    Attributes:
        model_id: Model identifier sent as ``model``.
        max_tokens: Value sent as ``max_tokens``.
        temperature: Value sent as ``temperature``.
    """

    model_id: str = Field(default=MODEL_ID)
    max_tokens: int = 512
    temperature: float = 0.2

    @property
    def _llm_type(self) -> str:
        """Short identifier string for this chat model implementation."""
        return "huggingface-inference-chat"

    def _to_hf_messages(self, messages: List[BaseMessage]) -> list[dict[str, str]]:
        """Convert LangChain messages into ``{"role", "content"}`` dicts.

        System, human and AI messages map to the ``system``, ``user`` and
        ``assistant`` roles. Any other message type is skipped, and a warning
        is logged so the omission is visible instead of silent.

        Args:
            messages: Conversation to convert, in order.

        Returns:
            The converted messages, in the same order as the input.
        """
        hf_messages: list[dict[str, str]] = []
        for msg in messages:
            if isinstance(msg, SystemMessage):
                role = "system"
            elif isinstance(msg, HumanMessage):
                role = "user"
            elif isinstance(msg, AIMessage):
                role = "assistant"
            else:
                logger.warning(
                    "Skipping unsupported message type %s", type(msg).__name__
                )
                continue
            hf_messages.append({"role": role, "content": str(msg.content)})
        return hf_messages

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Any = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Run one chat completion and wrap the reply as a ``ChatResult``.

        Args:
            messages: Conversation to send.
            stop: Optional stop sequences; forwarded to the API when given.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            A ``ChatResult`` holding a single ``AIMessage`` built from the
            first choice (empty string when that choice has no content).

        Raises:
            RuntimeError: If the response contains no choices.
        """
        request: dict[str, Any] = {
            "model": self.model_id,
            "messages": self._to_hf_messages(messages),
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
        }
        # Only send ``stop`` when the caller asked for it, so requests without
        # stop sequences are identical to what was sent before.
        if stop:
            request["stop"] = stop
        response = get_inference_client().chat.completions.create(**request)
        if not response.choices:
            raise RuntimeError(f"No choices returned for model {self.model_id}")
        content = response.choices[0].message.content or ""
        return ChatResult(
            generations=[ChatGeneration(message=AIMessage(content=content))]
        )
# Most recently built chat model; replaced when a different model id is requested.
_llm: HuggingFaceInferenceChat | None = None


def get_llm(model_id: str = MODEL_ID) -> HuggingFaceInferenceChat:
    """Return the cached chat model for ``model_id``, building it if needed.

    A new ``HuggingFaceInferenceChat`` is created when nothing is cached or
    the cached instance was built for a different model id. Before building,
    ``get_hf_token()`` is called so a missing token fails here.

    Args:
        model_id: Model identifier for the returned chat model.

    Returns:
        The module-level ``HuggingFaceInferenceChat`` instance.
    """
    global _llm
    cached = _llm
    if cached is not None and cached.model_id == model_id:
        return cached
    get_hf_token()
    _llm = HuggingFaceInferenceChat(model_id=model_id)
    return _llm
def invoke_chat(messages: List[BaseMessage], model_id: str = MODEL_ID) -> str:
    """Call primary model, then fallback if the provider rejects the request.

    ``model_id`` is tried first and ``FALLBACK_MODEL_ID`` second. The first
    non-blank reply is returned with surrounding whitespace stripped. A
    failed attempt is logged and clears the module-level cached model; a
    blank reply is logged and the next model is tried.

    Args:
        messages: Conversation to send to the model.
        model_id: Model identifier to try first.

    Returns:
        The stripped text of the first non-blank reply.

    Raises:
        RuntimeError: If no attempt produced non-blank text. The message is
            the text of the last exception raised by an attempt, or
            ``"Unknown inference error"`` when no attempt raised. The last
            exception, when there is one, is chained as ``__cause__``.
    """
    global _llm
    last_error: Exception | None = None
    for mid in (model_id, FALLBACK_MODEL_ID):
        try:
            result = get_llm(mid).invoke(messages)
        except Exception as exc:  # boundary: any provider failure triggers fallback
            last_error = exc
            logger.warning("HF chat failed for model %s: %s", mid, exc)
            # Drop the cached model so the next get_llm() call rebuilds it.
            _llm = None
            continue
        raw = result.content
        text = (raw if isinstance(raw, str) else str(raw)).strip()
        if text:
            return text
        logger.warning("HF chat returned an empty response for model %s", mid)
    if last_error is None:
        raise RuntimeError("Unknown inference error")
    # Chain the cause so the original traceback is not lost.
    raise RuntimeError(str(last_error)) from last_error
if __name__ == "__main__":
    # Manual smoke test: send one sample ATS prompt and print the reply.
    logging.basicConfig(level=logging.INFO)
    demo_prompt = (
        "ATS Scores: Semantic 0.45, Keyword 0.70, Final 0.68. "
        "Missing: docker, tensorflow. Skill overlap: 70%. "
        "Write 3 short sections: Score Explanation, Weak Areas, Actionable Improvements."
    )
    demo_messages = [
        SystemMessage(content="You are an ATS resume analyst."),
        HumanMessage(content=demo_prompt),
    ]
    print(invoke_chat(demo_messages))