File size: 4,341 Bytes
51e25cb
d2b7a80
51e25cb
 
d2b7a80
51e25cb
 
 
 
 
 
 
d2b7a80
 
51e25cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d2b7a80
51e25cb
 
d2b7a80
51e25cb
 
 
d2b7a80
51e25cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d2b7a80
51e25cb
 
 
 
 
d2b7a80
51e25cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d2b7a80
 
 
51e25cb
 
d2b7a80
 
 
 
 
51e25cb
d2b7a80
 
 
 
 
51e25cb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import logging
import os
from functools import lru_cache
from typing import Any, List, Optional

from huggingface_hub import InferenceClient
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
from langchain_core.outputs import ChatGeneration, ChatResult
from pydantic import Field

# Module-level logger; invoke_chat() reports per-model failures through it.
logger = logging.getLogger(__name__)

# Default model id for HuggingFaceInferenceChat, get_llm() and invoke_chat().
MODEL_ID = "allenai/Olmo-3-7B-Instruct"
# Second model invoke_chat() tries when the requested model fails or returns no text.
FALLBACK_MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"


def get_hf_token() -> str:
    """Return the Hugging Face API token found in the environment.

    The variables ``HF_TOKEN``, ``HUGGINGFACEHUB_API_TOKEN`` and
    ``HUGGING_FACE_HUB_TOKEN`` are checked in that order. Surrounding
    whitespace is stripped from each value, and a variable that is empty or
    contains only whitespace is treated as unset, so a secret pasted with a
    trailing newline still yields a usable token.

    As a side effect, ``HUGGINGFACEHUB_API_TOKEN`` is set to the resolved
    token when that variable is not already present in the environment.

    Returns:
        The first non-blank token, stripped of surrounding whitespace.

    Raises:
        ValueError: If none of the variables holds a non-blank value.
    """
    token = ""
    for name in ("HF_TOKEN", "HUGGINGFACEHUB_API_TOKEN", "HUGGING_FACE_HUB_TOKEN"):
        token = os.environ.get(name, "").strip()
        if token:
            break
    if not token:
        raise ValueError(
            "HF_TOKEN is not set. Add it as a Space secret (Settings → Repository secrets)."
        )
    # LangChain / huggingface_hub also read this name
    os.environ.setdefault("HUGGINGFACEHUB_API_TOKEN", token)
    return token


@lru_cache(maxsize=1)
def get_inference_client() -> InferenceClient:
    """Build the shared ``InferenceClient`` and memoize it.

    The token is resolved through ``get_hf_token()`` on the first call only;
    later calls return the cached client object.
    """
    api_token = get_hf_token()
    client = InferenceClient(api_key=api_token)
    return client


class HuggingFaceInferenceChat(BaseChatModel):
    """LangChain chat model using Hugging Face Inference API chat.completions.

    Requests are sent through the shared client returned by
    ``get_inference_client()``; only the first choice of each response is
    used.
    """

    # Model identifier sent as ``model`` with every request.
    model_id: str = Field(default=MODEL_ID)
    # Forwarded unchanged as the ``max_tokens`` request argument.
    max_tokens: int = 512
    # Forwarded unchanged as the ``temperature`` request argument.
    temperature: float = 0.2

    @property
    def _llm_type(self) -> str:
        """Return the identifier string for this chat model type."""
        return "huggingface-inference-chat"

    def _to_hf_messages(self, messages: List[BaseMessage]) -> list[dict[str, str]]:
        """Convert LangChain messages into ``{"role", "content"}`` dicts.

        System, human and AI messages map to the ``system``, ``user`` and
        ``assistant`` roles. Any other message type is skipped, and a warning
        is logged so the omission is visible instead of silent.

        Args:
            messages: Conversation history in LangChain message objects.

        Returns:
            One dict per supported message, in the original order, with the
            content coerced to ``str``.
        """
        role_by_type = (
            (SystemMessage, "system"),
            (HumanMessage, "user"),
            (AIMessage, "assistant"),
        )
        hf_messages: list[dict[str, str]] = []
        for msg in messages:
            role = next(
                (name for msg_type, name in role_by_type if isinstance(msg, msg_type)),
                None,
            )
            if role is None:
                logger.warning(
                    "Dropping unsupported message type %s", type(msg).__name__
                )
                continue
            hf_messages.append({"role": role, "content": str(msg.content)})
        return hf_messages

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Any = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Run one chat completion and wrap the first choice as a ChatResult.

        Args:
            messages: Conversation to send; converted by ``_to_hf_messages``.
            stop: Optional stop sequences. Forwarded to the completion call
                when non-empty so generation actually halts on them.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            A ``ChatResult`` holding a single ``AIMessage``. Its content is
            an empty string when the provider returns no content.

        Raises:
            RuntimeError: If the response contains no choices.
        """
        request: dict[str, Any] = {
            "model": self.model_id,
            "messages": self._to_hf_messages(messages),
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
        }
        # Only send ``stop`` when the caller supplied sequences, so the
        # request is unchanged for callers that never pass it.
        if stop:
            request["stop"] = list(stop)
        response = get_inference_client().chat.completions.create(**request)
        if not response.choices:
            raise RuntimeError(f"No choices returned for model {self.model_id}")
        content = response.choices[0].message.content or ""
        return ChatResult(
            generations=[ChatGeneration(message=AIMessage(content=content))]
        )


_llm: HuggingFaceInferenceChat | None = None


def get_llm(model_id: str = MODEL_ID) -> HuggingFaceInferenceChat:
    """Return the module-wide chat model for ``model_id``.

    A single instance is kept in the module global ``_llm``. It is reused
    while the requested model id matches and replaced when a different id is
    requested. ``get_hf_token()`` is called before a new instance is built,
    so a missing token raises before the cached instance is replaced.
    """
    global _llm
    cached = _llm
    if cached is not None and cached.model_id == model_id:
        return cached
    get_hf_token()
    _llm = HuggingFaceInferenceChat(model_id=model_id)
    return _llm


def invoke_chat(messages: List[BaseMessage], model_id: str = MODEL_ID) -> str:
    """Call the primary model, then the fallback if it fails or returns no text.

    Args:
        messages: Conversation to send to the model.
        model_id: Model tried first. ``FALLBACK_MODEL_ID`` is tried second
            unless it is the same model.

    Returns:
        The stripped text of the first non-empty response.

    Raises:
        RuntimeError: If no model produced text. When a call raised, the
            message is that of the last exception and the exception is
            attached as ``__cause__``.
    """
    global _llm
    last_error: Exception | None = None
    # dict.fromkeys de-duplicates while preserving order, so passing the
    # fallback id as ``model_id`` does not call the same model twice.
    for mid in dict.fromkeys((model_id, FALLBACK_MODEL_ID)):
        try:
            result = get_llm(mid).invoke(messages)
        except Exception as exc:  # boundary: any failure moves on to the next model
            last_error = exc
            logger.warning("HF chat failed for model %s: %s", mid, exc)
            # Drop the cached model so the next attempt builds a fresh one.
            _llm = None
            continue
        raw = result.content
        text = (raw if isinstance(raw, str) else str(raw)).strip()
        if text:
            return text
        logger.warning("HF chat returned an empty response for model %s", mid)
    if last_error is not None:
        # Chain the cause so the original traceback is not lost.
        raise RuntimeError(str(last_error)) from last_error
    raise RuntimeError("All models returned an empty response")


if __name__ == "__main__":
    # Manual smoke test: send one sample ATS prompt and print the reply.
    logging.basicConfig(level=logging.INFO)
    system_prompt = SystemMessage(content="You are an ATS resume analyst.")
    user_prompt = HumanMessage(
        content=(
            "ATS Scores: Semantic 0.45, Keyword 0.70, Final 0.68. "
            "Missing: docker, tensorflow. Skill overlap: 70%. "
            "Write 3 short sections: Score Explanation, Weak Areas, Actionable Improvements."
        )
    )
    out = invoke_chat([system_prompt, user_prompt])
    print(out)