File size: 2,163 Bytes
b4c7867
 
 
1bfb390
b4c7867
 
1bfb390
 
b4c7867
 
1bfb390
b4c7867
 
 
1bfb390
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
import gradio as gr
from huggingface_hub import InferenceClient
from huggingface_hub.utils import HfHubHTTPError

# Credentials are read from the environment; surrounding whitespace is trimmed
# and a missing variable is treated as an empty string.
HF_INFERENCE_TOKEN = os.getenv("HF_INFERENCE_TOKEN", "").strip()
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()

# The dedicated inference token wins; HF_TOKEN is used only when it is empty.
HF_API_TOKEN = HF_INFERENCE_TOKEN if HF_INFERENCE_TOKEN else HF_TOKEN

# Model id sent to the inference client, overridable via HF_LLM_MODEL.
HF_LLM_MODEL = os.getenv("HF_LLM_MODEL", "HuggingFaceH4/zephyr-7b-beta").strip()

# The shared client is only built when a token is available; otherwise it
# stays None so callers can report the missing configuration themselves.
if HF_API_TOKEN:
    _client = InferenceClient(model=HF_LLM_MODEL, token=HF_API_TOKEN)
else:
    _client = None

def _chat_completion_fallback(prompt: str, max_new_tokens: int, temperature: float) -> str:
    """Request a reply to *prompt* through the chat-completions endpoint.

    The prompt is sent as a single user message. The content of the first
    returned choice is stripped and returned; an empty string is returned
    when the response carries no choices or no content.
    """
    resp = _client.chat.completions.create(
        model=HF_LLM_MODEL,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_new_tokens,
        temperature=temperature,
    )
    # Be defensive about the response shape: choices may be empty/None and
    # the choice may lack a message or content.
    choice = (resp.choices or [None])[0]
    content = getattr(getattr(choice, "message", None), "content", "") if choice else ""
    return (content or "").strip()


def llm_generate(prompt: str, max_new_tokens: int = 450, temperature: float = 0.2) -> str:
    """Generate a completion for *prompt* with the configured inference client.

    The text-generation endpoint is tried first. If the client reports, via a
    ``ValueError``, that the model does not support text generation (only the
    conversational task), the request is retried through chat completions.

    Args:
        prompt: Text sent to the model.
        max_new_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature. Values ``<= 0`` disable sampling.

    Returns:
        The generated text with surrounding whitespace removed ("" if the
        backend returned nothing).

    Raises:
        gr.Error: If no API token is configured, or if the request fails with
            a ``ValueError`` or ``HfHubHTTPError`` (including a failed
            conversational fallback). Other exception types from the
            text-generation call propagate unchanged.
    """
    if _client is None:
        raise gr.Error("Set HF_INFERENCE_TOKEN (or HF_TOKEN) in Space secrets or local environment.")

    sampling = temperature > 0
    try:
        out = _client.text_generation(
            prompt,
            max_new_tokens=max_new_tokens,
            # Only send a temperature when sampling: text-generation backends
            # such as TGI reject non-positive temperatures, so greedy decoding
            # must omit the parameter rather than pass 0.
            temperature=temperature if sampling else None,
            do_sample=sampling,
            return_full_text=False,
        )
        return (out or "").strip()
    except ValueError as e:
        msg = str(e)
        # The task mismatch is recognised by message text; presumably this is
        # how huggingface_hub reports a conversational-only model.
        needs_chat = (
            "not supported for task text-generation" in msg
            or "Supported task: conversational" in msg
        )
        if not needs_chat:
            raise gr.Error(f"LLM request failed: {msg}") from e
        try:
            return _chat_completion_fallback(prompt, max_new_tokens, temperature)
        except Exception as inner:
            raise gr.Error(f"LLM request failed after conversational fallback: {inner}") from inner
    except HfHubHTTPError as e:
        msg = str(e)
        # A 410 / "no longer supported" response is mapped to an upgrade hint
        # instead of surfacing the raw HTTP error text.
        if "api-inference.huggingface.co is no longer supported" in msg or "410 Client Error" in msg:
            raise gr.Error(
                "Your Hugging Face Hub client is outdated for inference routing. "
                "Upgrade `huggingface_hub` and restart the app."
            ) from e
        raise gr.Error(f"LLM request failed: {msg}") from e