Hitakshi26's picture
Testing2
1bfb390
import os
import gradio as gr
from huggingface_hub import InferenceClient
from huggingface_hub.utils import HfHubHTTPError
# Credentials for the Hugging Face Inference API come from the environment.
# HF_INFERENCE_TOKEN takes precedence; HF_TOKEN is used when it is unset or blank.
HF_INFERENCE_TOKEN = os.environ.get("HF_INFERENCE_TOKEN", "").strip()
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
HF_API_TOKEN = HF_INFERENCE_TOKEN or HF_TOKEN

# Model id used for generation. A variable that is set but blank (or only
# whitespace) falls back to the default too, so the client is never built
# with an empty model id.
_DEFAULT_LLM_MODEL = "HuggingFaceH4/zephyr-7b-beta"
HF_LLM_MODEL = os.environ.get("HF_LLM_MODEL", "").strip() or _DEFAULT_LLM_MODEL

# Shared inference client; stays None when no token is configured.
_client = InferenceClient(model=HF_LLM_MODEL, token=HF_API_TOKEN) if HF_API_TOKEN else None
def llm_generate(prompt: str, max_new_tokens: int = 450, temperature: float = 0.2) -> str:
    """Generate a completion for ``prompt`` with the configured Hugging Face model.

    The text-generation endpoint is tried first. If the client reports that the
    model is only served for the conversational task, the prompt is re-sent as
    a single user message through the chat-completions endpoint.

    Args:
        prompt: Text sent to the model.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. Sampling is enabled only when this
            is greater than zero.

    Returns:
        The generated text with surrounding whitespace stripped, or an empty
        string when the model returns nothing.

    Raises:
        gr.Error: If no API token is configured, or if the request (including
            the conversational fallback) fails.
    """
    if _client is None:
        raise gr.Error("Set HF_INFERENCE_TOKEN (or HF_TOKEN) in Space secrets or local environment.")

    sampling = temperature > 0
    try:
        out = _client.text_generation(
            prompt,
            max_new_tokens=max_new_tokens,
            # A non-positive temperature is meaningless for greedy decoding and
            # may be rejected by the backend, so it is omitted when not sampling.
            temperature=temperature if sampling else None,
            do_sample=sampling,
            return_full_text=False,
        )
        return (out or "").strip()
    except ValueError as err:
        msg = str(err)
        chat_only = (
            "not supported for task text-generation" in msg
            or "Supported task: conversational" in msg
        )
        if not chat_only:
            raise gr.Error(f"LLM request failed: {msg}") from err
        # The model is only exposed as a chat model: retry via chat completions.
        try:
            resp = _client.chat.completions.create(
                model=HF_LLM_MODEL,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_new_tokens,
                temperature=temperature,
            )
            # Tolerate an empty/missing choices list or a choice without content.
            choice = (resp.choices or [None])[0]
            content = getattr(getattr(choice, "message", None), "content", "") if choice else ""
            return (content or "").strip()
        except Exception as inner:
            raise gr.Error(f"LLM request failed after conversational fallback: {inner}") from inner
    except HfHubHTTPError as err:
        msg = str(err)
        if "api-inference.huggingface.co is no longer supported" in msg or "410 Client Error" in msg:
            # The legacy inference host was retired; only a newer client routes correctly.
            raise gr.Error(
                "Your Hugging Face Hub client is outdated for inference routing. "
                "Upgrade `huggingface_hub` and restart the app."
            ) from err
        raise gr.Error(f"LLM request failed: {msg}") from err