Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from typing import Iterator, List, Dict, Any, Tuple | |
| from backend_hf_api import HFInferenceBackend, is_hf_api_available | |
# Runtime configuration; every value can be overridden via Space Variables / env.
SYSTEM_PROMPT_DEFAULT = os.getenv("SYSTEM_PROMPT", "You are a helpful assistant. Be concise and accurate.")
DEFAULT_MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512"))  # cap on generated tokens per reply
DEFAULT_TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))  # sampling temperature for the backend
# Use a valid Nemotron repo by default; override via Space Variables if you want another.
# NOTE(review): confirm this repo id exists on the Hub — casing/name may be stale.
DEFAULT_HF_API_MODEL = os.getenv("HF_API_MODEL", "NVIDIA/Nemotron-3-8B-Instruct")
| def _msg_content_to_text(content: Any) -> str: | |
| if isinstance(content, str): | |
| return content | |
| if isinstance(content, dict) and isinstance(content.get("text"), str): | |
| return content["text"] | |
| return "" if content is None else str(content) | |
| def _history_to_pairs(history: Any) -> List[Tuple[str, str]]: | |
| """Gradio v6 messages or legacy (user, assistant) pairs β (user, assistant) pairs.""" | |
| pairs: List[Tuple[str, str]] = [] | |
| if not history: | |
| return pairs | |
| if isinstance(history[0], dict): | |
| pending_user: str | None = None | |
| for m in history: | |
| role = m.get("role") | |
| text = _msg_content_to_text(m.get("content")) | |
| if role == "user": | |
| if pending_user is not None: | |
| pairs.append((pending_user, "")) | |
| pending_user = text | |
| elif role == "assistant": | |
| if pending_user is None: | |
| pairs.append(("", text)) | |
| else: | |
| pairs.append((pending_user, text)) | |
| pending_user = None | |
| if pending_user is not None: | |
| pairs.append((pending_user, "")) | |
| return pairs | |
| if isinstance(history[0], (list, tuple)) and len(history[0]) == 2: | |
| return [(str(u or ""), str(a or "")) for (u, a) in history] | |
| return [(str(history), "")] | |
def chat_fn(
    message: str,
    history: List[Dict[str, Any]] | List[Tuple[str, str]],
    model_name: str,
    system_prompt: str,
    temperature: float,
    max_new_tokens: int,
) -> Iterator[str]:
    """Stream a chat completion for the Gradio ChatInterface.

    Yields text chunks from the HF Inference API backend. Failures are
    surfaced as a single "[error] ..." chunk instead of raising, so the UI
    never crashes mid-conversation.
    """
    if not is_hf_api_available():
        # Fixed mojibake in the user-facing message ("β" was a garbled "→").
        yield "[error] HF_TOKEN not set. Add it in Spaces → Settings → Secrets and restart."
        return
    try:
        # Empty textbox / empty system prompt fall back to the env defaults.
        backend = HFInferenceBackend(model_name or DEFAULT_HF_API_MODEL)
        yield from backend.generate_stream(
            system_prompt=(system_prompt or SYSTEM_PROMPT_DEFAULT).strip(),
            history=_history_to_pairs(history),
            user_msg=message,
            temperature=float(temperature),
            max_new_tokens=int(max_new_tokens),
        )
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: report in-chat.
        yield f"[error] {type(e).__name__}: {e}"
# UI layout: model picker + advanced settings feeding a ChatInterface.
with gr.Blocks() as demo:
    gr.Markdown("# π€ HF Inference API Chatbot (Gradio v6)\nUses your **HF_TOKEN**. Preflight checks model to prevent crashes.")
    # Free-text repo id; wired into chat_fn via additional_inputs below.
    model_name = gr.Textbox(
        value=DEFAULT_HF_API_MODEL,
        label="HF model repo",
        placeholder="e.g., NVIDIA/Nemotron-3-8B-Instruct",
    )
    # Collapsed by default; also used as the accordion hosting the extra inputs.
    with gr.Accordion("Advanced", open=False) as adv:
        system_prompt = gr.Textbox(value=SYSTEM_PROMPT_DEFAULT, label="System prompt", lines=3)
        temperature = gr.Slider(0.0, 1.5, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
        max_new_tokens = gr.Slider(16, 4096, value=DEFAULT_MAX_NEW_TOKENS, step=16, label="Max new tokens")
    gr.ChatInterface(
        fn=chat_fn,
        title="Chat",
        # Each example row supplies message + every additional input, in order.
        examples=[
            ["Summarize why the sky is blue in 3 sentences.", DEFAULT_HF_API_MODEL, SYSTEM_PROMPT_DEFAULT, DEFAULT_TEMPERATURE, DEFAULT_MAX_NEW_TOKENS],
            ["Draft a friendly product blurb for a coffee mug.", DEFAULT_HF_API_MODEL, SYSTEM_PROMPT_DEFAULT, DEFAULT_TEMPERATURE, DEFAULT_MAX_NEW_TOKENS],
            ["Explain binary search with a tiny Python example.", DEFAULT_HF_API_MODEL, SYSTEM_PROMPT_DEFAULT, DEFAULT_TEMPERATURE, DEFAULT_MAX_NEW_TOKENS],
        ],
        cache_examples=False,  # examples would otherwise hit the paid API at build time
        additional_inputs=[model_name, system_prompt, temperature, max_new_tokens],
        additional_inputs_accordion=adv,
        save_history=True,
        editable=True,
        autoscroll=True,
    )
if __name__ == "__main__":
    # queue() enables request queuing/streaming before the server starts.
    demo.queue().launch()