Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from typing import Iterator, List, Dict, Any, Tuple | |
| from backend_hf_api import HFInferenceBackend, is_hf_api_available | |
# Runtime configuration; every value can be overridden via Space Variables / env.
SYSTEM_PROMPT_DEFAULT = os.getenv("SYSTEM_PROMPT", "You are a helpful assistant. Be concise and accurate.")
DEFAULT_MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "512"))  # cap on generated tokens per reply
DEFAULT_TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))  # sampling temperature for the backend
# Use a valid Nemotron repo by default; override via Space Variables if you want another.
# NOTE(review): confirm this repo id exists on the Hub — casing/name may be stale.
DEFAULT_HF_API_MODEL = os.getenv("HF_API_MODEL", "NVIDIA/Nemotron-3-8B-Instruct")
| def _msg_content_to_text(content: Any) -> str: | |
| if isinstance(content, str): | |
| return content | |
| if isinstance(content, dict) and isinstance(content.get("text"), str): | |
| return content["text"] | |
| return "" if content is None else str(content) | |
| def _history_to_pairs(history: Any) -> List[Tuple[str, str]]: | |
| """Gradio v6 messages or legacy (user, assistant) pairs β (user, assistant) pairs.""" | |
| pairs: List[Tuple[str, str]] = [] | |
| if not history: | |
| return pairs | |
| if isinstance(history[0], dict): | |
| pending_user: str | None = None | |
| for m in history: | |
| role = m.get("role") | |
| text = _msg_content_to_text(m.get("content")) | |
| if role == "user": | |
| if pending_user is not None: | |
| pairs.append((pending_user, "")) | |
| pending_user = text | |
| elif role == "assistant": | |
| if pending_user is None: | |
| pairs.append(("", text)) | |
| else: | |
| pairs.append((pending_user, text)) | |
| pending_user = None | |
| if pending_user is not None: | |
| pairs.append((pending_user, "")) | |
| return pairs | |
| if isinstance(history[0], (list, tuple)) and len(history[0]) == 2: | |
| return [(str(u or ""), str(a or "")) for (u, a) in history] | |
| return [(str(history), "")] | |
def chat_fn(
    message: str,
    history: List[Dict[str, Any]] | List[Tuple[str, str]],
    model_name: str,
    system_prompt: str,
    temperature: float,
    max_new_tokens: int,
) -> Iterator[str]:
    """Stream a chat completion for the Gradio ChatInterface.

    Yields text chunks from the HF Inference API backend. Failures are
    surfaced as a single "[error] ..." chunk instead of raising, so the UI
    never crashes mid-conversation.
    """
    if not is_hf_api_available():
        # Fixed mojibake in the user-facing message ("β" was a garbled "→").
        yield "[error] HF_TOKEN not set. Add it in Spaces → Settings → Secrets and restart."
        return
    try:
        # Empty textbox / empty system prompt fall back to the env defaults.
        backend = HFInferenceBackend(model_name or DEFAULT_HF_API_MODEL)
        yield from backend.generate_stream(
            system_prompt=(system_prompt or SYSTEM_PROMPT_DEFAULT).strip(),
            history=_history_to_pairs(history),
            user_msg=message,
            temperature=float(temperature),
            max_new_tokens=int(max_new_tokens),
        )
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: report in-chat.
        yield f"[error] {type(e).__name__}: {e}"
# UI layout: model picker + advanced settings feeding a ChatInterface.
with gr.Blocks() as demo:
    gr.Markdown("# π€ HF Inference API Chatbot (Gradio v6)\nUses your **HF_TOKEN**. Preflight checks model to prevent crashes.")
    # Free-text repo id; wired into chat_fn via additional_inputs below.
    model_name = gr.Textbox(
        value=DEFAULT_HF_API_MODEL,
        label="HF model repo",
        placeholder="e.g., NVIDIA/Nemotron-3-8B-Instruct",
    )
    # Collapsed by default; also used as the accordion hosting the extra inputs.
    with gr.Accordion("Advanced", open=False) as adv:
        system_prompt = gr.Textbox(value=SYSTEM_PROMPT_DEFAULT, label="System prompt", lines=3)
        temperature = gr.Slider(0.0, 1.5, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
        max_new_tokens = gr.Slider(16, 4096, value=DEFAULT_MAX_NEW_TOKENS, step=16, label="Max new tokens")
    gr.ChatInterface(
        fn=chat_fn,
        title="Chat",
        # Each example row supplies message + every additional input, in order.
        examples=[
            ["Summarize why the sky is blue in 3 sentences.", DEFAULT_HF_API_MODEL, SYSTEM_PROMPT_DEFAULT, DEFAULT_TEMPERATURE, DEFAULT_MAX_NEW_TOKENS],
            ["Draft a friendly product blurb for a coffee mug.", DEFAULT_HF_API_MODEL, SYSTEM_PROMPT_DEFAULT, DEFAULT_TEMPERATURE, DEFAULT_MAX_NEW_TOKENS],
            ["Explain binary search with a tiny Python example.", DEFAULT_HF_API_MODEL, SYSTEM_PROMPT_DEFAULT, DEFAULT_TEMPERATURE, DEFAULT_MAX_NEW_TOKENS],
        ],
        cache_examples=False,  # examples would otherwise hit the paid API at build time
        additional_inputs=[model_name, system_prompt, temperature, max_new_tokens],
        additional_inputs_accordion=adv,
        save_history=True,
        editable=True,
        autoscroll=True,
    )
if __name__ == "__main__":
    # queue() enables request queuing/streaming before the server starts.
    demo.queue().launch()