# app.py
"""
Instruction Chatbot using LLM + small knowledgebase (Hugging Face docs + Gradio docs).
Defaults to calling the Hugging Face Inference API (set HF_API_TOKEN in env).
If HF_API_TOKEN is missing, falls back to a KB-only answer.
Usage (locally):
1. export HF_API_TOKEN="hf_...." # optional but recommended for LLM replies
2. python app.py
3. open http://localhost:7860
On Spaces:
- Put app.py and requirements.txt into the repo root and push to a Gradio Space.
"""
import io
import json
import os
import tempfile
import time
import zipfile
from typing import Dict, List, Optional, Tuple

import gradio as gr
import requests
# --- Config: choose model used on HF Inference API if token provided ---
HF_MODEL = os.environ.get("HF_MODEL", "google/flan-t5-small")  # override via env var to use another model
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", None) # put your token in env on Spaces (use Secrets)
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"  # resolved once at import time
# --- Knowledge base: short curated snippets + source URLs ---
# These entries were assembled from official docs (Hugging Face + Gradio),
# and are used for retrieval-augmented answers.
# Entry schema: id (stable key), title (shown in citations), text (snippet
# injected into the LLM prompt), url (source link rendered in the UI).
KB = [
    {
        "id": "spaces_overview",
        "title": "Hugging Face Spaces overview",
        "text": (
            "Spaces are hosted apps on Hugging Face Hub. "
            "A Space is backed by a git-like repository: you push files (app.py, requirements.txt) and the "
            "Space automatically rebuilds and restarts on new commits."
        ),
        "url": "https://huggingface.co/docs/hub/spaces",
    },
    {
        "id": "spaces_repo",
        "title": "Spaces store code in a repo",
        "text": (
            "Spaces store your code inside a repository, similar to model and dataset repos. "
            "Use the repo workflow (commit/push) to update your app. The build system installs requirements.txt."
        ),
        "url": "https://huggingface.co/docs/hub/main/en/spaces-overview",
    },
    {
        "id": "gradio_interface",
        "title": "Gradio Interface basics",
        "text": (
            "Gradio's Interface / Blocks lets you wrap Python functions with input and output components "
            "to create web UIs quickly. Define input components, outputs, and a function and Gradio serves it."
        ),
        "url": "https://www.gradio.app/docs/gradio/interface",
    },
    {
        "id": "gradio_integration",
        "title": "Gradio ↔ Hugging Face integrations",
        "text": (
            "Gradio integrates with Hugging Face Inference Endpoints and Spaces. "
            "You can call Inference API from a Gradio app to get model responses without installing heavy libraries."
        ),
        "url": "https://www.gradio.app/guides/using-hugging-face-integrations",
    },
]
# --- Simple retriever: score KB entries by keyword overlap (fast, explainable) ---
def retrieve_kb(query: str, top_k: int = 3, kb: Optional[List[Dict]] = None) -> List[Dict]:
    """Rank knowledge-base entries by naive keyword overlap with *query*.

    Args:
        query: Free-text user question.
        top_k: Maximum number of entries to return.
        kb: Entries to search; defaults to the module-level KB. (Added as a
            backward-compatible parameter so the retriever can be reused and
            tested against other corpora.)

    Returns:
        Up to top_k entries. Positively-scored entries come first; if nothing
        matches at all, the first top_k entries are returned so the caller
        always has some context to work with.
    """
    entries = KB if kb is None else kb
    q = query.lower()
    scored = []
    for entry in entries:
        score = 0
        # one point per query word found in the entry's text or title
        for w in q.split():
            if w in entry["text"].lower() or w in entry["title"].lower():
                score += 1
        # small boost when a slash-separated query fragment appears in the URL
        if any(part in entry["url"].lower() for part in q.split("/")):
            score += 1
        scored.append((score, entry))
    # sort by score only; dict entries are never compared (stable sort keeps
    # original order among ties)
    scored.sort(key=lambda x: x[0], reverse=True)
    results = [e for s, e in scored if s > 0]
    if not results:
        # no positive hits: fall back to the first top_k entries
        results = [e for s, e in scored[:top_k]]
    return results[:top_k]
# --- HF Inference API call (text generation / instruction) ---
def call_hf_inference(prompt: str, max_tokens: int = 512, timeout: int = 60) -> str:
    """Query the Hugging Face Inference API and return the generated text.

    Args:
        prompt: Full prompt to send to the model at HF_API_URL.
        max_tokens: Cap on newly generated tokens (max_new_tokens).
        timeout: HTTP timeout in seconds.

    Returns:
        The generated text, or a JSON dump of the raw payload when the
        response shape is unrecognized.

    Raises:
        RuntimeError: If HF_API_TOKEN is unset, the HTTP status is not 200,
            or the API returns an error payload.
    """
    if not HF_API_TOKEN:
        raise RuntimeError("HF_API_TOKEN not set")
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": max_tokens},
        "options": {"wait_for_model": True},  # block until the model is loaded
    }
    resp = requests.post(HF_API_URL, headers=headers, json=payload, timeout=timeout)
    if resp.status_code != 200:
        raise RuntimeError(f"HF API error {resp.status_code}: {resp.text}")
    data = resp.json()
    # Inference API returns either a dict with 'error', a list of outputs,
    # or a bare string.
    if isinstance(data, dict) and data.get("error"):
        raise RuntimeError(f"HF API error: {data.get('error')}")
    # BUG FIX: the original indexed data[0] without checking that the list was
    # non-empty and that its first element was a dict, which could raise
    # IndexError/TypeError on unexpected payloads.
    if isinstance(data, list) and data and isinstance(data[0], dict) and "generated_text" in data[0]:
        return data[0]["generated_text"]
    if isinstance(data, dict) and "generated_text" in data:
        return data["generated_text"]
    if isinstance(data, str):
        return data
    # fallback: surface the raw payload for debugging
    return json.dumps(data)
# --- Compose a RAG prompt: give the LLM the KB snippets and the user's question ---
def build_prompt_with_kb(question: str, kb_entries: List[Dict]) -> str:
    """Assemble a retrieval-augmented prompt.

    Layout: system-style header, numbered documentation snippets with their
    source URLs, answering instructions, then the user's question.
    """
    parts = [
        "You are a helpful assistant that answers questions about building Gradio apps and deploying them to Hugging Face Spaces. "
        "Use the following documentation snippets when relevant. If asked to provide commands or file contents, be explicit.\n\n",
        "Documentation snippets (quote them if you use them):\n",
    ]
    for idx, snippet in enumerate(kb_entries, start=1):
        parts.append(f"\n[{idx}] {snippet['title']}\n{snippet['text']}\nSource: {snippet['url']}\n")
    parts.append(
        "\nNow, answer the user question below. If the docs fully answer it, base your answer only on them and cite the source(s). "
        "If you extend beyond the docs, say so and keep it brief.\n\nUser question:\n"
    )
    parts.append(question)
    return "".join(parts)
# --- Bot logic: retrieve KB, call LLM (or fallback), return answer with sources ---
def answer_question(user_question: str) -> Dict:
    """Answer via the LLM when HF_API_TOKEN is set, else via a KB-only fallback.

    Returns:
        A dict with keys 'answer' (str), 'sources' (list of {'title', 'url'}
        dicts), and — except for the empty-question case — 'meta' describing
        how the answer was produced ('via', optionally 'model'/'latency_s').
    """
    user_question = user_question.strip()
    if not user_question:
        return {"answer": "Please type a question.", "sources": []}
    kb_hits = retrieve_kb(user_question, top_k=3)
    # Build the citation list once; every return path below reuses it
    # (the original recomputed this comprehension on three separate lines).
    sources = [{"title": entry["title"], "url": entry["url"]} for entry in kb_hits]
    prompt = build_prompt_with_kb(user_question, kb_hits)
    try:
        if HF_API_TOKEN:
            start = time.time()
            gen = call_hf_inference(prompt, max_tokens=256)
            latency = time.time() - start
            return {
                "answer": gen.strip(),
                "sources": sources,
                "meta": {"via": "hf_inference", "model": HF_MODEL, "latency_s": latency},
            }
        # fallback: build an answer from KB entries without LLM
        combined = " ".join([f"{e['title']}: {e['text']}" for e in kb_hits])
        answer = (
            "HF API token not found. Here's a KB-based answer composed from the docs:\n\n"
            f"{combined}\n\nIf you want richer responses, set HF_API_TOKEN as an env var in the Space and the app will use an LLM."
        )
        return {"answer": answer, "sources": sources, "meta": {"via": "kb_fallback"}}
    except Exception as exc:
        # BUG FIX: the original reused the name `e` for both the caught
        # exception and the comprehension loop variable on the same line; it
        # only worked because Python 3 comprehensions have their own scope.
        return {"answer": f"Error when querying LLM: {exc}", "sources": sources, "meta": {"via": "error"}}
# --- Utilities: create starter zip for download ---
# BUG FIX: the starter app.py content had lost its indentation, so the
# downloaded file would not have been valid Python; reconstructed here.
STARTER_APP = """# Minimal starter app.py (for a Space)
import os
import gradio as gr

def greet(name):
    return f"Hello {name} — replace this with your model or pipeline."

with gr.Blocks() as demo:
    gr.Markdown('# Starter Gradio App')
    name = gr.Textbox(label='Your name')
    out = gr.Textbox(label='Greeting')
    btn = gr.Button('Run')
    btn.click(fn=greet, inputs=name, outputs=out)

if __name__ == '__main__':
    demo.launch(server_name='0.0.0.0', server_port=int(os.environ.get('PORT', 7860)))
"""
# Minimal dependency pins for the starter Space.
STARTER_REQ = "gradio>=3.30\nrequests>=2.28\n"
STARTER_README = """Starter repo for a Gradio Space.
Files: app.py, requirements.txt
Push to a new Hugging Face Space (SDK: Gradio) and the build system will install requirements and start the app.
"""
def make_starter_zip() -> Tuple[str, io.BytesIO]:
    """Bundle the starter files into an in-memory zip archive.

    Returns:
        A (filename, buffer) pair; the BytesIO buffer is rewound to
        position 0 so it can be read immediately.
    """
    archive = io.BytesIO()
    members = {
        "app.py": STARTER_APP,
        "requirements.txt": STARTER_REQ,
        "README.md": STARTER_README,
    }
    with zipfile.ZipFile(archive, "w") as bundle:
        for arcname, content in members.items():
            bundle.writestr(arcname, content)
    archive.seek(0)
    return ("gradio-starter.zip", archive)
# --- Gradio UI ---
def build_ui():
    """Construct and return the Gradio Blocks UI.

    Fixes versus the original:
    - on_send now returns the chat *history* into `state` (it previously
      returned the sources list into the `state` output, corrupting the
      history on the next turn), clears the textbox, and renders the
      sources markdown directly — `gr.Markdown` has no `.bind` method, so
      the original `sources_md.bind(...)` raised AttributeError at build.
    - The download button writes the zip to a temp file and returns its
      path to a File component declared in the layout; returning a
      (name, BytesIO) tuple to an inline `gr.File()` does not work.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# LLM-backed Instruction Chatbot — Gradio & Spaces tutor")
        gr.Markdown(
            "Ask questions about Gradio, Hugging Face Spaces, deployment, and starter files. "
            "If you provide HF_API_TOKEN as an environment secret, the answers will be generated by an LLM (Inference API)."
        )
        # `height=` replaces the removed `.style(height=420)` API
        # (NOTE(review): requires gradio >= 3.40 — confirm against the pin).
        chatbot = gr.Chatbot(label="Instructor", height=420)
        state = gr.State([])  # history: list of (user, bot) pairs
        with gr.Row():
            txt = gr.Textbox(placeholder="Ask: How do I deploy to Spaces? Or: Show me starter app.py", show_label=False, lines=2)
            send = gr.Button("Send")
        # quick action buttons
        with gr.Row():
            b_show_app = gr.Button("Show starter app.py")
            b_hf_info = gr.Button("Explain Hugging Face & Spaces")
            b_gr_info = gr.Button("Explain Gradio")
            b_download = gr.Button("Download starter zip")
        file_view = gr.Code(label="File / Snippet", value="", language="python")
        file_out = gr.File(label="Starter zip")
        sources_md = gr.Markdown()

        def render_sources(sources):
            # Markdown bullet list of citation links; empty string hides panel.
            if not sources:
                return ""
            return "### Sources\n" + "\n".join(f"- [{s['title']}]({s['url']})" for s in sources)

        def on_send(msg, history):
            # Answer one turn: returns (chatbot, state, textbox, sources_md).
            history = history or []
            result = answer_question(msg)
            src_lines = "\n".join(f"- [{s['title']}]({s['url']})" for s in result.get("sources", []))
            via = result.get("meta", {}).get("via", "unknown")
            full = result["answer"] + "\n\n**Sources:**\n" + (src_lines or "- (no sources)") + f"\n\n*(Answer via: {via})*"
            history.append((msg, full))
            # history goes to BOTH chatbot and state; "" clears the textbox
            return history, history, "", render_sources(result.get("sources", []))

        def show_starter_app():
            return STARTER_APP

        def show_hf_info():
            # short KB excerpt: Spaces overview
            ent = KB[0]
            return f"**{ent['title']}**\n\n{ent['text']}\n\nSource: {ent['url']}"

        def show_gr_info():
            # short KB excerpt: Gradio Interface basics
            ent = KB[2]
            return f"**{ent['title']}**\n\n{ent['text']}\n\nSource: {ent['url']}"

        def handle_download():
            # gr.File needs a filesystem path, so persist the in-memory zip.
            name, buf = make_starter_zip()
            path = os.path.join(tempfile.gettempdir(), name)
            with open(path, "wb") as fh:
                fh.write(buf.getvalue())
            return path

        # wire events
        send.click(on_send, inputs=[txt, state], outputs=[chatbot, state, txt, sources_md])
        txt.submit(on_send, inputs=[txt, state], outputs=[chatbot, state, txt, sources_md])
        b_show_app.click(fn=show_starter_app, outputs=[file_view])
        b_hf_info.click(fn=show_hf_info, outputs=[file_view])
        b_gr_info.click(fn=show_gr_info, outputs=[file_view])
        b_download.click(fn=handle_download, outputs=[file_out])
    return demo
if __name__ == "__main__":
    # NOTE(review): the launch() call was severed by a pasted citation
    # artifact ("::contentReference[oaicite:4]{index=4}"); reconstructed.
    app = build_ui()
    app.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))