# app.py
"""
Instruction Chatbot using LLM + small knowledgebase (Hugging Face docs + Gradio docs).

Defaults to calling the Hugging Face Inference API (set HF_API_TOKEN in env).
If HF_API_TOKEN is missing, falls back to a KB-only answer.

Usage (locally):
1. export HF_API_TOKEN="hf_...."   # optional but recommended for LLM replies
2. python app.py
3. open http://localhost:7860

On Spaces:
- Put app.py and requirements.txt into the repo root and push to a Gradio Space.
"""

import io
import json
import os
import tempfile
import time
import zipfile
from typing import Dict, List, Optional, Tuple

import requests
import gradio as gr

# --- Config: choose model used on HF Inference API if token provided ---
HF_MODEL = os.environ.get("HF_MODEL", "google/flan-t5-small")
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", None)  # put your token in env on Spaces (use Secrets)
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"

# --- Knowledge base: short curated snippets + source URLs ---
# These entries were assembled from official docs (Hugging Face + Gradio),
# and are used for retrieval-augmented answers.
KB = [
    {
        "id": "spaces_overview",
        "title": "Hugging Face Spaces overview",
        "text": (
            "Spaces are hosted apps on Hugging Face Hub. "
            "A Space is backed by a git-like repository: you push files (app.py, requirements.txt) and the "
            "Space automatically rebuilds and restarts on new commits."
        ),
        "url": "https://huggingface.co/docs/hub/spaces",
    },
    {
        "id": "spaces_repo",
        "title": "Spaces store code in a repo",
        "text": (
            "Spaces store your code inside a repository, similar to model and dataset repos. "
            "Use the repo workflow (commit/push) to update your app. The build system installs requirements.txt."
        ),
        "url": "https://huggingface.co/docs/hub/main/en/spaces-overview",
    },
    {
        "id": "gradio_interface",
        "title": "Gradio Interface basics",
        "text": (
            "Gradio's Interface / Blocks lets you wrap Python functions with input and output components "
            "to create web UIs quickly. Define input components, outputs, and a function and Gradio serves it."
        ),
        "url": "https://www.gradio.app/docs/gradio/interface",
    },
    {
        "id": "gradio_integration",
        "title": "Gradio ↔ Hugging Face integrations",
        "text": (
            "Gradio integrates with Hugging Face Inference Endpoints and Spaces. "
            "You can call Inference API from a Gradio app to get model responses without installing heavy libraries."
        ),
        "url": "https://www.gradio.app/guides/using-hugging-face-integrations",
    },
]


# --- Simple retriever: score KB entries by keyword overlap (fast, explainable) ---
def retrieve_kb(query: str, top_k: int = 3) -> List[Dict]:
    """Return up to ``top_k`` KB entries ranked by naive keyword overlap.

    Scoring: +1 per query word appearing in an entry's text or title, and a
    +1 boost when any "/"-separated chunk of the query appears in the URL.
    If nothing scores above zero, the first ``top_k`` entries are returned
    as a default so callers always get *some* context.
    """
    q = query.lower()
    scored = []
    for entry in KB:
        score = 0
        # simple signals: overlap of words and presence of key terms from title
        for w in q.split():
            if w in entry["text"].lower() or w in entry["title"].lower():
                score += 1
        # boost if query words in url path
        if any(part in entry["url"].lower() for part in q.split("/")):
            score += 1
        scored.append((score, entry))
    scored.sort(key=lambda x: x[0], reverse=True)
    # return top entries with positive score, else top_k default
    results = [e for s, e in scored if s > 0]
    if not results:
        results = [e for s, e in scored[:top_k]]
    return results[:top_k]


# --- HF Inference API call (text generation / instruction) ---
def call_hf_inference(prompt: str, max_tokens: int = 512, timeout: int = 60) -> str:
    """POST ``prompt`` to the HF Inference API and return the generated text.

    Raises:
        RuntimeError: when HF_API_TOKEN is unset, on a non-200 HTTP status,
            or when the API payload contains an "error" key.
    """
    if not HF_API_TOKEN:
        raise RuntimeError("HF_API_TOKEN not set")
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": max_tokens},
        "options": {"wait_for_model": True},  # block instead of 503 while the model loads
    }
    resp = requests.post(HF_API_URL, headers=headers, json=payload, timeout=timeout)
    if resp.status_code != 200:
        raise RuntimeError(f"HF API error {resp.status_code}: {resp.text}")
    data = resp.json()
    # Inference API returns either a dict with 'error' or a list of outputs, or a string.
    if isinstance(data, dict) and data.get("error"):
        raise RuntimeError(f"HF API error: {data.get('error')}")
    # handle list of generated texts (guard against an empty list / non-dict items)
    if isinstance(data, list) and data and isinstance(data[0], dict) and "generated_text" in data[0]:
        return data[0]["generated_text"]
    # sometimes it's returned as a plain dict
    if isinstance(data, dict) and "generated_text" in data:
        return data["generated_text"]
    if isinstance(data, str):
        return data
    # fallback: convert to JSON string
    return json.dumps(data)


# --- Compose a RAG prompt: give the LLM the KB snippets and the user's question ---
def build_prompt_with_kb(question: str, kb_entries: List[Dict]) -> str:
    """Assemble system header + numbered doc snippets + the user question."""
    header = (
        "You are a helpful assistant that answers questions about building Gradio apps and deploying them to Hugging Face Spaces. "
        "Use the following documentation snippets when relevant. If asked to provide commands or file contents, be explicit.\n\n"
    )
    docs_section = "Documentation snippets (quote them if you use them):\n"
    for i, entry in enumerate(kb_entries, start=1):
        docs_section += f"\n[{i}] {entry['title']}\n{entry['text']}\nSource: {entry['url']}\n"
    instruct = (
        "\nNow, answer the user question below. If the docs fully answer it, base your answer only on them and cite the source(s). "
        "If you extend beyond the docs, say so and keep it brief.\n\nUser question:\n"
    )
    return header + docs_section + instruct + question


# --- Bot logic: retrieve KB, call LLM (or fallback), return answer with sources ---
def answer_question(user_question: str) -> Dict:
    """Answer ``user_question`` via the LLM (if a token is set) or KB-only fallback.

    Returns a dict with keys "answer" (str), "sources" (list of
    {"title", "url"}), and "meta" (routing info: via/model/latency).
    """
    user_question = user_question.strip()
    if not user_question:
        return {"answer": "Please type a question.", "sources": []}
    kb_hits = retrieve_kb(user_question, top_k=3)
    prompt = build_prompt_with_kb(user_question, kb_hits)
    # Sources list is the same in every branch; build it once.
    sources = [{"title": entry["title"], "url": entry["url"]} for entry in kb_hits]
    # Try calling HF Inference API if token present
    try:
        if HF_API_TOKEN:
            start = time.time()
            gen = call_hf_inference(prompt, max_tokens=256)
            latency = time.time() - start
            return {
                "answer": gen.strip(),
                "sources": sources,
                "meta": {"via": "hf_inference", "model": HF_MODEL, "latency_s": latency},
            }
        else:
            # fallback: build an answer from KB entries without LLM
            combined = " ".join([f"{e['title']}: {e['text']}" for e in kb_hits])
            answer = (
                "HF API token not found. Here's a KB-based answer composed from the docs:\n\n"
                f"{combined}\n\nIf you want richer responses, set HF_API_TOKEN as an env var in the Space and the app will use an LLM."
            )
            return {"answer": answer, "sources": sources, "meta": {"via": "kb_fallback"}}
    except Exception as exc:  # renamed from ``e`` to avoid shadowing by comprehensions
        # return helpful error details
        return {"answer": f"Error when querying LLM: {exc}", "sources": sources, "meta": {"via": "error"}}


# --- Utilities: create starter zip for download ---
STARTER_APP = """# Minimal starter app.py (for a Space)
import os
import gradio as gr

def greet(name):
    return f"Hello {name} — replace this with your model or pipeline."

with gr.Blocks() as demo:
    gr.Markdown('# Starter Gradio App')
    name = gr.Textbox(label='Your name')
    out = gr.Textbox(label='Greeting')
    btn = gr.Button('Run')
    btn.click(fn=greet, inputs=name, outputs=out)

if __name__ == '__main__':
    demo.launch(server_name='0.0.0.0', server_port=int(os.environ.get('PORT', 7860)))
"""

STARTER_REQ = "gradio>=3.30\nrequests>=2.28\n"

STARTER_README = """Starter repo for a Gradio Space.
Files: app.py, requirements.txt
Push to a new Hugging Face Space (SDK: Gradio) and the build system will install requirements and start the app.
"""


def make_starter_zip() -> Tuple[str, io.BytesIO]:
    """Build an in-memory zip of the starter files; returns (filename, buffer)."""
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as z:
        z.writestr("app.py", STARTER_APP)
        z.writestr("requirements.txt", STARTER_REQ)
        z.writestr("README.md", STARTER_README)
    buf.seek(0)
    return ("gradio-starter.zip", buf)


# --- Gradio UI ---
def build_ui():
    """Construct and return the Gradio Blocks app (chat + quick actions)."""
    with gr.Blocks() as demo:
        gr.Markdown("# LLM-backed Instruction Chatbot — Gradio & Spaces tutor")
        gr.Markdown(
            "Ask questions about Gradio, Hugging Face Spaces, deployment, and starter files. "
            "If you provide HF_API_TOKEN as an environment secret, the answers will be generated by an LLM (Inference API)."
        )
        # ``.style(height=...)`` was removed in Gradio 4; pass height directly.
        chatbot = gr.Chatbot(label="Instructor", height=420)
        state = gr.State([])  # history: list of (user, bot) pairs
        with gr.Row():
            txt = gr.Textbox(
                placeholder="Ask: How do I deploy to Spaces? Or: Show me starter app.py",
                show_label=False,
                lines=2,
            )
            send = gr.Button("Send")
        # quick action buttons
        with gr.Row():
            b_show_app = gr.Button("Show starter app.py")
            b_hf_info = gr.Button("Explain Hugging Face & Spaces")
            b_gr_info = gr.Button("Explain Gradio")
            b_download = gr.Button("Download starter zip")
        file_view = gr.Code(label="File / Snippet", value="", language="python")
        sources_md = gr.Markdown()
        # Explicit output component for the starter-zip download (a gr.File
        # created inline inside .click() outputs is fragile and unclear).
        file_out = gr.File(label="Starter zip")

        # Handlers
        def on_send(msg, history):
            """Answer one chat turn; returns (chatbot pairs, state, sources markdown)."""
            history = history or []
            history.append((msg, "…"))  # placeholder
            # Get answer
            result = answer_question(msg)
            # compose display answer with sources
            src_lines = "\n".join([f"- [{s['title']}]({s['url']})" for s in result.get("sources", [])])
            meta = result.get("meta", {})
            via = meta.get("via", "unknown")
            details = f"\n\n*(Answer via: {via})*"
            full = result["answer"] + "\n\n**Sources:**\n" + (src_lines or "- (no sources)") + details
            history[-1] = (msg, full)
            # NOTE: state must receive the *history* (the original returned the
            # sources list here, corrupting the conversation after one turn).
            sources_panel = ("### Sources\n" + src_lines) if src_lines else ""
            return history, history, sources_panel

        def show_starter_app():
            return STARTER_APP

        def show_hf_info():
            # return a short KB excerpt
            ent = KB[0]
            return f"**{ent['title']}**\n\n{ent['text']}\n\nSource: {ent['url']}"

        def show_gr_info():
            ent = KB[2]
            return f"**{ent['title']}**\n\n{ent['text']}\n\nSource: {ent['url']}"

        def handle_download():
            # gr.File needs a path on disk, not an in-memory buffer, so
            # materialise the zip into a fresh temp directory.
            name, buf = make_starter_zip()
            path = os.path.join(tempfile.mkdtemp(), name)
            with open(path, "wb") as fh:
                fh.write(buf.getvalue())
            return path

        # wire events (sources_md is updated directly by on_send; gr.Markdown
        # has no ``.bind`` method, which the original code called)
        send.click(on_send, inputs=[txt, state], outputs=[chatbot, state, sources_md])
        txt.submit(on_send, inputs=[txt, state], outputs=[chatbot, state, sources_md])
        b_show_app.click(fn=show_starter_app, outputs=[file_view])
        b_hf_info.click(fn=show_hf_info, outputs=[file_view])
        b_gr_info.click(fn=show_gr_info, outputs=[file_view])
        b_download.click(fn=handle_download, outputs=[file_out])
    return demo


if __name__ == "__main__":
    app = build_ui()
    # The original file contained a pasted citation artifact
    # ("::contentReference[oaicite:4]{index=4}") inside this call, which was a
    # syntax error; repaired to a plain os.environ.get lookup.
    app.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))