# app.py
"""
Instruction Chatbot using LLM + small knowledgebase (Hugging Face docs + Gradio docs).
Defaults to calling the Hugging Face Inference API (set HF_API_TOKEN in env).
If HF_API_TOKEN is missing, falls back to a KB-only answer.
Usage (locally):
1. export HF_API_TOKEN="hf_...." # optional but recommended for LLM replies
2. python app.py
3. open http://localhost:7860
On Spaces:
- Put app.py and requirements.txt into the repo root and push to a Gradio Space.
"""
import io
import json
import os
import tempfile
import time
import zipfile
from typing import Dict, List, Optional, Tuple

import gradio as gr
import requests
# --- Config: choose model used on HF Inference API if token provided ---
HF_MODEL = os.environ.get("HF_MODEL", "google/flan-t5-small")  # override via env var to use another model
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", None) # put your token in env on Spaces (use Secrets)
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"  # resolved once at import time
# --- Knowledge base: short curated snippets + source URLs ---
# These entries were assembled from official docs (Hugging Face + Gradio),
# and are used for retrieval-augmented answers.
# Entry schema: id (stable key), title (shown in citations), text (snippet
# injected into the LLM prompt), url (source link rendered in the UI).
KB = [
    {
        "id": "spaces_overview",
        "title": "Hugging Face Spaces overview",
        "text": (
            "Spaces are hosted apps on Hugging Face Hub. "
            "A Space is backed by a git-like repository: you push files (app.py, requirements.txt) and the "
            "Space automatically rebuilds and restarts on new commits."
        ),
        "url": "https://huggingface.co/docs/hub/spaces",
    },
    {
        "id": "spaces_repo",
        "title": "Spaces store code in a repo",
        "text": (
            "Spaces store your code inside a repository, similar to model and dataset repos. "
            "Use the repo workflow (commit/push) to update your app. The build system installs requirements.txt."
        ),
        "url": "https://huggingface.co/docs/hub/main/en/spaces-overview",
    },
    {
        "id": "gradio_interface",
        "title": "Gradio Interface basics",
        "text": (
            "Gradio's Interface / Blocks lets you wrap Python functions with input and output components "
            "to create web UIs quickly. Define input components, outputs, and a function and Gradio serves it."
        ),
        "url": "https://www.gradio.app/docs/gradio/interface",
    },
    {
        "id": "gradio_integration",
        "title": "Gradio ↔ Hugging Face integrations",
        "text": (
            "Gradio integrates with Hugging Face Inference Endpoints and Spaces. "
            "You can call Inference API from a Gradio app to get model responses without installing heavy libraries."
        ),
        "url": "https://www.gradio.app/guides/using-hugging-face-integrations",
    },
]
# --- Simple retriever: score KB entries by keyword overlap (fast, explainable) ---
def retrieve_kb(query: str, top_k: int = 3, kb: Optional[List[Dict]] = None) -> List[Dict]:
    """Rank knowledge-base entries by naive keyword overlap with *query*.

    Args:
        query: Free-text user question.
        top_k: Maximum number of entries to return.
        kb: Entries to search; defaults to the module-level KB. (Added as a
            backward-compatible parameter so the retriever can be reused and
            tested against other corpora.)

    Returns:
        Up to top_k entries. Positively-scored entries come first; if nothing
        matches at all, the first top_k entries are returned so the caller
        always has some context to work with.
    """
    entries = KB if kb is None else kb
    q = query.lower()
    scored = []
    for entry in entries:
        score = 0
        # one point per query word found in the entry's text or title
        for w in q.split():
            if w in entry["text"].lower() or w in entry["title"].lower():
                score += 1
        # small boost when a slash-separated query fragment appears in the URL
        if any(part in entry["url"].lower() for part in q.split("/")):
            score += 1
        scored.append((score, entry))
    # sort by score only; dict entries are never compared (stable sort keeps
    # original order among ties)
    scored.sort(key=lambda x: x[0], reverse=True)
    results = [e for s, e in scored if s > 0]
    if not results:
        # no positive hits: fall back to the first top_k entries
        results = [e for s, e in scored[:top_k]]
    return results[:top_k]
# --- HF Inference API call (text generation / instruction) ---
def call_hf_inference(prompt: str, max_tokens: int = 512, timeout: int = 60) -> str:
    """Query the Hugging Face Inference API and return the generated text.

    Args:
        prompt: Full prompt to send to the model at HF_API_URL.
        max_tokens: Cap on newly generated tokens (max_new_tokens).
        timeout: HTTP timeout in seconds.

    Returns:
        The generated text, or a JSON dump of the raw payload when the
        response shape is unrecognized.

    Raises:
        RuntimeError: If HF_API_TOKEN is unset, the HTTP status is not 200,
            or the API returns an error payload.
    """
    if not HF_API_TOKEN:
        raise RuntimeError("HF_API_TOKEN not set")
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": max_tokens},
        "options": {"wait_for_model": True},  # block until the model is loaded
    }
    resp = requests.post(HF_API_URL, headers=headers, json=payload, timeout=timeout)
    if resp.status_code != 200:
        raise RuntimeError(f"HF API error {resp.status_code}: {resp.text}")
    data = resp.json()
    # Inference API returns either a dict with 'error', a list of outputs,
    # or a bare string.
    if isinstance(data, dict) and data.get("error"):
        raise RuntimeError(f"HF API error: {data.get('error')}")
    # BUG FIX: the original indexed data[0] without checking that the list was
    # non-empty and that its first element was a dict, which could raise
    # IndexError/TypeError on unexpected payloads.
    if isinstance(data, list) and data and isinstance(data[0], dict) and "generated_text" in data[0]:
        return data[0]["generated_text"]
    if isinstance(data, dict) and "generated_text" in data:
        return data["generated_text"]
    if isinstance(data, str):
        return data
    # fallback: surface the raw payload for debugging
    return json.dumps(data)
# --- Compose a RAG prompt: give the LLM the KB snippets and the user's question ---
def build_prompt_with_kb(question: str, kb_entries: List[Dict]) -> str:
    """Assemble a retrieval-augmented prompt.

    Layout: system-style header, numbered documentation snippets with their
    source URLs, answering instructions, then the user's question.
    """
    parts = [
        "You are a helpful assistant that answers questions about building Gradio apps and deploying them to Hugging Face Spaces. "
        "Use the following documentation snippets when relevant. If asked to provide commands or file contents, be explicit.\n\n",
        "Documentation snippets (quote them if you use them):\n",
    ]
    for idx, snippet in enumerate(kb_entries, start=1):
        parts.append(f"\n[{idx}] {snippet['title']}\n{snippet['text']}\nSource: {snippet['url']}\n")
    parts.append(
        "\nNow, answer the user question below. If the docs fully answer it, base your answer only on them and cite the source(s). "
        "If you extend beyond the docs, say so and keep it brief.\n\nUser question:\n"
    )
    parts.append(question)
    return "".join(parts)
# --- Bot logic: retrieve KB, call LLM (or fallback), return answer with sources ---
def answer_question(user_question: str) -> Dict:
    """Answer via the LLM when HF_API_TOKEN is set, else via a KB-only fallback.

    Returns:
        A dict with keys 'answer' (str), 'sources' (list of {'title', 'url'}
        dicts), and — except for the empty-question case — 'meta' describing
        how the answer was produced ('via', optionally 'model'/'latency_s').
    """
    user_question = user_question.strip()
    if not user_question:
        return {"answer": "Please type a question.", "sources": []}
    kb_hits = retrieve_kb(user_question, top_k=3)
    # Build the citation list once; every return path below reuses it
    # (the original recomputed this comprehension on three separate lines).
    sources = [{"title": entry["title"], "url": entry["url"]} for entry in kb_hits]
    prompt = build_prompt_with_kb(user_question, kb_hits)
    try:
        if HF_API_TOKEN:
            start = time.time()
            gen = call_hf_inference(prompt, max_tokens=256)
            latency = time.time() - start
            return {
                "answer": gen.strip(),
                "sources": sources,
                "meta": {"via": "hf_inference", "model": HF_MODEL, "latency_s": latency},
            }
        # fallback: build an answer from KB entries without LLM
        combined = " ".join([f"{e['title']}: {e['text']}" for e in kb_hits])
        answer = (
            "HF API token not found. Here's a KB-based answer composed from the docs:\n\n"
            f"{combined}\n\nIf you want richer responses, set HF_API_TOKEN as an env var in the Space and the app will use an LLM."
        )
        return {"answer": answer, "sources": sources, "meta": {"via": "kb_fallback"}}
    except Exception as exc:
        # BUG FIX: the original reused the name `e` for both the caught
        # exception and the comprehension loop variable on the same line; it
        # only worked because Python 3 comprehensions have their own scope.
        return {"answer": f"Error when querying LLM: {exc}", "sources": sources, "meta": {"via": "error"}}
# --- Utilities: create starter zip for download ---
# BUG FIX: the starter app.py content had lost its indentation, so the
# downloaded file would not have been valid Python; reconstructed here.
STARTER_APP = """# Minimal starter app.py (for a Space)
import os
import gradio as gr

def greet(name):
    return f"Hello {name} — replace this with your model or pipeline."

with gr.Blocks() as demo:
    gr.Markdown('# Starter Gradio App')
    name = gr.Textbox(label='Your name')
    out = gr.Textbox(label='Greeting')
    btn = gr.Button('Run')
    btn.click(fn=greet, inputs=name, outputs=out)

if __name__ == '__main__':
    demo.launch(server_name='0.0.0.0', server_port=int(os.environ.get('PORT', 7860)))
"""
# Minimal dependency pins for the starter Space.
STARTER_REQ = "gradio>=3.30\nrequests>=2.28\n"
STARTER_README = """Starter repo for a Gradio Space.
Files: app.py, requirements.txt
Push to a new Hugging Face Space (SDK: Gradio) and the build system will install requirements and start the app.
"""
def make_starter_zip() -> Tuple[str, io.BytesIO]:
    """Bundle the starter files into an in-memory zip archive.

    Returns:
        A (filename, buffer) pair; the BytesIO buffer is rewound to
        position 0 so it can be read immediately.
    """
    archive = io.BytesIO()
    members = {
        "app.py": STARTER_APP,
        "requirements.txt": STARTER_REQ,
        "README.md": STARTER_README,
    }
    with zipfile.ZipFile(archive, "w") as bundle:
        for arcname, content in members.items():
            bundle.writestr(arcname, content)
    archive.seek(0)
    return ("gradio-starter.zip", archive)
# --- Gradio UI ---
def build_ui():
    """Construct and return the Gradio Blocks UI.

    Fixes versus the original:
    - on_send now returns the chat *history* into `state` (it previously
      returned the sources list into the `state` output, corrupting the
      history on the next turn), clears the textbox, and renders the
      sources markdown directly — `gr.Markdown` has no `.bind` method, so
      the original `sources_md.bind(...)` raised AttributeError at build.
    - The download button writes the zip to a temp file and returns its
      path to a File component declared in the layout; returning a
      (name, BytesIO) tuple to an inline `gr.File()` does not work.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# LLM-backed Instruction Chatbot — Gradio & Spaces tutor")
        gr.Markdown(
            "Ask questions about Gradio, Hugging Face Spaces, deployment, and starter files. "
            "If you provide HF_API_TOKEN as an environment secret, the answers will be generated by an LLM (Inference API)."
        )
        # `height=` replaces the removed `.style(height=420)` API
        # (NOTE(review): requires gradio >= 3.40 — confirm against the pin).
        chatbot = gr.Chatbot(label="Instructor", height=420)
        state = gr.State([])  # history: list of (user, bot) pairs
        with gr.Row():
            txt = gr.Textbox(placeholder="Ask: How do I deploy to Spaces? Or: Show me starter app.py", show_label=False, lines=2)
            send = gr.Button("Send")
        # quick action buttons
        with gr.Row():
            b_show_app = gr.Button("Show starter app.py")
            b_hf_info = gr.Button("Explain Hugging Face & Spaces")
            b_gr_info = gr.Button("Explain Gradio")
            b_download = gr.Button("Download starter zip")
        file_view = gr.Code(label="File / Snippet", value="", language="python")
        file_out = gr.File(label="Starter zip")
        sources_md = gr.Markdown()

        def render_sources(sources):
            # Markdown bullet list of citation links; empty string hides panel.
            if not sources:
                return ""
            return "### Sources\n" + "\n".join(f"- [{s['title']}]({s['url']})" for s in sources)

        def on_send(msg, history):
            # Answer one turn: returns (chatbot, state, textbox, sources_md).
            history = history or []
            result = answer_question(msg)
            src_lines = "\n".join(f"- [{s['title']}]({s['url']})" for s in result.get("sources", []))
            via = result.get("meta", {}).get("via", "unknown")
            full = result["answer"] + "\n\n**Sources:**\n" + (src_lines or "- (no sources)") + f"\n\n*(Answer via: {via})*"
            history.append((msg, full))
            # history goes to BOTH chatbot and state; "" clears the textbox
            return history, history, "", render_sources(result.get("sources", []))

        def show_starter_app():
            return STARTER_APP

        def show_hf_info():
            # short KB excerpt: Spaces overview
            ent = KB[0]
            return f"**{ent['title']}**\n\n{ent['text']}\n\nSource: {ent['url']}"

        def show_gr_info():
            # short KB excerpt: Gradio Interface basics
            ent = KB[2]
            return f"**{ent['title']}**\n\n{ent['text']}\n\nSource: {ent['url']}"

        def handle_download():
            # gr.File needs a filesystem path, so persist the in-memory zip.
            name, buf = make_starter_zip()
            path = os.path.join(tempfile.gettempdir(), name)
            with open(path, "wb") as fh:
                fh.write(buf.getvalue())
            return path

        # wire events
        send.click(on_send, inputs=[txt, state], outputs=[chatbot, state, txt, sources_md])
        txt.submit(on_send, inputs=[txt, state], outputs=[chatbot, state, txt, sources_md])
        b_show_app.click(fn=show_starter_app, outputs=[file_view])
        b_hf_info.click(fn=show_hf_info, outputs=[file_view])
        b_gr_info.click(fn=show_gr_info, outputs=[file_view])
        b_download.click(fn=handle_download, outputs=[file_out])
    return demo
if __name__ == "__main__":
    # NOTE(review): the launch() call was severed by a pasted citation
    # artifact ("::contentReference[oaicite:4]{index=4}"); reconstructed.
    app = build_ui()
    app.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))