# app.py
"""
Instruction Chatbot using LLM + small knowledgebase (Hugging Face docs + Gradio docs).
Defaults to calling the Hugging Face Inference API (set HF_API_TOKEN in env).
If HF_API_TOKEN is missing, falls back to a KB-only answer.
Usage (locally):
1. export HF_API_TOKEN="hf_...." # optional but recommended for LLM replies
2. python app.py
3. open http://localhost:7860
On Spaces:
- Put app.py and requirements.txt into the repo root and push to a Gradio Space.
"""
import os
import json
import time
import io
import zipfile
from typing import List, Tuple, Dict, Optional
import requests
import gradio as gr
# --- Config: choose model used on HF Inference API if token provided ---
# Hub model id for the hosted Inference API; override via the HF_MODEL env var.
HF_MODEL: str = os.environ.get("HF_MODEL", "google/flan-t5-small")
HF_API_TOKEN: Optional[str] = os.environ.get("HF_API_TOKEN", None)  # put your token in env on Spaces (use Secrets)
# REST endpoint for the chosen model on the Inference API.
HF_API_URL: str = f"https://api-inference.huggingface.co/models/{HF_MODEL}"
# --- Knowledge base: short curated snippets + source URLs ---
# These entries were assembled from official docs (Hugging Face + Gradio),
# and are used for retrieval-augmented answers.
# Each entry: id (stable key), title, text (snippet), url (citation source).
KB: List[Dict[str, str]] = [
    {
        "id": "spaces_overview",
        "title": "Hugging Face Spaces overview",
        "text": (
            "Spaces are hosted apps on Hugging Face Hub. "
            "A Space is backed by a git-like repository: you push files (app.py, requirements.txt) and the "
            "Space automatically rebuilds and restarts on new commits."
        ),
        "url": "https://huggingface.co/docs/hub/spaces",
    },
    {
        "id": "spaces_repo",
        "title": "Spaces store code in a repo",
        "text": (
            "Spaces store your code inside a repository, similar to model and dataset repos. "
            "Use the repo workflow (commit/push) to update your app. The build system installs requirements.txt."
        ),
        "url": "https://huggingface.co/docs/hub/main/en/spaces-overview",
    },
    {
        "id": "gradio_interface",
        "title": "Gradio Interface basics",
        "text": (
            "Gradio's Interface / Blocks lets you wrap Python functions with input and output components "
            "to create web UIs quickly. Define input components, outputs, and a function and Gradio serves it."
        ),
        "url": "https://www.gradio.app/docs/gradio/interface",
    },
    {
        "id": "gradio_integration",
        "title": "Gradio ↔ Hugging Face integrations",
        "text": (
            "Gradio integrates with Hugging Face Inference Endpoints and Spaces. "
            "You can call Inference API from a Gradio app to get model responses without installing heavy libraries."
        ),
        "url": "https://www.gradio.app/guides/using-hugging-face-integrations",
    },
]
# --- Simple retriever: score KB entries by keyword overlap (fast, explainable) ---
def retrieve_kb(query: str, top_k: int = 3) -> List[Dict]:
    """Return up to *top_k* KB entries ranked by keyword overlap with *query*.

    Scoring: one point per query word found in an entry's text or title,
    plus one bonus point when any '/'-separated fragment of the query
    appears in the entry's URL. When nothing scores above zero, the first
    *top_k* entries are returned as a default.
    """
    lowered = query.lower()
    words = lowered.split()
    url_fragments = lowered.split("/")
    ranked = []
    for entry in KB:
        text_lc = entry["text"].lower()
        title_lc = entry["title"].lower()
        # simple signals: word overlap with text/title, URL-path match boost
        points = sum(1 for w in words if w in text_lc or w in title_lc)
        if any(frag in entry["url"].lower() for frag in url_fragments):
            points += 1
        ranked.append((points, entry))
    ranked.sort(key=lambda pair: pair[0], reverse=True)
    positive = [entry for points, entry in ranked if points > 0]
    if not positive:
        positive = [entry for _, entry in ranked[:top_k]]
    return positive[:top_k]
# --- HF Inference API call (text generation / instruction) ---
def call_hf_inference(prompt: str, max_tokens: int = 512, timeout: int = 60) -> str:
    """Send *prompt* to the Hugging Face Inference API and return generated text.

    Parameters:
        prompt: full prompt string sent as the model input.
        max_tokens: cap on newly generated tokens (max_new_tokens).
        timeout: HTTP timeout in seconds for the POST request.

    Raises:
        RuntimeError: if HF_API_TOKEN is unset, the HTTP status is not 200,
            or the API returns an error payload.
    """
    if not HF_API_TOKEN:
        raise RuntimeError("HF_API_TOKEN not set")
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": max_tokens},
        # wait_for_model avoids 503s while the model container cold-starts
        "options": {"wait_for_model": True},
    }
    resp = requests.post(HF_API_URL, headers=headers, json=payload, timeout=timeout)
    if resp.status_code != 200:
        raise RuntimeError(f"HF API error {resp.status_code}: {resp.text}")
    data = resp.json()
    # Inference API returns either a dict with 'error' or a list of outputs, or a string.
    if isinstance(data, dict) and data.get("error"):
        raise RuntimeError(f"HF API error: {data.get('error')}")
    # handle list of generated texts; guard the first element — the original
    # indexed data[0] unconditionally, so an empty list raised IndexError and
    # a non-dict element could raise TypeError
    if isinstance(data, list) and data and isinstance(data[0], dict) and "generated_text" in data[0]:
        return data[0]["generated_text"]
    # sometimes a single dict is returned
    if isinstance(data, dict) and "generated_text" in data:
        return data["generated_text"]
    # sometimes it's returned as a bare string
    if isinstance(data, str):
        return data
    # fallback: serialize whatever unexpected shape came back
    return json.dumps(data)
# --- Compose a RAG prompt: give the LLM the KB snippets and the user's question ---
def build_prompt_with_kb(question: str, kb_entries: List[Dict]) -> str:
    """Assemble a retrieval-augmented prompt for the LLM.

    Layout: system-style header, then the numbered documentation snippets
    (each with its source URL), then an instruction block, then the raw
    user question at the very end.
    """
    parts = [
        "You are a helpful assistant that answers questions about building Gradio apps and deploying them to Hugging Face Spaces. "
        "Use the following documentation snippets when relevant. If asked to provide commands or file contents, be explicit.\n\n",
        "Documentation snippets (quote them if you use them):\n",
    ]
    for idx, entry in enumerate(kb_entries, start=1):
        parts.append(f"\n[{idx}] {entry['title']}\n{entry['text']}\nSource: {entry['url']}\n")
    parts.append(
        "\nNow, answer the user question below. If the docs fully answer it, base your answer only on them and cite the source(s). "
        "If you extend beyond the docs, say so and keep it brief.\n\nUser question:\n"
    )
    parts.append(question)
    return "".join(parts)
# --- Bot logic: retrieve KB, call LLM (or fallback), return answer with sources ---
def answer_question(user_question: str) -> Dict:
    """Answer *user_question* via KB retrieval plus (optionally) the HF LLM.

    Returns a dict with keys:
        answer  -- reply text.
        sources -- list of {"title", "url"} dicts for the KB entries used.
        meta    -- (when present) how the answer was produced: via
                   "hf_inference" (with model + latency_s), "kb_fallback",
                   or "error".
    """
    user_question = user_question.strip()
    if not user_question:
        return {"answer": "Please type a question.", "sources": []}
    kb_hits = retrieve_kb(user_question, top_k=3)
    prompt = build_prompt_with_kb(user_question, kb_hits)
    # Build the sources list once instead of repeating it in every branch.
    sources = [{"title": entry["title"], "url": entry["url"]} for entry in kb_hits]
    # Try calling HF Inference API if token present
    try:
        if HF_API_TOKEN:
            start = time.time()
            gen = call_hf_inference(prompt, max_tokens=256)
            latency = time.time() - start
            return {
                "answer": gen.strip(),
                "sources": sources,
                "meta": {"via": "hf_inference", "model": HF_MODEL, "latency_s": latency},
            }
        # fallback: build an answer from KB entries without LLM
        combined = " ".join(f"{entry['title']}: {entry['text']}" for entry in kb_hits)
        answer = (
            "HF API token not found. Here's a KB-based answer composed from the docs:\n\n"
            f"{combined}\n\nIf you want richer responses, set HF_API_TOKEN as an env var in the Space and the app will use an LLM."
        )
        return {"answer": answer, "sources": sources, "meta": {"via": "kb_fallback"}}
    except Exception as exc:
        # `exc` (not `e`) so the name doesn't collide with the KB-entry
        # variable the original reused in the same line; include details
        # so the UI can surface what went wrong.
        return {"answer": f"Error when querying LLM: {exc}", "sources": sources, "meta": {"via": "error"}}
# --- Utilities: create starter zip for download ---
STARTER_APP = """# Minimal starter app.py (for a Space)
import os
import gradio as gr
def greet(name):
return f"Hello {name} — replace this with your model or pipeline."
with gr.Blocks() as demo:
gr.Markdown('# Starter Gradio App')
name = gr.Textbox(label='Your name')
out = gr.Textbox(label='Greeting')
btn = gr.Button('Run')
btn.click(fn=greet, inputs=name, outputs=out)
if __name__ == '__main__':
demo.launch(server_name='0.0.0.0', server_port=int(os.environ.get('PORT', 7860)))
"""
STARTER_REQ = "gradio>=3.30\nrequests>=2.28\n"
STARTER_README = """Starter repo for a Gradio Space.
Files: app.py, requirements.txt
Push to a new Hugging Face Space (SDK: Gradio) and the build system will install requirements and start the app.
"""
def make_starter_zip() -> Tuple[str, io.BytesIO]:
    """Bundle the starter files into an in-memory zip.

    Returns a (filename, buffer) pair; the BytesIO buffer is rewound to
    position 0 so callers can read it immediately.
    """
    archive = io.BytesIO()
    members = {
        "app.py": STARTER_APP,
        "requirements.txt": STARTER_REQ,
        "README.md": STARTER_README,
    }
    with zipfile.ZipFile(archive, "w") as zf:
        for arcname, contents in members.items():
            zf.writestr(arcname, contents)
    archive.seek(0)
    return ("gradio-starter.zip", archive)
# --- Gradio UI ---
def build_ui():
    """Construct and return the Gradio Blocks UI for the tutor chatbot."""
    with gr.Blocks() as demo:
        gr.Markdown("# LLM-backed Instruction Chatbot — Gradio & Spaces tutor")
        gr.Markdown(
            "Ask questions about Gradio, Hugging Face Spaces, deployment, and starter files. "
            "If you provide HF_API_TOKEN as an environment secret, the answers will be generated by an LLM (Inference API)."
        )
        # Gradio 4.x removed Component.style(); height is a constructor arg now.
        chatbot = gr.Chatbot(label="Instructor", height=420)
        state = gr.State([])  # history: list of (user, bot) pairs
        with gr.Row():
            txt = gr.Textbox(placeholder="Ask: How do I deploy to Spaces? Or: Show me starter app.py", show_label=False, lines=2)
            send = gr.Button("Send")
        # quick action buttons
        with gr.Row():
            b_show_app = gr.Button("Show starter app.py")
            b_hf_info = gr.Button("Explain Hugging Face & Spaces")
            b_gr_info = gr.Button("Explain Gradio")
            b_download = gr.Button("Download starter zip")
        file_view = gr.Code(label="File / Snippet", value="", language="python")
        sources_md = gr.Markdown()
        # The download target must be created inside the layout; a component
        # instantiated inside an event binding is never rendered.
        file_out = gr.File(label="Starter zip")

        # Handlers
        def on_send(msg, history):
            """Answer msg, extend the chat history, and render a sources list."""
            history = history or []
            result = answer_question(msg)
            # compose display answer with sources
            src_lines = "\n".join(f"- [{s['title']}]({s['url']})" for s in result.get("sources", []))
            via = result.get("meta", {}).get("via", "unknown")
            details = f"\n\n*(Answer via: {via})*"
            full = result["answer"] + "\n\n**Sources:**\n" + (src_lines or "- (no sources)") + details
            history.append((msg, full))
            sources_render = ("### Sources\n" + src_lines) if src_lines else ""
            # Return history for BOTH chatbot and state: the original returned
            # the sources list into `state`, corrupting the history that the
            # next turn reads back.
            return history, history, sources_render

        def show_starter_app():
            return STARTER_APP

        def show_hf_info():
            # return a short KB excerpt
            ent = KB[0]
            return f"**{ent['title']}**\n\n{ent['text']}\n\nSource: {ent['url']}"

        def show_gr_info():
            ent = KB[2]
            return f"**{ent['title']}**\n\n{ent['text']}\n\nSource: {ent['url']}"

        def handle_download():
            # gr.File serves a filesystem path; the original returned a
            # (name, BytesIO) tuple, which Gradio cannot serve as a download.
            import tempfile
            name, buf = make_starter_zip()
            path = os.path.join(tempfile.gettempdir(), name)
            with open(path, "wb") as fh:
                fh.write(buf.getvalue())
            return path

        # wire events; sources_md is updated directly by on_send — the original
        # called sources_md.bind(...), which does not exist on gr.Markdown and
        # raised AttributeError at build time.
        send.click(on_send, inputs=[txt, state], outputs=[chatbot, state, sources_md])
        txt.submit(on_send, inputs=[txt, state], outputs=[chatbot, state, sources_md])
        b_show_app.click(fn=show_starter_app, outputs=[file_view])
        b_hf_info.click(fn=show_hf_info, outputs=[file_view])
        b_gr_info.click(fn=show_gr_info, outputs=[file_view])
        b_download.click(fn=handle_download, outputs=[file_out])
    return demo
if __name__ == "__main__":
    # Bind 0.0.0.0 so the app is reachable inside the Space container and
    # honor a platform-provided PORT; the original launch() call was split
    # by a stray citation artifact, making it a SyntaxError.
    app = build_ui()
    app.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))