 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# app.py
"""
Instruction Chatbot using LLM + small knowledgebase (Hugging Face docs + Gradio docs).
Defaults to calling the Hugging Face Inference API (set HF_API_TOKEN in env).
If HF_API_TOKEN is missing, falls back to a KB-only answer.

Usage (locally):
  1. export HF_API_TOKEN="hf_...."   # optional but recommended for LLM replies
  2. python app.py
  3. open http://localhost:7860

On Spaces:
 - Put app.py and requirements.txt into the repo root and push to a Gradio Space.
"""

import os
import json
import time
import io
import zipfile
from typing import List, Tuple, Dict, Optional

import requests
import gradio as gr

# --- Config: choose model used on HF Inference API if token provided ---
HF_MODEL = os.environ.get("HF_MODEL", "google/flan-t5-small")
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", None)  # put your token in env on Spaces (use Secrets)
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"

# --- Knowledge base: short curated snippets + source URLs ---
# These entries were assembled from official docs (Hugging Face + Gradio),
# and are used for retrieval-augmented answers.
# Each entry carries a stable "id", a display "title", the snippet "text"
# (used both for retrieval scoring and for prompt construction), and the
# source "url" surfaced to the user as a citation.
KB: List[Dict[str, str]] = [
    {
        "id": "spaces_overview",
        "title": "Hugging Face Spaces overview",
        "text": (
            "Spaces are hosted apps on Hugging Face Hub. "
            "A Space is backed by a git-like repository: you push files (app.py, requirements.txt) and the "
            "Space automatically rebuilds and restarts on new commits."
        ),
        "url": "https://huggingface.co/docs/hub/spaces",
    },
    {
        "id": "spaces_repo",
        "title": "Spaces store code in a repo",
        "text": (
            "Spaces store your code inside a repository, similar to model and dataset repos. "
            "Use the repo workflow (commit/push) to update your app. The build system installs requirements.txt."
        ),
        "url": "https://huggingface.co/docs/hub/main/en/spaces-overview",
    },
    {
        "id": "gradio_interface",
        "title": "Gradio Interface basics",
        "text": (
            "Gradio's Interface / Blocks lets you wrap Python functions with input and output components "
            "to create web UIs quickly. Define input components, outputs, and a function and Gradio serves it."
        ),
        "url": "https://www.gradio.app/docs/gradio/interface",
    },
    {
        "id": "gradio_integration",
        "title": "Gradio ↔ Hugging Face integrations",
        "text": (
            "Gradio integrates with Hugging Face Inference Endpoints and Spaces. "
            "You can call Inference API from a Gradio app to get model responses without installing heavy libraries."
        ),
        "url": "https://www.gradio.app/guides/using-hugging-face-integrations",
    },
]

# --- Simple retriever: score KB entries by keyword overlap (fast, explainable) ---
def retrieve_kb(query: str, top_k: int = 3, kb: Optional[List[Dict]] = None) -> List[Dict]:
    """Return up to *top_k* knowledge-base entries ranked by keyword overlap.

    Scoring is intentionally simple and explainable: +1 per distinct query
    word found (as a substring, matching the original behavior) in an
    entry's title or text, plus +1 if any query word appears in the entry's
    URL.  When nothing scores above zero, the first *top_k* entries are
    returned as a fallback so callers always get context.

    Parameters
    ----------
    query: the user's question (case-insensitive).
    top_k: maximum number of entries to return.
    kb: optional alternative knowledge base; defaults to the module-level KB.
    """
    entries = KB if kb is None else kb
    # De-duplicate query words so a repeated word can't inflate the score.
    words = set(query.lower().split())
    scored = []
    for entry in entries:
        haystack = (entry["title"] + " " + entry["text"]).lower()
        score = sum(1 for w in words if w in haystack)
        # Boost entries whose URL mentions a query word.  (The original
        # split the *query* on "/", which for normal questions produced one
        # long string that almost never matched a URL — effectively dead.)
        if any(w in entry["url"].lower() for w in words):
            score += 1
        scored.append((score, entry))
    # Stable sort: ties keep their original KB order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    results = [entry for score, entry in scored if score > 0]
    if not results:
        results = [entry for _, entry in scored[:top_k]]
    return results[:top_k]


# --- HF Inference API call (text generation / instruction) ---
def call_hf_inference(prompt: str, max_tokens: int = 512, timeout: int = 60) -> str:
    """Send *prompt* to the Hugging Face Inference API and return generated text.

    Parameters
    ----------
    prompt: the full prompt string sent as the model input.
    max_tokens: cap on newly generated tokens (``max_new_tokens``).
    timeout: per-request timeout in seconds.

    Raises
    ------
    RuntimeError: when HF_API_TOKEN is unset, the HTTP status is not 200,
        or the API returns an error payload.
    """
    if not HF_API_TOKEN:
        raise RuntimeError("HF_API_TOKEN not set")
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": max_tokens}, "options": {"wait_for_model": True}}
    resp = requests.post(HF_API_URL, headers=headers, json=payload, timeout=timeout)
    if resp.status_code != 200:
        raise RuntimeError(f"HF API error {resp.status_code}: {resp.text}")
    data = resp.json()
    # The API may return {"error": ...}, [{"generated_text": ...}, ...],
    # {"generated_text": ...}, or (rarely) a bare string.
    if isinstance(data, dict) and data.get("error"):
        raise RuntimeError(f"HF API error: {data.get('error')}")
    # Guard before indexing: the original `data[0]` raised IndexError on an
    # empty list and TypeError when item 0 was not a dict.
    if isinstance(data, list) and data and isinstance(data[0], dict) and "generated_text" in data[0]:
        return data[0]["generated_text"]
    if isinstance(data, dict) and "generated_text" in data:
        return data["generated_text"]
    if isinstance(data, str):
        return data
    # Fallback: serialize whatever came back so callers still get a string.
    return json.dumps(data)


# --- Compose a RAG prompt: give the LLM the KB snippets and the user's question ---
def build_prompt_with_kb(question: str, kb_entries: List[Dict]) -> str:
    """Assemble the retrieval-augmented prompt.

    Layout: a system-style header, the numbered documentation snippets
    (each with its source URL), an answering instruction, and finally the
    user's question verbatim.
    """
    parts = [
        "You are a helpful assistant that answers questions about building Gradio apps and deploying them to Hugging Face Spaces. "
        "Use the following documentation snippets when relevant. If asked to provide commands or file contents, be explicit.\n\n",
        "Documentation snippets (quote them if you use them):\n",
    ]
    parts.extend(
        f"\n[{idx}] {doc['title']}\n{doc['text']}\nSource: {doc['url']}\n"
        for idx, doc in enumerate(kb_entries, start=1)
    )
    parts.append(
        "\nNow, answer the user question below. If the docs fully answer it, base your answer only on them and cite the source(s). "
        "If you extend beyond the docs, say so and keep it brief.\n\nUser question:\n"
    )
    parts.append(question)
    return "".join(parts)


# --- Bot logic: retrieve KB, call LLM (or fallback), return answer with sources ---
def answer_question(user_question: str) -> Dict:
    """Answer *user_question* with retrieval augmentation.

    Retrieves KB snippets, then either calls the HF Inference API (when a
    token is configured) or composes a KB-only fallback answer.

    Returns a dict with keys "answer" (str), "sources" (list of
    {"title", "url"} dicts), and — except for the empty-question case —
    "meta" describing how the answer was produced.
    """
    user_question = user_question.strip()
    if not user_question:
        return {"answer": "Please type a question.", "sources": []}

    kb_hits = retrieve_kb(user_question, top_k=3)
    prompt = build_prompt_with_kb(user_question, kb_hits)
    # Build the sources list once; the original rebuilt it in three places.
    sources = [{"title": entry["title"], "url": entry["url"]} for entry in kb_hits]

    try:
        if HF_API_TOKEN:
            start = time.time()
            gen = call_hf_inference(prompt, max_tokens=256)
            latency = time.time() - start
            return {"answer": gen.strip(), "sources": sources, "meta": {"via": "hf_inference", "model": HF_MODEL, "latency_s": latency}}
        # No token: compose a best-effort answer directly from the snippets.
        combined = " ".join(f"{entry['title']}: {entry['text']}" for entry in kb_hits)
        answer = (
            "HF API token not found. Here's a KB-based answer composed from the docs:\n\n"
            f"{combined}\n\nIf you want richer responses, set HF_API_TOKEN as an env var in the Space and the app will use an LLM."
        )
        return {"answer": answer, "sources": sources, "meta": {"via": "kb_fallback"}}
    except Exception as exc:  # boundary: surface the error to the UI instead of crashing
        # The original named the exception `e` and then shadowed it with the
        # comprehension variable `e` — it only worked because of py3
        # comprehension scoping.  Renamed to remove the hazard.
        return {"answer": f"Error when querying LLM: {exc}", "sources": sources, "meta": {"via": "error"}}


# --- Utilities: create starter zip for download ---
# Contents of the starter app.py: shown in the UI's code viewer and
# bundled into the downloadable zip.
STARTER_APP = """# Minimal starter app.py (for a Space)
import os
import gradio as gr

def greet(name):
    return f"Hello {name} — replace this with your model or pipeline."

with gr.Blocks() as demo:
    gr.Markdown('# Starter Gradio App')
    name = gr.Textbox(label='Your name')
    out = gr.Textbox(label='Greeting')
    btn = gr.Button('Run')
    btn.click(fn=greet, inputs=name, outputs=out)

if __name__ == '__main__':
    demo.launch(server_name='0.0.0.0', server_port=int(os.environ.get('PORT', 7860)))
"""

# Minimal dependency pins for the starter Space's requirements.txt.
STARTER_REQ = "gradio>=3.30\nrequests>=2.28\n"

# Short README bundled into the starter zip.
STARTER_README = """Starter repo for a Gradio Space.
Files: app.py, requirements.txt
Push to a new Hugging Face Space (SDK: Gradio) and the build system will install requirements and start the app.
"""

def make_starter_zip() -> Tuple[str, io.BytesIO]:
    """Bundle the starter files into an in-memory zip archive.

    Returns a ``(filename, buffer)`` pair; the buffer is rewound to
    offset 0 so callers can read it immediately.
    """
    archive = io.BytesIO()
    members = {
        "app.py": STARTER_APP,
        "requirements.txt": STARTER_REQ,
        "README.md": STARTER_README,
    }
    with zipfile.ZipFile(archive, "w") as bundle:
        for member_name, contents in members.items():
            bundle.writestr(member_name, contents)
    archive.seek(0)
    return ("gradio-starter.zip", archive)


# --- Gradio UI ---
def build_ui():
    """Construct and return the Gradio Blocks UI.

    Fixes over the original wiring:
    - ``on_send`` returned ``(history, sources)`` into outputs
      ``[chatbot, state]``, clobbering the stored conversation with the
      sources list after the first message; it now returns the history to
      both outputs, renders the sources markdown itself, and clears the
      input box.
    - ``gr.Markdown`` has no ``.bind`` method — the original call raised at
      build time; sources are now updated through the send handlers.
    - The download handler returned a ``BytesIO`` into an unrendered
      ``gr.File()``; it now writes the zip to a temp file and returns its
      path via a rendered component.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# LLM-backed Instruction Chatbot — Gradio & Spaces tutor")
        gr.Markdown(
            "Ask questions about Gradio, Hugging Face Spaces, deployment, and starter files. "
            "If you provide HF_API_TOKEN as an environment secret, the answers will be generated by an LLM (Inference API)."
        )

        # NOTE(review): `.style()` is the Gradio 3.x API; on Gradio 4+ this
        # becomes gr.Chatbot(label="Instructor", height=420).
        chatbot = gr.Chatbot(label="Instructor").style(height=420)
        state = gr.State([])  # history: list of (user, bot) pairs

        with gr.Row():
            txt = gr.Textbox(placeholder="Ask: How do I deploy to Spaces? Or: Show me starter app.py", show_label=False, lines=2)
            send = gr.Button("Send")

        # quick action buttons
        with gr.Row():
            b_show_app = gr.Button("Show starter app.py")
            b_hf_info = gr.Button("Explain Hugging Face & Spaces")
            b_gr_info = gr.Button("Explain Gradio")
            b_download = gr.Button("Download starter zip")

        file_view = gr.Code(label="File / Snippet", value="", language="python")
        file_out = gr.File(label="Starter zip")
        sources_md = gr.Markdown()

        def render_sources(sources):
            # Markdown bullet list of source links; empty string when none.
            if not sources:
                return ""
            return "### Sources\n" + "\n".join(f"- [{s['title']}]({s['url']})" for s in sources)

        def on_send(msg, history):
            # Answer `msg`, append the (user, bot) pair, clear the textbox.
            history = history or []
            result = answer_question(msg)
            src_lines = "\n".join(f"- [{s['title']}]({s['url']})" for s in result.get("sources", []))
            via = result.get("meta", {}).get("via", "unknown")
            full = result["answer"] + "\n\n**Sources:**\n" + (src_lines or "- (no sources)") + f"\n\n*(Answer via: {via})*"
            history.append((msg, full))
            # The same history feeds both the visible chatbot and the state.
            return history, history, render_sources(result.get("sources", [])), ""

        def show_starter_app():
            return STARTER_APP

        def show_hf_info():
            # Short KB excerpt about Spaces.
            ent = KB[0]
            return f"**{ent['title']}**\n\n{ent['text']}\n\nSource: {ent['url']}"

        def show_gr_info():
            ent = KB[2]
            return f"**{ent['title']}**\n\n{ent['text']}\n\nSource: {ent['url']}"

        def handle_download():
            import tempfile  # local: only needed for the download path
            name, buf = make_starter_zip()
            path = os.path.join(tempfile.mkdtemp(), name)
            with open(path, "wb") as fh:
                fh.write(buf.read())
            return path

        # wire events
        send.click(on_send, inputs=[txt, state], outputs=[chatbot, state, sources_md, txt])
        txt.submit(on_send, inputs=[txt, state], outputs=[chatbot, state, sources_md, txt])

        b_show_app.click(fn=show_starter_app, outputs=[file_view])
        b_hf_info.click(fn=show_hf_info, outputs=[file_view])
        b_gr_info.click(fn=show_gr_info, outputs=[file_view])
        b_download.click(fn=handle_download, outputs=[file_out])

    return demo

if __name__ == "__main__":
    # The original statement was split by a pasted "::contentReference"
    # artifact, which made the file a syntax error.  Bind to all interfaces
    # so the app is reachable inside a Space container, honoring the PORT
    # env var (Spaces sets it) with 7860 as the default.
    app = build_ui()
    app.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))