 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# app.py
"""
Instruction Chatbot using LLM + small knowledgebase (Hugging Face docs + Gradio docs).
Defaults to calling the Hugging Face Inference API (set HF_API_TOKEN in env).
If HF_API_TOKEN is missing, falls back to a KB-only answer.

Usage (locally):
  1. export HF_API_TOKEN="hf_...."   # optional but recommended for LLM replies
  2. python app.py
  3. open http://localhost:7860

On Spaces:
 - Put app.py and requirements.txt into the repo root and push to a Gradio Space.
"""

import os
import json
import time
import io
import zipfile
from typing import List, Tuple, Dict, Optional

import requests
import gradio as gr

# --- Config: choose model used on HF Inference API if token provided ---
HF_MODEL = os.environ.get("HF_MODEL", "google/flan-t5-small")
HF_API_TOKEN = os.environ.get("HF_API_TOKEN", None)  # put your token in env on Spaces (use Secrets)
HF_API_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"

# --- Knowledge base: short curated snippets + source URLs ---
# These entries were assembled from official docs (Hugging Face + Gradio),
# and are used for retrieval-augmented answers.
# Each entry carries a stable "id", a display "title", the snippet "text"
# (used both for retrieval scoring and for prompt construction), and the
# source "url" surfaced to the user as a citation.
KB: List[Dict[str, str]] = [
    {
        "id": "spaces_overview",
        "title": "Hugging Face Spaces overview",
        "text": (
            "Spaces are hosted apps on Hugging Face Hub. "
            "A Space is backed by a git-like repository: you push files (app.py, requirements.txt) and the "
            "Space automatically rebuilds and restarts on new commits."
        ),
        "url": "https://huggingface.co/docs/hub/spaces",
    },
    {
        "id": "spaces_repo",
        "title": "Spaces store code in a repo",
        "text": (
            "Spaces store your code inside a repository, similar to model and dataset repos. "
            "Use the repo workflow (commit/push) to update your app. The build system installs requirements.txt."
        ),
        "url": "https://huggingface.co/docs/hub/main/en/spaces-overview",
    },
    {
        "id": "gradio_interface",
        "title": "Gradio Interface basics",
        "text": (
            "Gradio's Interface / Blocks lets you wrap Python functions with input and output components "
            "to create web UIs quickly. Define input components, outputs, and a function and Gradio serves it."
        ),
        "url": "https://www.gradio.app/docs/gradio/interface",
    },
    {
        "id": "gradio_integration",
        "title": "Gradio ↔ Hugging Face integrations",
        "text": (
            "Gradio integrates with Hugging Face Inference Endpoints and Spaces. "
            "You can call Inference API from a Gradio app to get model responses without installing heavy libraries."
        ),
        "url": "https://www.gradio.app/guides/using-hugging-face-integrations",
    },
]

# --- Simple retriever: score KB entries by keyword overlap (fast, explainable) ---
def retrieve_kb(query: str, top_k: int = 3, kb: Optional[List[Dict]] = None) -> List[Dict]:
    """Return up to *top_k* knowledge-base entries ranked by keyword overlap.

    Scoring is intentionally simple and explainable: +1 per distinct query
    word found (as a substring, matching the original behavior) in an
    entry's title or text, plus +1 if any query word appears in the entry's
    URL.  When nothing scores above zero, the first *top_k* entries are
    returned as a fallback so callers always get context.

    Parameters
    ----------
    query: the user's question (case-insensitive).
    top_k: maximum number of entries to return.
    kb: optional alternative knowledge base; defaults to the module-level KB.
    """
    entries = KB if kb is None else kb
    # De-duplicate query words so a repeated word can't inflate the score.
    words = set(query.lower().split())
    scored = []
    for entry in entries:
        haystack = (entry["title"] + " " + entry["text"]).lower()
        score = sum(1 for w in words if w in haystack)
        # Boost entries whose URL mentions a query word.  (The original
        # split the *query* on "/", which for normal questions produced one
        # long string that almost never matched a URL — effectively dead.)
        if any(w in entry["url"].lower() for w in words):
            score += 1
        scored.append((score, entry))
    # Stable sort: ties keep their original KB order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    results = [entry for score, entry in scored if score > 0]
    if not results:
        results = [entry for _, entry in scored[:top_k]]
    return results[:top_k]


# --- HF Inference API call (text generation / instruction) ---
def call_hf_inference(prompt: str, max_tokens: int = 512, timeout: int = 60) -> str:
    """Send *prompt* to the Hugging Face Inference API and return generated text.

    Parameters
    ----------
    prompt: the full prompt string sent as the model input.
    max_tokens: cap on newly generated tokens (``max_new_tokens``).
    timeout: per-request timeout in seconds.

    Raises
    ------
    RuntimeError: when HF_API_TOKEN is unset, the HTTP status is not 200,
        or the API returns an error payload.
    """
    if not HF_API_TOKEN:
        raise RuntimeError("HF_API_TOKEN not set")
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": max_tokens}, "options": {"wait_for_model": True}}
    resp = requests.post(HF_API_URL, headers=headers, json=payload, timeout=timeout)
    if resp.status_code != 200:
        raise RuntimeError(f"HF API error {resp.status_code}: {resp.text}")
    data = resp.json()
    # The API may return {"error": ...}, [{"generated_text": ...}, ...],
    # {"generated_text": ...}, or (rarely) a bare string.
    if isinstance(data, dict) and data.get("error"):
        raise RuntimeError(f"HF API error: {data.get('error')}")
    # Guard before indexing: the original `data[0]` raised IndexError on an
    # empty list and TypeError when item 0 was not a dict.
    if isinstance(data, list) and data and isinstance(data[0], dict) and "generated_text" in data[0]:
        return data[0]["generated_text"]
    if isinstance(data, dict) and "generated_text" in data:
        return data["generated_text"]
    if isinstance(data, str):
        return data
    # Fallback: serialize whatever came back so callers still get a string.
    return json.dumps(data)


# --- Compose a RAG prompt: give the LLM the KB snippets and the user's question ---
def build_prompt_with_kb(question: str, kb_entries: List[Dict]) -> str:
    """Assemble the retrieval-augmented prompt.

    Layout: a system-style header, the numbered documentation snippets
    (each with its source URL), an answering instruction, and finally the
    user's question verbatim.
    """
    parts = [
        "You are a helpful assistant that answers questions about building Gradio apps and deploying them to Hugging Face Spaces. "
        "Use the following documentation snippets when relevant. If asked to provide commands or file contents, be explicit.\n\n",
        "Documentation snippets (quote them if you use them):\n",
    ]
    parts.extend(
        f"\n[{idx}] {doc['title']}\n{doc['text']}\nSource: {doc['url']}\n"
        for idx, doc in enumerate(kb_entries, start=1)
    )
    parts.append(
        "\nNow, answer the user question below. If the docs fully answer it, base your answer only on them and cite the source(s). "
        "If you extend beyond the docs, say so and keep it brief.\n\nUser question:\n"
    )
    parts.append(question)
    return "".join(parts)


# --- Bot logic: retrieve KB, call LLM (or fallback), return answer with sources ---
def answer_question(user_question: str) -> Dict:
    """Answer *user_question* with retrieval augmentation.

    Retrieves KB snippets, then either calls the HF Inference API (when a
    token is configured) or composes a KB-only fallback answer.

    Returns a dict with keys "answer" (str), "sources" (list of
    {"title", "url"} dicts), and — except for the empty-question case —
    "meta" describing how the answer was produced.
    """
    user_question = user_question.strip()
    if not user_question:
        return {"answer": "Please type a question.", "sources": []}

    kb_hits = retrieve_kb(user_question, top_k=3)
    prompt = build_prompt_with_kb(user_question, kb_hits)
    # Build the sources list once; the original rebuilt it in three places.
    sources = [{"title": entry["title"], "url": entry["url"]} for entry in kb_hits]

    try:
        if HF_API_TOKEN:
            start = time.time()
            gen = call_hf_inference(prompt, max_tokens=256)
            latency = time.time() - start
            return {"answer": gen.strip(), "sources": sources, "meta": {"via": "hf_inference", "model": HF_MODEL, "latency_s": latency}}
        # No token: compose a best-effort answer directly from the snippets.
        combined = " ".join(f"{entry['title']}: {entry['text']}" for entry in kb_hits)
        answer = (
            "HF API token not found. Here's a KB-based answer composed from the docs:\n\n"
            f"{combined}\n\nIf you want richer responses, set HF_API_TOKEN as an env var in the Space and the app will use an LLM."
        )
        return {"answer": answer, "sources": sources, "meta": {"via": "kb_fallback"}}
    except Exception as exc:  # boundary: surface the error to the UI instead of crashing
        # The original named the exception `e` and then shadowed it with the
        # comprehension variable `e` — it only worked because of py3
        # comprehension scoping.  Renamed to remove the hazard.
        return {"answer": f"Error when querying LLM: {exc}", "sources": sources, "meta": {"via": "error"}}


# --- Utilities: create starter zip for download ---
# Contents of the starter app.py: shown in the UI's code viewer and
# bundled into the downloadable zip.
STARTER_APP = """# Minimal starter app.py (for a Space)
import os
import gradio as gr

def greet(name):
    return f"Hello {name} — replace this with your model or pipeline."

with gr.Blocks() as demo:
    gr.Markdown('# Starter Gradio App')
    name = gr.Textbox(label='Your name')
    out = gr.Textbox(label='Greeting')
    btn = gr.Button('Run')
    btn.click(fn=greet, inputs=name, outputs=out)

if __name__ == '__main__':
    demo.launch(server_name='0.0.0.0', server_port=int(os.environ.get('PORT', 7860)))
"""

# Minimal dependency pins for the starter Space's requirements.txt.
STARTER_REQ = "gradio>=3.30\nrequests>=2.28\n"

# Short README bundled into the starter zip.
STARTER_README = """Starter repo for a Gradio Space.
Files: app.py, requirements.txt
Push to a new Hugging Face Space (SDK: Gradio) and the build system will install requirements and start the app.
"""

def make_starter_zip() -> Tuple[str, io.BytesIO]:
    """Bundle the starter files into an in-memory zip archive.

    Returns a ``(filename, buffer)`` pair; the buffer is rewound to
    offset 0 so callers can read it immediately.
    """
    archive = io.BytesIO()
    members = {
        "app.py": STARTER_APP,
        "requirements.txt": STARTER_REQ,
        "README.md": STARTER_README,
    }
    with zipfile.ZipFile(archive, "w") as bundle:
        for member_name, contents in members.items():
            bundle.writestr(member_name, contents)
    archive.seek(0)
    return ("gradio-starter.zip", archive)


# --- Gradio UI ---
def build_ui():
    """Construct and return the Gradio Blocks UI.

    Fixes over the original wiring:
    - ``on_send`` returned ``(history, sources)`` into outputs
      ``[chatbot, state]``, clobbering the stored conversation with the
      sources list after the first message; it now returns the history to
      both outputs, renders the sources markdown itself, and clears the
      input box.
    - ``gr.Markdown`` has no ``.bind`` method — the original call raised at
      build time; sources are now updated through the send handlers.
    - The download handler returned a ``BytesIO`` into an unrendered
      ``gr.File()``; it now writes the zip to a temp file and returns its
      path via a rendered component.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# LLM-backed Instruction Chatbot — Gradio & Spaces tutor")
        gr.Markdown(
            "Ask questions about Gradio, Hugging Face Spaces, deployment, and starter files. "
            "If you provide HF_API_TOKEN as an environment secret, the answers will be generated by an LLM (Inference API)."
        )

        # NOTE(review): `.style()` is the Gradio 3.x API; on Gradio 4+ this
        # becomes gr.Chatbot(label="Instructor", height=420).
        chatbot = gr.Chatbot(label="Instructor").style(height=420)
        state = gr.State([])  # history: list of (user, bot) pairs

        with gr.Row():
            txt = gr.Textbox(placeholder="Ask: How do I deploy to Spaces? Or: Show me starter app.py", show_label=False, lines=2)
            send = gr.Button("Send")

        # quick action buttons
        with gr.Row():
            b_show_app = gr.Button("Show starter app.py")
            b_hf_info = gr.Button("Explain Hugging Face & Spaces")
            b_gr_info = gr.Button("Explain Gradio")
            b_download = gr.Button("Download starter zip")

        file_view = gr.Code(label="File / Snippet", value="", language="python")
        file_out = gr.File(label="Starter zip")
        sources_md = gr.Markdown()

        def render_sources(sources):
            # Markdown bullet list of source links; empty string when none.
            if not sources:
                return ""
            return "### Sources\n" + "\n".join(f"- [{s['title']}]({s['url']})" for s in sources)

        def on_send(msg, history):
            # Answer `msg`, append the (user, bot) pair, clear the textbox.
            history = history or []
            result = answer_question(msg)
            src_lines = "\n".join(f"- [{s['title']}]({s['url']})" for s in result.get("sources", []))
            via = result.get("meta", {}).get("via", "unknown")
            full = result["answer"] + "\n\n**Sources:**\n" + (src_lines or "- (no sources)") + f"\n\n*(Answer via: {via})*"
            history.append((msg, full))
            # The same history feeds both the visible chatbot and the state.
            return history, history, render_sources(result.get("sources", [])), ""

        def show_starter_app():
            return STARTER_APP

        def show_hf_info():
            # Short KB excerpt about Spaces.
            ent = KB[0]
            return f"**{ent['title']}**\n\n{ent['text']}\n\nSource: {ent['url']}"

        def show_gr_info():
            ent = KB[2]
            return f"**{ent['title']}**\n\n{ent['text']}\n\nSource: {ent['url']}"

        def handle_download():
            import tempfile  # local: only needed for the download path
            name, buf = make_starter_zip()
            path = os.path.join(tempfile.mkdtemp(), name)
            with open(path, "wb") as fh:
                fh.write(buf.read())
            return path

        # wire events
        send.click(on_send, inputs=[txt, state], outputs=[chatbot, state, sources_md, txt])
        txt.submit(on_send, inputs=[txt, state], outputs=[chatbot, state, sources_md, txt])

        b_show_app.click(fn=show_starter_app, outputs=[file_view])
        b_hf_info.click(fn=show_hf_info, outputs=[file_view])
        b_gr_info.click(fn=show_gr_info, outputs=[file_view])
        b_download.click(fn=handle_download, outputs=[file_out])

    return demo

if __name__ == "__main__":
    # The original statement was split by a pasted "::contentReference"
    # artifact, which made the file a syntax error.  Bind to all interfaces
    # so the app is reachable inside a Space container, honoring the PORT
    # env var (Spaces sets it) with 7860 as the default.
    app = build_ui()
    app.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))