Petzys commited on
Commit
606e93c
·
1 Parent(s): bf11196

feat: Added first MVP for XKCD Finder

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +15 -1
  3. app.py +133 -0
  4. requirements.txt +5 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1 +1,15 @@
1
- # xkcd_finder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: XKCD Finder
3
+ emoji: 💬
4
+ colorFrom: yellow
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.42.0
8
+ app_file: app.py
9
+ pinned: false
10
+ hf_oauth: true
11
+ hf_oauth_scopes:
12
+ - inference-api
13
+ ---
14
+
15
+ XKCD Finder
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import pickle
5
+ import faiss
6
+ import gradio as gr
7
+ from datasets import load_dataset
8
+ from sentence_transformers import SentenceTransformer
9
+ from huggingface_hub import InferenceClient
10
+
11
# --- Config ---
INDEX_FILE = "xkcd.index"  # on-disk cache for the FAISS index
META_FILE = "meta.pkl"     # on-disk cache for the comic metadata (pickle)
# Chat model is overridable via the CHAT_MODEL environment variable.
CHAT_MODEL = os.getenv("CHAT_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
15
+
16
# --- Build / load index ---
def build_index():
    """Build a FAISS L2 index over the xkcd dataset and cache it to disk.

    Downloads the dataset, embeds one text per comic (id + title +
    transcript + explanation), writes the index to INDEX_FILE and the
    pickle-friendly metadata to META_FILE.

    Returns:
        A ``(index, meta)`` tuple where ``meta[i]`` is the metadata dict
        for row ``i`` of the FAISS index.
    """
    print("Building FAISS index...")
    ds = load_dataset("olivierdehaene/xkcd", split="train")
    model = SentenceTransformer("all-MiniLM-L6-v2")

    texts = []
    meta = []
    # Single pass: build the embedding text and the metadata entry together
    # so index rows and meta entries cannot drift out of sync.
    for ex in ds:
        id_ = str(ex["id"]) if ex["id"] else ""
        title = ex["title"] or ""
        transcript = ex["transcript"] or ""
        # Normalize a missing or None explanation to "" consistently; the
        # old code stored None in meta, which later rendered as the literal
        # string "None" in the LLM prompt context.
        explanation = ex.get("explanation") or ""
        texts.append(f"{id_} {title} {transcript} {explanation}")
        meta.append(
            {
                "id": ex["id"],
                "title": ex["title"],
                "transcript": ex["transcript"],
                "explanation": explanation,
            }
        )

    embeddings = model.encode(texts, convert_to_numpy=True, show_progress_bar=True)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    faiss.write_index(index, INDEX_FILE)

    # Store just the metadata we need (pickle-friendly).
    with open(META_FILE, "wb") as f:
        pickle.dump(meta, f)

    return index, meta
49
+
50
def _load_or_build():
    """Return (index, meta), reading the on-disk cache when both files exist."""
    if os.path.exists(INDEX_FILE) and os.path.exists(META_FILE):
        print("Loading cached index...")
        cached_index = faiss.read_index(INDEX_FILE)
        with open(META_FILE, "rb") as f:
            cached_meta = pickle.load(f)
        return cached_index, cached_meta
    return build_index()

index, meta = _load_or_build()

# Query-time embedder; must be the same model that built the index so the
# query vectors live in the same embedding space.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
59
+
60
# --- Chat handler ---
def respond(
    message: str,
    history: list[dict[str, str]],
    oauth: gr.OAuthToken | None = None,  # Gradio injects this when available
):
    """Yield the model's comic recommendation for the user's situation.

    Retrieves the 5 nearest comics from the FAISS index and asks the chat
    model to pick the best fit. Requires a signed-in Hugging Face user so
    the Inference API call runs under their OAuth token.

    Args:
        message: The user's free-text description of their situation.
        history: Prior chat turns (unused; required by gr.ChatInterface).
        oauth: OAuth token injected by Gradio when the user is signed in.

    Yields:
        A single string: the model's answer, or a sign-in / parse-failure
        notice.
    """
    if not oauth:
        yield "⚠️ Please sign in with your Hugging Face account (top of the page)"
        return
    token = oauth.token

    # Embed the query and search FAISS for the nearest comics.
    query_vec = embedder.encode([message], convert_to_numpy=True)
    _distances, neighbor_ids = index.search(query_vec, 5)
    # FAISS pads the result with -1 when fewer than k neighbors exist; skip
    # those so we never index meta with -1 (which would silently return the
    # last entry).
    candidates = [meta[int(i)] for i in neighbor_ids[0] if i >= 0]

    context = "\n".join(
        f"[{c['id']}] {c['title']}\nTranscript: {c['transcript']}\nExplanation: {c['explanation']}"
        for c in candidates
    )
    prompt = f"""Situation: "{message}"
Here are candidate xkcd comics:
{context}

Which comic fits best and why?
Please answer with the comic ID, URL (https://xkcd.com/ID/) and a short explanation.
"""

    print("[PROMPT] " + prompt)
    client = InferenceClient(model=CHAT_MODEL, api_key=token)  # 'api_key' alias also works
    resp = client.chat_completion(
        messages=[
            {"role": "system", "content": "You are a helpful assistant that selects the most suitable xkcd comic."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=200,
        temperature=0.0,
    )

    # Be tolerant to slight schema differences between client versions:
    # message may be an attribute-style object or a plain dict.
    try:
        choice = resp.choices[0]
        msg = getattr(choice, "message", None)
        if isinstance(msg, dict):
            out = msg.get("content", "")
        else:
            out = getattr(msg, "content", "") or getattr(choice, "text", "")
    except Exception:
        # Best-effort fallback: surface the raw response rather than crash.
        out = str(resp)

    yield out.strip() or "Sorry, I couldn't parse the model response."
111
+
112
# --- UI ---
with gr.Blocks() as demo:
    gr.Markdown("# xkcd Comic Finder")
    gr.Markdown(
        "Sign in with your Hugging Face account so the app can call the model via the Inference API."
        # Scope name must match the `hf_oauth_scopes` entry in README.md;
        # the previous text said `inference:api` (colon), which is not the
        # scope this Space actually declares.
        "\n\n> If you deploy to a Space, add `hf_oauth: true` in your Space metadata and grant the `inference-api` scope."
    )
    gr.LoginButton()  # Shows "Sign in with Hugging Face"

    gr.ChatInterface(
        fn=respond,
        title="xkcd Comic Finder",
        description="Find the most suitable xkcd comic for your situation. Use the login button above.",
        examples=[
            "I need a comic about procrastination.",
            "A comic for programmers debugging code.",
            "Life advice in comic form.",
        ],
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ faiss-cpu
3
+ transformers
4
+ sentence-transformers
5
+ datasets