Danielos100 committed on
Commit
4ea9d05
·
verified ·
1 Parent(s): 89161e2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +414 -0
app.py ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ # 🎁 GIfty — Smart Gift Recommender (Embeddings + FAISS)
3
+ # Dataset: ckandemir/amazon-products (Hugging Face)
4
+ # UI: Gradio (English)
5
+ #
6
+ # Works on common Spaces stacks (no RangeSlider; two sliders for budget)
7
+ # Chosen model: sentence-transformers/all-MiniLM-L6-v2 (fast, strong baseline)
8
+ #
9
+ # Note: Embeddings + the FAISS index are built once at startup (module import) and kept in memory.
10
+
11
+ import os, re, random
12
+ from typing import Dict, List, Tuple
13
+
14
+ import numpy as np
15
+ import pandas as pd
16
+ import gradio as gr
17
+ from datasets import load_dataset
18
+ from sentence_transformers import SentenceTransformer
19
+ import faiss
20
+
21
+ # ---------------- Config ----------------
22
+ MAX_ROWS = int(os.getenv("MAX_ROWS", "6000")) # cap to keep build time reasonable on CPU
23
+ TITLE = "# 🎁 GIfty — Smart Gift Recommender\n*Top-3 similar picks + 1 generated idea + personalized message*"
24
+
25
+ OCCASION_OPTIONS = [
26
+ "birthday", "anniversary", "valentines", "graduation",
27
+ "housewarming", "christmas", "hanukkah", "thank_you",
28
+ ]
29
+
30
+ AGE_OPTIONS = {
31
+ "any": "any",
32
+ "kid (3–12)": "kids",
33
+ "teen (13–17)": "teens",
34
+ "adult (18–64)": "adult",
35
+ "senior (65+)": "senior",
36
+ }
37
+
38
+ INTEREST_OPTIONS = [
39
+ "reading","writing","tech","travel","fitness","cooking","tea","coffee",
40
+ "games","movies","plants","music","design","stationery","home","experience",
41
+ "digital","aesthetic","premium","eco","practical","minimalist","social","party",
42
+ "photography","outdoors","pets","beauty","jewelry"
43
+ ]
44
+
45
+ # Query-expansion dictionary (improves semantic match with catalog wording)
46
+ SYNONYMS = {
47
+ "music": ["audio", "headphones", "vinyl", "earbuds", "speaker"],
48
+ "tech": ["electronics", "gadgets", "computer", "smart", "device"],
49
+ "games": ["board game", "puzzle", "gaming", "toy"],
50
+ "home": ["home decor", "kitchen", "appliance", "furniture"],
51
+ "cooking": ["kitchen", "cookware", "chef", "bake"],
52
+ "fitness": ["sports", "yoga", "run", "workout"],
53
+ "photography": ["camera", "lens", "tripod"],
54
+ "travel": ["luggage", "passport", "map", "travel"],
55
+ "beauty": ["skincare", "makeup", "fragrance", "cosmetic"],
56
+ "jewelry": ["ring", "necklace", "bracelet"],
57
+ "coffee": ["espresso", "mug", "grinder", "coffee"],
58
+ "tea": ["teapot", "infuser", "tea"],
59
+ "plants": ["garden", "planter", "indoor"],
60
+ "reading": ["book", "novel", "literature"],
61
+ "writing": ["notebook", "pen", "planner"],
62
+ "pets": ["pet", "dog", "cat"],
63
+ "outdoors": ["camping", "hiking", "outdoor"],
64
+ "eco": ["sustainable", "recycled", "eco"],
65
+ "digital": ["online", "voucher"],
66
+ "experience": ["voucher", "ticket", "workshop"],
67
+ }
68
+
69
+ # ---------------- Data loading & schema ----------------
70
def _to_price_usd(x):
    """Parse a price value such as ``"$1,234.50"`` into a float.

    Surrounding whitespace, ``$`` signs and thousands separators are removed
    before conversion.

    Returns:
        The price as a finite float, or ``np.nan`` when the value cannot be
        read as a single finite number (``None``, free text, ranges, "inf").
    """
    s = str(x).strip().replace("$", "").replace(",", "")
    try:
        value = float(s)
    except ValueError:
        # float() rejecting the text is the only expected failure here.
        return np.nan
    # "inf"/"nan" parse successfully but are not usable prices.
    return value if np.isfinite(value) else np.nan
76
+
77
# Word-aware patterns: plain substring checks misfire on short keywords
# ("ya" in "kayak", "teen" in "canteen", "toy" in "toyota").
_KIDS_CATEGORY_RE = re.compile(r"baby|toddler|infant|board games|\btoys?\b")
_TEEN_CATEGORY_RE = re.compile(r"\bteen|\byoung adult|\bya\b")


def _infer_age_from_category(cat: str) -> str:
    """Guess an age bucket from a category string.

    Args:
        cat: Category/tag text; anything that is not a string (``None``,
            NaN) is treated as empty.

    Returns:
        ``"kids"``, ``"teens"`` or ``"any"`` (the default when nothing
        matches).  Kids keywords take precedence over teen keywords.
    """
    s = cat.lower() if isinstance(cat, str) else ""
    if _KIDS_CATEGORY_RE.search(s):
        return "kids"
    if _TEEN_CATEGORY_RE.search(s):
        return "teens"
    return "any"
83
+
84
# (pattern, occasions) pairs.  Keywords are matched as substrings of the
# lower-cased category, except "pen", which must be a whole word so that
# "pendant" or "opener" are not mistaken for stationery.
# The former sports/fitness rule only added "birthday", which every item
# already gets, so it is omitted.
_OCCASION_RULES = (
    (re.compile(r"home & kitchen|furniture|home décor|home decor|garden|tools|appliance|cookware|kitchen"),
     ("housewarming", "thank_you")),
    (re.compile(r"beauty|jewelry|watch|fragrance|cosmetic|makeup|skincare"),
     ("valentines", "anniversary")),
    (re.compile(r"toys|board game|puzzle|kids|lego"),
     ("hanukkah", "christmas")),
    (re.compile(r"office|stationery|notebook|\bpens?\b|planner"),
     ("graduation", "thank_you")),
    (re.compile(r"electronics|camera|audio|headphones|gaming|computer"),
     ("birthday", "christmas")),
    (re.compile(r"book|novel|literature"),
     ("graduation", "thank_you")),
)


def _infer_occasion_tags(cat: str) -> str:
    """Derive occasion tags from a category string.

    Args:
        cat: Category/tag text; non-string values are treated as empty.

    Returns:
        A comma-separated, alphabetically sorted list of occasions.
        ``"birthday"`` is always included.
    """
    s = cat.lower() if isinstance(cat, str) else ""
    tags = {"birthday"}
    for pattern, occasions in _OCCASION_RULES:
        if pattern.search(s):
            tags.update(occasions)
    return ",".join(sorted(tags))
102
+
103
def map_amazon_to_schema(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Map a raw Amazon-products frame onto the app's catalog schema.

    Source columns are looked up case-insensitively ("product name",
    "description", "category", "selling price", "image").  Missing columns
    and missing cells become empty strings (NaN for the price), so the text
    "nan" never reaches the UI or the embedded documents.

    Returns:
        A frame indexed like ``df_raw`` with the columns ``name``,
        ``short_desc``, ``tags``, ``price_usd``, ``age_range``,
        ``gender_tags``, ``occasion_tags``, ``persona_fit``, ``image_url``.
    """
    cols = {str(c).lower().strip(): c for c in df_raw.columns}

    def text_col(key: str) -> pd.Series:
        # Always return a Series aligned with df_raw so the frame can be
        # built even when every source column is absent.
        if key not in cols:
            return pd.Series("", index=df_raw.index, dtype=object)
        col = df_raw[cols[key]]
        return col.astype(object).where(col.notna(), "").astype(str).str.strip()

    category = text_col("category")
    if "selling price" in cols:
        price = df_raw[cols["selling price"]].map(_to_price_usd)
    else:
        price = pd.Series(np.nan, index=df_raw.index, dtype=float)

    out = pd.DataFrame(
        {
            "name": text_col("product name").str.slice(0, 120),
            "short_desc": text_col("description").str.slice(0, 500),
            # "A | B" -> "a, b"; an explicit regex avoids depending on the
            # pandas-version-specific default of str.replace.
            "tags": category.str.replace(r"\s*\|\s*", ", ", regex=True).str.lower(),
            "price_usd": price,
            "age_range": "",
            "gender_tags": "any",
            "occasion_tags": "",
            "persona_fit": category.str.lower(),
            "image_url": text_col("image"),
        },
        index=df_raw.index,
    )
    # infer occasion & age from the normalized category text
    out["occasion_tags"] = out["tags"].map(_infer_occasion_tags)
    out["age_range"] = out["tags"].map(_infer_age_from_category).fillna("any")
    return out
126
+
127
def build_doc(row: pd.Series) -> str:
    """Build the text document that gets embedded for one catalog row.

    Joins name, description, tags, persona fit, occasion tags and age range
    with ``" | "``.  Missing values (absent key, ``None``, NaN) and blank
    strings are skipped, so "None"/"nan" never end up in the document.

    Args:
        row: Any mapping-like object exposing ``.get(key, default)``.
    """
    fields = ("name", "short_desc", "tags", "persona_fit", "occasion_tags", "age_range")
    parts = []
    for field in fields:
        value = row.get(field, "")
        # value != value is True only for NaN
        if value is None or (isinstance(value, float) and value != value):
            continue
        text = str(value).strip()
        if text:
            parts.append(text)
    return " | ".join(parts)
137
+
138
def load_catalog() -> pd.DataFrame:
    """Load the product catalog and prepare it for indexing.

    Tries the ``ckandemir/amazon-products`` dataset and falls back to a tiny
    built-in catalog if loading fails.  The result is mapped to the app
    schema, de-duplicated on (name, short_desc), capped at ``MAX_ROWS`` rows
    with a fixed-seed sample, and given a ``doc`` column for embedding.

    Returns:
        A frame with a fresh 0..n-1 index.  The contiguous index matters
        because the FAISS index built from ``doc`` reports row positions.
    """
    try:
        ds = load_dataset("ckandemir/amazon-products", split="train")
        raw = ds.to_pandas()
    except Exception as exc:
        # Deliberate best-effort: the app must still start when the dataset
        # cannot be fetched, but the reason should be visible in the logs.
        import logging
        logging.getLogger(__name__).warning(
            "Could not load dataset (%r); using built-in fallback catalog.", exc
        )
        raw = pd.DataFrame({
            "Product Name": ["Wireless Earbuds", "Coffee Sampler", "Strategy Board Game"],
            "Description": [
                "Compact earbuds with noise isolation and long battery life.",
                "Four single-origin roasts from small roasters.",
                "Modern eurogame for 2–4 players, 45–60 minutes."
            ],
            "Category": ["Electronics | Audio","Grocery | Coffee","Toys & Games | Board Games"],
            "Selling Price": ["$59.00","$34.00","$39.00"],
            "Image": ["","",""],
        })
    df = map_amazon_to_schema(raw).drop_duplicates(subset=["name","short_desc"])
    if len(df) > MAX_ROWS:
        df = df.sample(n=MAX_ROWS, random_state=42)
    # Reset unconditionally: drop_duplicates leaves gaps in the index, which
    # would otherwise desynchronize labels from embedding row positions.
    df = df.reset_index(drop=True)
    df["doc"] = [build_doc(row) for _, row in df.iterrows()]
    return df
160
+
161
+ CATALOG = load_catalog()
162
+
163
+ # ---------------- Business filters ----------------
164
def _contains_ci(series: pd.Series, needle: str) -> pd.Series:
    """Return a boolean mask: does each value contain ``needle`` (ignoring case)?

    ``needle`` is matched literally.  An empty needle matches every row, and
    missing values are treated as empty strings.
    """
    if needle:
        literal = re.escape(needle)
        cleaned = series.fillna("")
        return cleaned.str.contains(literal, case=False, regex=True)
    return pd.Series(True, index=series.index)
168
+
169
def filter_business(df: pd.DataFrame, budget_min=None, budget_max=None,
                    occasion: str=None, age_range: str="any") -> pd.DataFrame:
    """Apply the hard business filters (budget, occasion, age) to a catalog.

    Each filter is applied only when its argument is set.  Rows with an
    unknown price count as 0 for the lower bound and as 1e9 for the upper
    bound.  Rows whose age range is "any" (or missing) pass every age filter.

    Returns:
        The rows of ``df`` that satisfy every active filter.
    """
    conditions = []
    if budget_min is not None:
        conditions.append(df["price_usd"].fillna(0) >= float(budget_min))
    if budget_max is not None:
        conditions.append(df["price_usd"].fillna(1e9) <= float(budget_max))
    if occasion:
        conditions.append(_contains_ci(df["occasion_tags"], occasion))
    if age_range and age_range != "any":
        acceptable = [age_range, "any"]
        conditions.append(df["age_range"].fillna("any").isin(acceptable))

    keep = pd.Series(True, index=df.index)
    for condition in conditions:
        keep &= condition
    return df[keep]
181
+
182
+ # ---------------- Embeddings + FAISS ----------------
183
class EmbeddingIndex:
    """Sentence-embedding index over a list of documents, backed by FAISS.

    Embeddings are L2-normalized and stored in an inner-product index, so
    the scores returned by :meth:`search` are cosine similarities.
    """

    def __init__(self, docs: List[str], model_id: str):
        """Encode ``docs`` with the model ``model_id`` and index them.

        Raises:
            ValueError: if ``docs`` yields no embeddings.
        """
        self.model_id = model_id
        self.model = SentenceTransformer(model_id)
        embs = self.model.encode(docs, convert_to_numpy=True, normalize_embeddings=True)
        # FAISS only accepts C-contiguous float32 matrices.
        embs = np.ascontiguousarray(embs, dtype=np.float32)
        if embs.ndim != 2 or embs.shape[0] == 0:
            raise ValueError("EmbeddingIndex needs at least one document to index")
        self.dim = embs.shape[1]
        self.index = faiss.IndexFlatIP(self.dim)  # cosine if normalized
        self.index.add(embs)

    def search(self, query: str, topn: int) -> Tuple[np.ndarray, np.ndarray]:
        """Return ``(similarities, row_positions)`` of the best matches.

        ``topn`` is clamped to the number of indexed documents; a
        non-positive ``topn`` yields two empty arrays.  Positions refer to
        the order of ``docs`` passed to the constructor.
        """
        topn = min(int(topn), self.index.ntotal)
        if topn <= 0:
            return np.empty(0, dtype=np.float32), np.empty(0, dtype=np.int64)
        qv = self.model.encode([query], convert_to_numpy=True, normalize_embeddings=True)
        qv = np.ascontiguousarray(qv, dtype=np.float32)
        sims, idxs = self.index.search(qv, topn)
        # FAISS pads missing results with id -1; never hand those to callers.
        valid = idxs[0] >= 0
        return sims[0][valid], idxs[0][valid]
196
+
197
+ # Choose the best all-around model for this app:
198
+ EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2" # fast & good quality
199
+ EMB_INDEX = EmbeddingIndex(CATALOG["doc"].tolist(), EMBED_MODEL_ID)
200
+
201
+ # ---------------- Query building ----------------
202
def expand_with_synonyms(tokens: List[str]) -> List[str]:
    """Return each normalized token followed by its ``SYNONYMS`` entries.

    Tokens are stripped and lower-cased; blank tokens are dropped.  Order is
    preserved and duplicates are kept.
    """
    expanded: List[str] = []
    for token in (raw.strip().lower() for raw in tokens):
        if token:
            expanded += [token, *SYNONYMS.get(token, [])]
    return expanded
210
+
211
def profile_to_query(profile: Dict) -> str:
    """
    Turn a recipient profile into the text query used for semantic search.

    The query has up to three " | "-separated sections: the interests with
    their synonyms (listed twice to give them more weight), the lower-cased
    occasion, and the age range unless it is "any".
    """
    cleaned = [raw.strip().lower() for raw in profile.get("interests", []) if raw.strip()]
    weighted = expand_with_synonyms(cleaned) * 2  # weight x2
    occasion = (profile.get("occasion", "") or "").lower()
    age = profile.get("age_range", "any")

    sections = [", ".join(weighted)] if weighted else []
    if occasion:
        sections.append(occasion)
    if age and age != "any":
        sections.append(age)
    return " | ".join(sections).strip()
226
+
227
def recommend_topk(profile: Dict, k: int=3) -> pd.DataFrame:
    """Return up to ``k`` catalog items that best match ``profile``.

    Candidates are ranked by embedding similarity over the whole catalog and
    kept only if they pass the business filters (budget, occasion, age).
    Items with a name that was already picked are skipped.  If the filters
    match nothing, the whole catalog is eligible.

    The search starts with a window of ``k * 50`` neighbours and widens to
    the full catalog when that window does not contain ``k`` eligible items,
    so strict filters cannot silently empty the result.

    Returns:
        A frame with the display columns plus ``similarity``.  If no ranked
        item qualifies, the first ``k`` eligible rows with NaN similarity.
    """
    columns = ["name","short_desc","price_usd","occasion_tags","persona_fit","age_range","image_url","similarity"]
    if k <= 0:
        return CATALOG.head(0).assign(similarity=np.nan)[columns]

    query = profile_to_query(profile)

    # Business subset
    df_f = filter_business(
        CATALOG,
        budget_min=profile.get("budget_min"),
        budget_max=profile.get("budget_max"),
        occasion=profile.get("occasion"),
        age_range=profile.get("age_range","any"),
    )
    if df_f.empty:
        df_f = CATALOG

    # The search reports row *positions*, so eligibility is tracked by
    # position too; this stays correct even if CATALOG's index labels are
    # not 0..n-1.
    allowed = set(np.flatnonzero(CATALOG.index.isin(df_f.index)).tolist())
    names = CATALOG["name"]
    n_total = len(CATALOG)
    first_pass = min(max(k * 50, k), n_total)

    picks, pick_sims = [], []
    # dict.fromkeys de-duplicates the two window sizes when they coincide.
    for topn in dict.fromkeys((first_pass, n_total)):
        sims, idxs = EMB_INDEX.search(query, topn=topn)
        picks, pick_sims, seen = [], [], set()
        for pos in np.argsort(-sims, kind="stable"):
            gi = int(idxs[pos])
            if gi not in allowed:  # also rejects FAISS's -1 padding ids
                continue
            nm = names.iat[gi]
            if nm in seen:
                continue
            seen.add(nm)
            picks.append(gi)
            pick_sims.append(float(sims[pos]))
            if len(picks) >= k:
                break
        if len(picks) >= k:
            break

    if not picks:
        res = df_f.head(k).copy()
        res["similarity"] = np.nan
        return res[columns]

    res = CATALOG.iloc[picks].copy()
    res["similarity"] = pick_sims
    return res[columns]
267
+
268
+ # ---------------- Generative item + message ----------------
269
def generate_item(profile: Dict) -> Dict:
    """Invent one gift idea for ``profile``.

    The idea is built around the first interest ("hobby" if there is none)
    in one of three styles: personalized accessory, workshop voucher or
    starter bundle.  The style is chosen by a private RNG seeded from the
    profile, so the same profile always yields the same idea, all three
    styles are reachable, and the global ``random`` state is left untouched.

    The price is the max budget (``budget_max``, then ``budget_usd``, then
    50) clipped to the range 10-300.

    Returns:
        A dict with the same keys as a catalog row (without similarity).
    """
    interests = [str(t) for t in (profile.get("interests") or [])]
    occasion = profile.get("occasion") or "birthday"
    budget = profile.get("budget_max") or profile.get("budget_usd") or 50
    age = profile.get("age_range") or "any"
    core = (interests[0] if interests else "hobby").strip() or "hobby"

    # String seeds are hashed deterministically, so this is stable across runs.
    rng = random.Random(f"{core}|{occasion}|{age}")
    style = rng.choice(["personalized","experience","bundle"])
    if style == "personalized":
        base_name = f"Custom {core} accessory with initials"
        base_desc = f"Thoughtful personalized {core} accessory tailored to their taste."
    elif style == "experience":
        base_name = f"{core.title()} workshop voucher"
        base_desc = f"A guided intro session to explore {core} in a fun, hands-on way."
    else:
        base_name = f"{core.title()} starter bundle"
        base_desc = f"A curated set to kickstart their {core} passion."

    age_notes = {
        "kids": " Suitable for kids with safe, age-appropriate materials.",
        "teens": " Trendy pick that suits young enthusiasts.",
        "senior": " Comfortable and easy to use.",
    }
    base_desc += age_notes.get(age, "")

    price = float(np.clip(float(budget), 10, 300))
    return {
        "name": f"{base_name} ({occasion})",
        "short_desc": base_desc,
        "price_usd": price,
        "occasion_tags": occasion,
        "persona_fit": ", ".join(interests) or "general",
        "age_range": age,
        "image_url": ""
    }
302
+
303
def generate_message(profile: Dict) -> str:
    """Compose a short greeting for the recipient.

    Uses ``recipient_name``, ``occasion`` and ``tone`` from ``profile``,
    falling back to "Friend", "birthday" and "warm and friendly" when a
    value is missing or empty.  Underscores in the occasion tag (for
    example "thank_you") are shown as spaces.
    """
    name = profile.get("recipient_name") or "Friend"
    occasion = str(profile.get("occasion") or "birthday").replace("_", " ")
    tone = profile.get("tone") or "warm and friendly"
    return (f"Dear {name},\n"
            f"Happy {occasion}! Wishing you health, joy, and wonderful memories. "
            f"May your goals come true. With {tone}.")
310
+
311
+ # ---------------- Rendering helpers ----------------
312
# Characters that can start Markdown/HTML constructs in catalog text.  The
# backslash is included so existing backslashes are escaped as well.
_MD_ESCAPE_TABLE = str.maketrans({ch: "\\" + ch for ch in "\\`*_[]<>|"})


def md_escape(text: str) -> str:
    """Backslash-escape Markdown-significant characters in ``text``.

    Escapes ``\\``, backtick, ``*``, ``_``, ``[``, ``]``, ``<``, ``>`` and
    ``|`` in a single pass.  Non-string input is converted with ``str()``.
    """
    return str(text).translate(_MD_ESCAPE_TABLE)
314
+
315
def render_top3_md(df: pd.DataFrame) -> str:
    """Render recommendation rows as a Markdown snippet.

    Each row becomes an optional image line followed by name, description,
    price, age range and similarity.  Missing prices show as "N/A" and
    missing similarities as "—".

    Returns:
        The Markdown text, or ``"_No results found._"`` when ``df`` is
        ``None`` or empty.
    """
    if df is None or df.empty:
        return "_No results found._"
    lines = ["**Top-3 recommendations:**\n"]
    for _, r in df.iterrows():
        name = md_escape(r.get("name",""))
        desc = md_escape(r.get("short_desc",""))
        price = r.get("price_usd")
        sim = r.get("similarity")
        age = r.get("age_range","any")
        img = r.get("image_url","")
        # A missing image arrives as NaN (a truthy float); only non-empty
        # strings are rendered.
        img = img.strip() if isinstance(img, str) else ""
        # NOTE(review): the dataset's image field appears to hold several
        # URLs joined by "|" - confirm; only the first one is used.
        img = img.split("|", 1)[0].strip()
        if img:
            lines.append(f"![ ]({img})")
        price_str = f"${price:.0f}" if pd.notna(price) else "N/A"
        sim_str = f"{sim:.3f}" if pd.notna(sim) else "—"
        lines.append(f"**{name}**  \n{desc}  \nPrice: **{price_str}** · Age: `{age}` · Similarity: `{sim_str}`\n")
    return "\n".join(lines)
332
+
333
+ # ---------------- Gradio UI ----------------
334
+ EXAMPLES = [
335
+ [["tech","music"], "birthday", 20, 60, "Noa", "adult (18–64)", "warm and friendly"],
336
+ [["home","cooking","practical"], "housewarming", 25, 45, "Daniel", "adult (18–64)", "warm"],
337
+ [["games","photography"], "birthday", 30, 120, "Omer", "teen (13–17)", "fun"],
338
+ [["reading","design","aesthetic"], "thank_you", 15, 35, "Maya", "any", "friendly"],
339
+ ]
340
+
341
def ui_predict(interests_list: List[str], occasion: str, budget_min: float, budget_max: float,
               recipient_name: str, age_label: str, tone: str):
    """Gradio callback: build a profile from the form and render all outputs.

    Returns a 3-tuple of strings: Markdown for the top-3 picks, Markdown for
    the generated idea, and the greeting message.  Any exception is reported
    in the first element with the other two left empty.
    """
    try:
        # budget sanity: fill defaults, then make sure low <= high
        low = 20.0 if budget_min is None else budget_min
        high = 60.0 if budget_max is None else budget_max
        if low > high:
            low, high = high, low

        age_range = AGE_OPTIONS.get(age_label, "any")
        profile = {
            "recipient_name": recipient_name or "Friend",
            "interests": interests_list or [],
            "occasion": occasion or "birthday",
            "budget_min": float(low),
            "budget_max": float(high),
            "budget_usd": float(high),
            "age_range": age_range,
            "tone": tone or "warm and friendly",
        }

        recommendations = recommend_topk(profile, k=3)
        idea = generate_item(profile)
        message = generate_message(profile)

        top3_md = render_top3_md(recommendations)
        idea_sections = [
            f"**{md_escape(idea['name'])}**",
            md_escape(idea['short_desc']),
            f"~${idea['price_usd']:.0f}",
        ]
        return top3_md, "\n\n".join(idea_sections), message
    except Exception as e:
        return f":warning: Error: {e}", "", ""
371
+
372
# Page layout: inputs (interests, occasion/age, budget, name/tone), a submit
# button, three Markdown outputs, and clickable example rows.
with gr.Blocks() as demo:
    gr.Markdown(TITLE)

    with gr.Row():
        interests = gr.CheckboxGroup(
            choices=INTEREST_OPTIONS,
            value=["tech","music"],
            label="Interests (select a few)",
            interactive=True,
        )
    with gr.Row():
        occasion = gr.Dropdown(choices=OCCASION_OPTIONS, value="birthday", label="Occasion")
        age = gr.Dropdown(choices=list(AGE_OPTIONS.keys()), value="adult (18–64)", label="Age group")

    # Two sliders (compatible with older Gradio)
    with gr.Row():
        budget_min = gr.Slider(minimum=5, maximum=500, step=1, value=20, label="Min budget (USD)")
        budget_max = gr.Slider(minimum=5, maximum=500, step=1, value=60, label="Max budget (USD)")

    with gr.Row():
        recipient_name = gr.Textbox(value="Noa", label="Recipient name")
        tone = gr.Textbox(value="warm and friendly", label="Message tone")

    go = gr.Button("Get GIfty 🎯")

    out_top3 = gr.Markdown(label="Top-3 recommendations")
    out_gen = gr.Markdown(label="Generated item")
    out_msg = gr.Markdown(label="Personalized message")

    # Example rows fill the inputs in the same order ui_predict receives them.
    gr.Examples(
        examples=EXAMPLES,
        inputs=[interests, occasion, budget_min, budget_max, recipient_name, age, tone],
        label="Quick examples",
    )

    go.click(
        fn=ui_predict,
        inputs=[interests, occasion, budget_min, budget_max, recipient_name, age, tone],
        outputs=[out_top3, out_gen, out_msg],
    )
412
+
413
+ if __name__ == "__main__":
414
+ demo.launch()