Danielos100 committed on
Commit
152870d
·
verified ·
1 Parent(s): ce5e1d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +208 -137
app.py CHANGED
@@ -1,13 +1,15 @@
1
  # app.py
2
- # 🎁 GIfty β€” Smart Gift Recommender (Embeddings + FAISS + LLM generator)
3
- # Dataset: ckandemir/amazon-products (Hugging Face)
4
  # UI: Gradio (English)
5
  #
6
- # Notes:
7
- # - Embeddings: sentence-transformers/all-MiniLM-L6-v2 + FAISS IndexFlatIP
8
- # - LLM generator: google/flan-t5-small (local transformers, no API keys)
9
- # - Budget uses two sliders (compatible with older Gradio).
10
- # - Images are rendered as right-side thumbnails per result.
 
 
11
 
12
  import os, re, json, random
13
  from typing import Dict, List, Tuple
@@ -18,12 +20,10 @@ import gradio as gr
18
  from datasets import load_dataset
19
  from sentence_transformers import SentenceTransformer
20
  import faiss
21
-
22
- # LLM (Flan-T5) for generation
23
  from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
24
 
25
- # ---------------- Config ----------------
26
- MAX_ROWS = int(os.getenv("MAX_ROWS", "6000")) # keep index build fast on CPU
27
  TITLE = "# 🎁 GIfty β€” Smart Gift Recommender\n*Top-3 similar picks + 1 generated idea + personalized message*"
28
 
29
  OCCASION_OPTIONS = [
@@ -31,6 +31,11 @@ OCCASION_OPTIONS = [
31
  "housewarming", "christmas", "hanukkah", "thank_you",
32
  ]
33
 
 
 
 
 
 
34
  AGE_OPTIONS = {
35
  "any": "any",
36
  "kid (3–12)": "kids",
@@ -41,6 +46,11 @@ AGE_OPTIONS = {
41
 
42
  GENDER_OPTIONS = ["any", "female", "male", "nonbinary"]
43
 
 
 
 
 
 
44
  INTEREST_OPTIONS = [
45
  "reading","writing","tech","travel","fitness","cooking","tea","coffee",
46
  "games","movies","plants","music","design","stationery","home","experience",
@@ -48,7 +58,7 @@ INTEREST_OPTIONS = [
48
  "photography","outdoors","pets","beauty","jewelry"
49
  ]
50
 
51
- # Query-expansion dictionary (helps matching catalog wording)
52
  SYNONYMS = {
53
  "music": ["audio", "headphones", "vinyl", "earbuds", "speaker"],
54
  "tech": ["electronics", "gadgets", "computer", "smart", "device"],
@@ -57,11 +67,11 @@ SYNONYMS = {
57
  "cooking": ["kitchen", "cookware", "chef", "bake"],
58
  "fitness": ["sports", "yoga", "run", "workout"],
59
  "photography": ["camera", "lens", "tripod"],
60
- "travel": ["luggage", "passport", "map", "travel"],
61
  "beauty": ["skincare", "makeup", "fragrance", "cosmetic"],
62
  "jewelry": ["ring", "necklace", "bracelet"],
63
- "coffee": ["espresso", "mug", "grinder", "coffee"],
64
- "tea": ["teapot", "infuser", "tea"],
65
  "plants": ["garden", "planter", "indoor"],
66
  "reading": ["book", "novel", "literature"],
67
  "writing": ["notebook", "pen", "planner"],
@@ -72,13 +82,11 @@ SYNONYMS = {
72
  "experience": ["voucher", "ticket", "workshop"],
73
  }
74
 
75
- # ---------------- Data loading & schema ----------------
76
  def _to_price_usd(x):
77
- s = str(x).strip().replace("$", "").replace(",", "")
78
- try:
79
- return float(s)
80
- except Exception:
81
- return np.nan
82
 
83
  def _infer_age_from_category(cat: str) -> str:
84
  s = (cat or "").lower()
@@ -120,12 +128,10 @@ def map_amazon_to_schema(df_raw: pd.DataFrame) -> pd.DataFrame:
120
  "persona_fit": get("category"),
121
  "image_url": get("image") if "image" in cols else "",
122
  })
123
- # clean
124
  out["name"] = out["name"].astype(str).str.strip().str.slice(0, 120)
125
  out["short_desc"] = out["short_desc"].astype(str).str.strip().str.slice(0, 500)
126
  out["tags"] = out["tags"].astype(str).str.replace("|", ", ").str.lower()
127
  out["persona_fit"] = out["persona_fit"].astype(str).str.lower()
128
- # infer occasion & age
129
  out["occasion_tags"] = out["tags"].map(_infer_occasion_tags)
130
  out["age_range"] = out["tags"].map(_infer_age_from_category).fillna("any")
131
  return out
@@ -146,7 +152,6 @@ def load_catalog() -> pd.DataFrame:
146
  ds = load_dataset("ckandemir/amazon-products", split="train")
147
  raw = ds.to_pandas()
148
  except Exception:
149
- # Fallback (keeps the app alive if internet is blocked)
150
  raw = pd.DataFrame({
151
  "Product Name": ["Wireless Earbuds", "Coffee Sampler", "Strategy Board Game"],
152
  "Description": [
@@ -166,25 +171,12 @@ def load_catalog() -> pd.DataFrame:
166
 
167
  CATALOG = load_catalog()
168
 
169
- # ---------------- Business filters ----------------
170
  def _contains_ci(series: pd.Series, needle: str) -> pd.Series:
171
  if not needle: return pd.Series(True, index=series.index)
172
  pat = re.escape(needle)
173
  return series.fillna("").str.contains(pat, case=False, regex=True)
174
 
175
- def gender_tokens(gender: str) -> List[str]:
176
- gender = (gender or "any").lower()
177
- if gender == "female": return ["women", "woman", "female", "her"]
178
- if gender == "male": return ["men", "man", "male", "him"]
179
- if gender == "nonbinary": return ["unisex", "gender neutral", "they"]
180
- return ["unisex"] # "any"
181
-
182
- def soft_gender_boost(row: pd.Series, gender: str) -> float:
183
- if not gender or gender == "any": return 0.0
184
- tokens = gender_tokens(gender)
185
- blob = f"{row.get('tags','')} {row.get('short_desc','')}".lower()
186
- return 0.08 if any(t in blob for t in tokens) else 0.0
187
-
188
  def filter_business(df: pd.DataFrame, budget_min=None, budget_max=None,
189
  occasion: str=None, age_range: str="any") -> pd.DataFrame:
190
  m = pd.Series(True, index=df.index)
@@ -198,7 +190,7 @@ def filter_business(df: pd.DataFrame, budget_min=None, budget_max=None,
198
  m &= (df["age_range"].fillna("any").isin([age_range, "any"]))
199
  return df[m]
200
 
201
- # ---------------- Embeddings + FAISS (MiniLM) ----------------
202
  class EmbeddingIndex:
203
  def __init__(self, docs: List[str], model_id: str):
204
  self.model_id = model_id
@@ -213,10 +205,24 @@ class EmbeddingIndex:
213
  sims, idxs = self.index.search(qv, topn)
214
  return sims[0], idxs[0]
215
 
216
- EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2" # fast & solid on CPU
217
  EMB_INDEX = EmbeddingIndex(CATALOG["doc"].tolist(), EMBED_MODEL_ID)
218
 
219
- # ---------------- Query building ----------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  def expand_with_synonyms(tokens: List[str]) -> List[str]:
221
  out = []
222
  for t in tokens:
@@ -227,15 +233,18 @@ def expand_with_synonyms(tokens: List[str]) -> List[str]:
227
  return out
228
 
229
  def profile_to_query(profile: Dict) -> str:
230
- # Weighted, doc-aligned query (interests + synonyms) + occasion + age + gender signal
231
  interests = [t.strip().lower() for t in profile.get("interests", []) if t.strip()]
232
  expanded = expand_with_synonyms(interests)
233
  expanded = expanded + expanded # weight x2
234
  occasion = (profile.get("occasion", "") or "").lower()
235
  age = profile.get("age_range", "any")
236
  gender = (profile.get("gender", "any") or "any").lower()
 
 
237
  parts = []
238
  if expanded: parts.append(", ".join(expanded))
 
239
  if occasion: parts.append(occasion)
240
  if age and age != "any": parts.append(age)
241
  if gender and gender != "any":
@@ -259,16 +268,25 @@ def recommend_topk(profile: Dict, k: int=3) -> pd.DataFrame:
259
  if df_f.empty:
260
  df_f = CATALOG
261
 
262
- # Gather candidates within the subset and apply a small gender boost
 
 
 
 
 
 
 
 
263
  cand = []
264
  for i, sim in zip(idxs, sims):
265
  i = int(i)
266
  if i in df_f.index:
267
- boost = soft_gender_boost(CATALOG.loc[i], profile.get("gender","any"))
 
268
  cand.append((i, float(sim) + boost))
269
  cand.sort(key=lambda x: -x[1])
270
 
271
- # Pick unique by name
272
  seen, picks = set(), []
273
  for gi, score in cand:
274
  nm = CATALOG.loc[gi, "name"]
@@ -288,7 +306,7 @@ def recommend_topk(profile: Dict, k: int=3) -> pd.DataFrame:
288
  res["similarity"] = [sim_map.get(int(gi), np.nan) for gi in sel]
289
  return res[["name","short_desc","price_usd","occasion_tags","persona_fit","age_range","image_url","similarity"]]
290
 
291
- # ---------------- LLM generator (Flan-T5) ----------------
292
  LLM_ID = "google/flan-t5-small"
293
  try:
294
  _tok = AutoTokenizer.from_pretrained(LLM_ID)
@@ -296,7 +314,7 @@ try:
296
  LLM = pipeline("text2text-generation", model=_mdl, tokenizer=_tok)
297
  except Exception as e:
298
  LLM = None
299
- print("LLM load failed, will fallback to rule-based. Error:", e)
300
 
301
  def _run_llm(prompt: str, max_new_tokens=128) -> str:
302
  if LLM is None:
@@ -308,7 +326,6 @@ def _parse_json_maybe(s: str) -> dict:
308
  try:
309
  return json.loads(s)
310
  except Exception:
311
- # try to extract {...}
312
  m = re.search(r"\{.*\}", s, flags=re.S)
313
  if m:
314
  try:
@@ -318,33 +335,32 @@ def _parse_json_maybe(s: str) -> dict:
318
  return {}
319
 
320
  def llm_generate_item(profile: Dict) -> Dict:
321
- # Prompt to produce a single gift idea JSON
322
  prompt = f"""
323
  You are GIfty, a gift recommender. Create ONE gift idea as JSON with keys:
324
  name, short_desc, price_usd, occasion_tags, persona_fit.
325
  Constraints:
326
- - Fit the recipient profile.
327
- - price_usd must be a number within the budget range.
328
  - Keep text concise, friendly, and realistic.
329
 
330
- Recipient profile:
 
 
 
 
331
  interests = {profile.get('interests', [])}
332
  occasion = {profile.get('occasion','birthday')}
333
- age_group = {profile.get('age_range','any')}
334
- gender = {profile.get('gender','any')}
335
  budget_min = {profile.get('budget_min', 10)}
336
  budget_max = {profile.get('budget_max', 100)}
337
 
338
  Return ONLY JSON.
339
  """
340
- txt = _run_llm(prompt, max_new_tokens=160)
341
  data = _parse_json_maybe(txt)
342
  if not data:
343
- # fallback rule-based if LLM unavailable or malformed
344
- core = (profile.get("interests",[ "hobby" ])[0] or "hobby").strip()
345
- name = f"Custom {core} accessory with initials ({profile.get('occasion','birthday')})"
346
  return {
347
- "name": name,
348
  "short_desc": f"Thoughtful personalized {core} accessory tailored to their taste.",
349
  "price_usd": float(np.clip(profile.get("budget_max", 50) or 50, 10, 300)),
350
  "occasion_tags": profile.get("occasion","birthday"),
@@ -352,7 +368,6 @@ Return ONLY JSON.
352
  "age_range": profile.get("age_range","any"),
353
  "image_url": ""
354
  }
355
- # ensure numeric price and bounds
356
  try:
357
  p = float(data.get("price_usd", profile.get("budget_max", 50)))
358
  except Exception:
@@ -370,31 +385,30 @@ Return ONLY JSON.
370
 
371
  def llm_generate_message(profile: Dict) -> str:
372
  prompt = f"""
373
- Write a short, warm greeting message (2–3 sentences) in English for a gift card.
 
374
  Recipient name: {profile.get('recipient_name','Friend')}
 
375
  Occasion: {profile.get('occasion','birthday')}
376
  Interests: {', '.join(profile.get('interests', []))}
377
  Age group: {profile.get('age_range','any')}
378
  Gender: {profile.get('gender','any')}
379
- Tone: {profile.get('tone','warm and friendly')}
380
- Avoid emojis. Keep it sincere and concise.
381
  """
382
  txt = _run_llm(prompt, max_new_tokens=90)
383
  if not txt:
384
- # fallback
385
- return (f"Dear {profile.get('recipient_name','Friend')},\n"
386
- f"Happy {profile.get('occasion','birthday')}! Wishing you health, joy, and wonderful memories. "
387
  f"With {profile.get('tone','warm and friendly')}.")
388
  return txt.strip()
389
 
390
- # ---------------- Rendering helpers (HTML with right-side thumbnail) ----------------
391
  def md_escape(text: str) -> str:
392
  return str(text).replace("|","\\|").replace("*","\\*").replace("_","\\_")
393
 
394
  def render_top3_html(df: pd.DataFrame) -> str:
395
  if df is None or df.empty:
396
  return "<em>No results found.</em>"
397
- # Simple cards with image on the right
398
  rows = []
399
  for _, r in df.iterrows():
400
  name = md_escape(r.get("name",""))
@@ -407,7 +421,7 @@ def render_top3_html(df: pd.DataFrame) -> str:
407
  sim_str = f"{sim:.3f}" if pd.notna(sim) else "β€”"
408
  img_html = f'<img src="{img}" alt="" style="width:84px;height:84px;object-fit:cover;border-radius:10px;margin-left:12px;" />' if img else ""
409
  card = f"""
410
- <div style="display:flex;align-items:flex-start;justify-content:space-between;gap:10px;padding:10px;border:1px solid #eee;border-radius:12px;margin-bottom:8px;">
411
  <div style="flex:1;min-width:0;">
412
  <div style="font-weight:700;">{name}</div>
413
  <div style="font-size:0.95em;margin-top:4px;">{desc}</div>
@@ -421,87 +435,144 @@ def render_top3_html(df: pd.DataFrame) -> str:
421
  rows.append(card)
422
  return "\n".join(rows)
423
 
424
- # ---------------- Gradio UI ----------------
425
- EXAMPLES = [
426
- [["tech","music"], "birthday", 20, 60, "Noa", "adult (18–64)", "any", "warm and friendly"],
427
- [["home","cooking","practical"], "housewarming", 25, 45, "Daniel", "adult (18–64)", "male", "warm"],
428
- [["games","photography"], "birthday", 30, 120, "Omer", "teen (13–17)", "male", "fun"],
429
- [["reading","design","aesthetic"], "thank_you", 15, 35, "Maya", "any", "female", "friendly"],
430
- ]
431
 
432
- def ui_predict(interests_list: List[str], occasion: str, budget_min: float, budget_max: float,
433
- recipient_name: str, age_label: str, gender: str, tone: str):
434
- try:
435
- # sanity
436
- if budget_min is None: budget_min = 20.0
437
- if budget_max is None: budget_max = 60.0
438
- if budget_min > budget_max:
439
- budget_min, budget_max = budget_max, budget_min
440
 
441
- age_range = AGE_OPTIONS.get(age_label, "any")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  profile = {
443
- "recipient_name": recipient_name or "Friend",
 
444
  "interests": interests_list or [],
445
- "occasion": occasion or "birthday",
446
- "budget_min": float(budget_min),
447
- "budget_max": float(budget_max),
448
- "budget_usd": float(budget_max),
449
  "age_range": age_range,
450
- "gender": gender or "any",
451
- "tone": tone or "warm and friendly",
452
  }
453
 
 
454
  top3 = recommend_topk(profile, k=3)
455
  gen = llm_generate_item(profile)
456
  msg = llm_generate_message(profile)
457
 
458
- top3_html = render_top3_html(top3)
459
- gen_md = f"**{md_escape(gen['name'])}**\n\n{md_escape(gen['short_desc'])}\n\n~${gen['price_usd']:.0f}"
460
- return top3_html, gen_md, msg
461
- except Exception as e:
462
- return f"<div style='color:#b00;'>⚠️ Error: {e}</div>", "", ""
463
 
464
- with gr.Blocks() as demo:
465
- gr.Markdown(TITLE)
466
-
467
- with gr.Row():
468
- interests = gr.CheckboxGroup(
469
- label="Interests (select a few)",
470
- choices=INTEREST_OPTIONS,
471
- value=["tech","music"],
472
- interactive=True
 
 
 
473
  )
474
- with gr.Row():
475
- occasion = gr.Dropdown(label="Occasion", choices=OCCASION_OPTIONS, value="birthday")
476
- age = gr.Dropdown(label="Age group", choices=list(AGE_OPTIONS.keys()), value="adult (18–64)")
477
- gender = gr.Dropdown(label="Recipient gender", choices=GENDER_OPTIONS, value="any")
478
-
479
- # Two budget sliders (compatible with older Gradio)
480
- with gr.Row():
481
- budget_min = gr.Slider(label="Min budget (USD)", minimum=5, maximum=500, step=1, value=20)
482
- budget_max = gr.Slider(label="Max budget (USD)", minimum=5, maximum=500, step=1, value=60)
483
-
484
- with gr.Row():
485
- recipient_name = gr.Textbox(label="Recipient name", value="Noa")
486
- tone = gr.Textbox(label="Message tone", value="warm and friendly")
487
-
488
- go = gr.Button("Get GIfty 🎯")
489
-
490
- out_top3 = gr.HTML(label="Top-3 recommendations") # HTML to support right-side thumbnails
491
- out_gen = gr.Markdown(label="Generated item")
492
- out_msg = gr.Markdown(label="Personalized message")
493
-
494
- gr.Examples(
495
- EXAMPLES,
496
- [interests, occasion, budget_min, budget_max, recipient_name, age, gender, tone],
497
- label="Quick examples",
498
- )
499
-
500
- go.click(
501
- ui_predict,
502
- [interests, occasion, budget_min, budget_max, recipient_name, age, gender, tone],
503
- [out_top3, out_gen, out_msg]
504
- )
505
 
506
  if __name__ == "__main__":
507
  demo.launch()
 
1
  # app.py
2
+ # 🎁 GIfty β€” Smart Gift Recommender (Embeddings + FAISS + LLM)
3
+ # Dataset: ckandemir/amazon-products
4
  # UI: Gradio (English)
5
  #
6
+ # Features:
7
+ # - Sentence-Transformers (MiniLM) + FAISS (cosine via normalized embeddings)
8
+ # - LLM generator (Flan-T5-small) for the 4th gift + greeting
9
+ # - Relationship & Tone inputs that affect both retrieval weighting and LLM outputs
10
+ # - Image thumbnails on the right
11
+ # - Quick Examples placed visually at the top via CSS order
12
+ # - Budget range: RangeSlider if available, else two Sliders as fallback
13
 
14
  import os, re, json, random
15
  from typing import Dict, List, Tuple
 
20
  from datasets import load_dataset
21
  from sentence_transformers import SentenceTransformer
22
  import faiss
 
 
23
  from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
24
 
25
+ # --------------------- Config ---------------------
26
+ MAX_ROWS = int(os.getenv("MAX_ROWS", "6000"))
27
  TITLE = "# 🎁 GIfty β€” Smart Gift Recommender\n*Top-3 similar picks + 1 generated idea + personalized message*"
28
 
29
  OCCASION_OPTIONS = [
 
31
  "housewarming", "christmas", "hanukkah", "thank_you",
32
  ]
33
 
34
+ RELATIONSHIP_OPTIONS = [
35
+ "friend", "close friend", "partner/spouse", "family", "parent",
36
+ "sibling", "child", "colleague", "manager", "client", "teacher"
37
+ ]
38
+
39
  AGE_OPTIONS = {
40
  "any": "any",
41
  "kid (3–12)": "kids",
 
46
 
47
  GENDER_OPTIONS = ["any", "female", "male", "nonbinary"]
48
 
49
+ TONE_OPTIONS = [
50
+ "warm and friendly", "heartfelt and emotional", "playful and fun",
51
+ "formal and polite", "professional", "minimalist and concise"
52
+ ]
53
+
54
  INTEREST_OPTIONS = [
55
  "reading","writing","tech","travel","fitness","cooking","tea","coffee",
56
  "games","movies","plants","music","design","stationery","home","experience",
 
58
  "photography","outdoors","pets","beauty","jewelry"
59
  ]
60
 
61
+ # Query expansion (helps match catalog wording)
62
  SYNONYMS = {
63
  "music": ["audio", "headphones", "vinyl", "earbuds", "speaker"],
64
  "tech": ["electronics", "gadgets", "computer", "smart", "device"],
 
67
  "cooking": ["kitchen", "cookware", "chef", "bake"],
68
  "fitness": ["sports", "yoga", "run", "workout"],
69
  "photography": ["camera", "lens", "tripod"],
70
+ "travel": ["luggage", "passport", "map"],
71
  "beauty": ["skincare", "makeup", "fragrance", "cosmetic"],
72
  "jewelry": ["ring", "necklace", "bracelet"],
73
+ "coffee": ["espresso", "mug", "grinder"],
74
+ "tea": ["teapot", "infuser"],
75
  "plants": ["garden", "planter", "indoor"],
76
  "reading": ["book", "novel", "literature"],
77
  "writing": ["notebook", "pen", "planner"],
 
82
  "experience": ["voucher", "ticket", "workshop"],
83
  }
84
 
85
+ # --------------------- Data loading & schema ---------------------
86
  def _to_price_usd(x):
87
+ s = str(x).strip().replace("$","").replace(",","")
88
+ try: return float(s)
89
+ except: return np.nan
 
 
90
 
91
  def _infer_age_from_category(cat: str) -> str:
92
  s = (cat or "").lower()
 
128
  "persona_fit": get("category"),
129
  "image_url": get("image") if "image" in cols else "",
130
  })
 
131
  out["name"] = out["name"].astype(str).str.strip().str.slice(0, 120)
132
  out["short_desc"] = out["short_desc"].astype(str).str.strip().str.slice(0, 500)
133
  out["tags"] = out["tags"].astype(str).str.replace("|", ", ").str.lower()
134
  out["persona_fit"] = out["persona_fit"].astype(str).str.lower()
 
135
  out["occasion_tags"] = out["tags"].map(_infer_occasion_tags)
136
  out["age_range"] = out["tags"].map(_infer_age_from_category).fillna("any")
137
  return out
 
152
  ds = load_dataset("ckandemir/amazon-products", split="train")
153
  raw = ds.to_pandas()
154
  except Exception:
 
155
  raw = pd.DataFrame({
156
  "Product Name": ["Wireless Earbuds", "Coffee Sampler", "Strategy Board Game"],
157
  "Description": [
 
171
 
172
  CATALOG = load_catalog()
173
 
174
+ # --------------------- Business filters ---------------------
175
  def _contains_ci(series: pd.Series, needle: str) -> pd.Series:
176
  if not needle: return pd.Series(True, index=series.index)
177
  pat = re.escape(needle)
178
  return series.fillna("").str.contains(pat, case=False, regex=True)
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  def filter_business(df: pd.DataFrame, budget_min=None, budget_max=None,
181
  occasion: str=None, age_range: str="any") -> pd.DataFrame:
182
  m = pd.Series(True, index=df.index)
 
190
  m &= (df["age_range"].fillna("any").isin([age_range, "any"]))
191
  return df[m]
192
 
193
+ # --------------------- Embeddings + FAISS ---------------------
194
  class EmbeddingIndex:
195
  def __init__(self, docs: List[str], model_id: str):
196
  self.model_id = model_id
 
205
  sims, idxs = self.index.search(qv, topn)
206
  return sims[0], idxs[0]
207
 
208
+ EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2" # best balance CPU speed/quality
209
  EMB_INDEX = EmbeddingIndex(CATALOG["doc"].tolist(), EMBED_MODEL_ID)
210
 
211
+ # --------------------- Query building ---------------------
212
+ REL_TO_TOKENS = {
213
+ "partner/spouse": ["romantic", "couple"],
214
+ "close friend": ["personal", "fun"],
215
+ "friend": ["friendly"],
216
+ "family": ["family"],
217
+ "parent": ["parent"],
218
+ "sibling": ["sibling"],
219
+ "child": ["kids", "play"],
220
+ "colleague": ["office", "work"],
221
+ "manager": ["professional"],
222
+ "client": ["professional", "thank_you"],
223
+ "teacher": ["teacher", "thank_you"]
224
+ }
225
+
226
  def expand_with_synonyms(tokens: List[str]) -> List[str]:
227
  out = []
228
  for t in tokens:
 
233
  return out
234
 
235
  def profile_to_query(profile: Dict) -> str:
236
+ """Weighted, doc-aligned query (interests+synonyms) + occasion + age + gender + relationship."""
237
  interests = [t.strip().lower() for t in profile.get("interests", []) if t.strip()]
238
  expanded = expand_with_synonyms(interests)
239
  expanded = expanded + expanded # weight x2
240
  occasion = (profile.get("occasion", "") or "").lower()
241
  age = profile.get("age_range", "any")
242
  gender = (profile.get("gender", "any") or "any").lower()
243
+ rel = (profile.get("relationship","friend") or "friend").lower()
244
+ rel_tokens = REL_TO_TOKENS.get(rel, [])
245
  parts = []
246
  if expanded: parts.append(", ".join(expanded))
247
+ if rel_tokens: parts.append(", ".join(rel_tokens))
248
  if occasion: parts.append(occasion)
249
  if age and age != "any": parts.append(age)
250
  if gender and gender != "any":
 
268
  if df_f.empty:
269
  df_f = CATALOG
270
 
271
+ # Small gender-aware re-ranking
272
+ def gender_tokens(g: str) -> List[str]:
273
+ g = (g or "any").lower()
274
+ if g == "female": return ["women", "woman", "female", "her"]
275
+ if g == "male": return ["men", "man", "male", "him"]
276
+ if g == "nonbinary": return ["unisex", "gender neutral", "they"]
277
+ return ["unisex"]
278
+
279
+ gts = gender_tokens(profile.get("gender","any"))
280
  cand = []
281
  for i, sim in zip(idxs, sims):
282
  i = int(i)
283
  if i in df_f.index:
284
+ blob = f"{CATALOG.loc[i, 'tags']} {CATALOG.loc[i, 'short_desc']}".lower()
285
+ boost = 0.08 if any(t in blob for t in gts) else 0.0
286
  cand.append((i, float(sim) + boost))
287
  cand.sort(key=lambda x: -x[1])
288
 
289
+ # Unique by name
290
  seen, picks = set(), []
291
  for gi, score in cand:
292
  nm = CATALOG.loc[gi, "name"]
 
306
  res["similarity"] = [sim_map.get(int(gi), np.nan) for gi in sel]
307
  return res[["name","short_desc","price_usd","occasion_tags","persona_fit","age_range","image_url","similarity"]]
308
 
309
+ # --------------------- LLM generator (Flan-T5-small) ---------------------
310
  LLM_ID = "google/flan-t5-small"
311
  try:
312
  _tok = AutoTokenizer.from_pretrained(LLM_ID)
 
314
  LLM = pipeline("text2text-generation", model=_mdl, tokenizer=_tok)
315
  except Exception as e:
316
  LLM = None
317
+ print("LLM load failed, fallback to rule-based. Error:", e)
318
 
319
  def _run_llm(prompt: str, max_new_tokens=128) -> str:
320
  if LLM is None:
 
326
  try:
327
  return json.loads(s)
328
  except Exception:
 
329
  m = re.search(r"\{.*\}", s, flags=re.S)
330
  if m:
331
  try:
 
335
  return {}
336
 
337
  def llm_generate_item(profile: Dict) -> Dict:
 
338
  prompt = f"""
339
  You are GIfty, a gift recommender. Create ONE gift idea as JSON with keys:
340
  name, short_desc, price_usd, occasion_tags, persona_fit.
341
  Constraints:
342
+ - Fit the recipient profile and relationship.
343
+ - price_usd must be numeric and within the given budget range.
344
  - Keep text concise, friendly, and realistic.
345
 
346
+ Recipient:
347
+ name = {profile.get('recipient_name','Friend')}
348
+ relationship = {profile.get('relationship','friend')}
349
+ gender = {profile.get('gender','any')}
350
+ age_group = {profile.get('age_range','any')}
351
  interests = {profile.get('interests', [])}
352
  occasion = {profile.get('occasion','birthday')}
 
 
353
  budget_min = {profile.get('budget_min', 10)}
354
  budget_max = {profile.get('budget_max', 100)}
355
 
356
  Return ONLY JSON.
357
  """
358
+ txt = _run_llm(prompt, max_new_tokens=180)
359
  data = _parse_json_maybe(txt)
360
  if not data:
361
+ core = (profile.get("interests",["hobby"])[0] or "hobby").strip()
 
 
362
  return {
363
+ "name": f"Custom {core} accessory with initials ({profile.get('occasion','birthday')})",
364
  "short_desc": f"Thoughtful personalized {core} accessory tailored to their taste.",
365
  "price_usd": float(np.clip(profile.get("budget_max", 50) or 50, 10, 300)),
366
  "occasion_tags": profile.get("occasion","birthday"),
 
368
  "age_range": profile.get("age_range","any"),
369
  "image_url": ""
370
  }
 
371
  try:
372
  p = float(data.get("price_usd", profile.get("budget_max", 50)))
373
  except Exception:
 
385
 
386
  def llm_generate_message(profile: Dict) -> str:
387
  prompt = f"""
388
+ Write a {profile.get('tone','warm and friendly')} greeting in English (2–3 short sentences) for a gift card.
389
+ Use the relationship to set the level of warmth/formality.
390
  Recipient name: {profile.get('recipient_name','Friend')}
391
+ Relationship: {profile.get('relationship','friend')}
392
  Occasion: {profile.get('occasion','birthday')}
393
  Interests: {', '.join(profile.get('interests', []))}
394
  Age group: {profile.get('age_range','any')}
395
  Gender: {profile.get('gender','any')}
396
+ Avoid emojis.
 
397
  """
398
  txt = _run_llm(prompt, max_new_tokens=90)
399
  if not txt:
400
+ return (f"Dear {profile.get('recipient_name','Friend')}, "
401
+ f"happy {profile.get('occasion','birthday')}! Wishing you health, joy, and wonderful memories. "
 
402
  f"With {profile.get('tone','warm and friendly')}.")
403
  return txt.strip()
404
 
405
+ # --------------------- Rendering (HTML cards with right thumbnail) ---------------------
406
  def md_escape(text: str) -> str:
407
  return str(text).replace("|","\\|").replace("*","\\*").replace("_","\\_")
408
 
409
  def render_top3_html(df: pd.DataFrame) -> str:
410
  if df is None or df.empty:
411
  return "<em>No results found.</em>"
 
412
  rows = []
413
  for _, r in df.iterrows():
414
  name = md_escape(r.get("name",""))
 
421
  sim_str = f"{sim:.3f}" if pd.notna(sim) else "β€”"
422
  img_html = f'<img src="{img}" alt="" style="width:84px;height:84px;object-fit:cover;border-radius:10px;margin-left:12px;" />' if img else ""
423
  card = f"""
424
+ <div style="display:flex;align-items:flex-start;justify-content:space-between;gap:10px;padding:10px;border:1px solid #eee;border-radius:12px;margin-bottom:8px;background:#fff;">
425
  <div style="flex:1;min-width:0;">
426
  <div style="font-weight:700;">{name}</div>
427
  <div style="font-size:0.95em;margin-top:4px;">{desc}</div>
 
435
  rows.append(card)
436
  return "\n".join(rows)
437
 
438
+ # --------------------- Gradio UI ---------------------
439
+ CSS = """
440
+ #examples { order: 1; }
441
+ #form { order: 2; }
442
+ """
 
 
443
 
444
+ with gr.Blocks(css=CSS) as demo:
445
+ gr.Markdown(TITLE)
 
 
 
 
 
 
446
 
447
+ # We'll build the form first (so we can reference components), but show Examples on top via CSS order.
448
+ with gr.Column(elem_id="examples"):
449
+ gr.Markdown("### Quick examples")
450
+ # Placeholders; we will link them after creating components.
451
+ # (We will create Examples at the end once components exist.)
452
+
453
+ with gr.Column(elem_id="form"):
454
+ with gr.Row():
455
+ recipient_name = gr.Textbox(label="Recipient name", value="Noa")
456
+ relationship = gr.Dropdown(label="Relationship", choices=RELATIONSHIP_OPTIONS, value="friend")
457
+
458
+ with gr.Row():
459
+ interests = gr.CheckboxGroup(
460
+ label="Interests (select a few)",
461
+ choices=INTEREST_OPTIONS,
462
+ value=["tech","music"],
463
+ interactive=True
464
+ )
465
+
466
+ with gr.Row():
467
+ occasion = gr.Dropdown(label="Occasion", choices=OCCASION_OPTIONS, value="birthday")
468
+ age = gr.Dropdown(label="Age group", choices=list(AGE_OPTIONS.keys()), value="adult (18–64)")
469
+ gender = gr.Dropdown(label="Recipient gender", choices=GENDER_OPTIONS, value="any")
470
+
471
+ # Budget: RangeSlider if available, else two sliders fallback
472
+ RangeSlider = getattr(gr, "RangeSlider", None)
473
+ if RangeSlider is not None:
474
+ budget_range = RangeSlider(label="Budget range (USD)", minimum=5, maximum=500, step=1, value=[20, 60])
475
+ budget_min, budget_max = None, None # placeholders for signature compatibility
476
+ else:
477
+ with gr.Row():
478
+ budget_min = gr.Slider(label="Min budget (USD)", minimum=5, maximum=500, step=1, value=20)
479
+ budget_max = gr.Slider(label="Max budget (USD)", minimum=5, maximum=500, step=1, value=60)
480
+ budget_range = gr.State(value=None)
481
+
482
+ tone = gr.Dropdown(label="Message tone", choices=TONE_OPTIONS, value="warm and friendly")
483
+
484
+ go = gr.Button("Get GIfty 🎯")
485
+
486
+ out_top3 = gr.HTML(label="Top-3 recommendations")
487
+ out_gen = gr.Markdown(label="Generated item")
488
+ out_msg = gr.Markdown(label="Personalized message")
489
+
490
# Quick examples, stored once in a layout that does not depend on the budget
# control in use:
# (interests, occasion, budget_min, budget_max, name, relationship, age, gender, tone)
_EXAMPLE_ROWS = [
    (["tech","music"], "birthday", 20, 60, "Noa", "friend", "adult (18–64)", "any", "warm and friendly"),
    (["home","cooking","practical"], "housewarming", 25, 45, "Daniel", "colleague", "adult (18–64)", "male", "professional"),
    (["games","photography"], "birthday", 30, 120, "Omer", "close friend", "teen (13–17)", "male", "playful and fun"),
    (["reading","design","aesthetic"], "thank_you", 15, 35, "Maya", "partner/spouse", "any", "female", "heartfelt and emotional"),
]

# Each example row must carry exactly one value per input component, in the
# same order as ``example_inputs``. The row shape therefore follows the
# active budget control: one [min, max] value for the range slider, or two
# separate values for the min/max sliders.
if RangeSlider:
    example_inputs = [interests, occasion, budget_range, recipient_name, relationship, age, gender, tone]
    EXAMPLES = [
        [picked, occ, [low, high], *rest]
        for picked, occ, low, high, *rest in _EXAMPLE_ROWS
    ]
else:
    example_inputs = [interests, occasion, budget_min, budget_max, recipient_name, relationship, age, gender, tone]
    EXAMPLES = [list(row) for row in _EXAMPLE_ROWS]

# Add the Examples widget inside a column tagged with the "examples" element id.
with gr.Column(elem_id="examples"):
    gr.Examples(EXAMPLES, inputs=example_inputs)
508
+
509
+ # --------- Predict function wiring ----------
510
def ui_predict(
    interests_list: List[str], occasion_val: str,
    budget_rng_or_min,   # either [min, max] or min
    maybe_max_or_name,   # range layout -> recipient_name; else -> budget_max
    maybe_name_or_rel,   # range layout -> relationship; else -> recipient_name
    rel_or_age,          # range layout -> age label; else -> relationship
    age_or_gender,       # range layout -> gender; else -> age label
    gender_or_tone,      # range layout -> tone; else -> gender
    tone_maybe=None      # only supplied in the two-slider layout
):
    """Turn the form values into a profile and produce the three UI outputs.

    The positional arguments arrive in one of two layouts, told apart by the
    type of ``budget_rng_or_min``:

    * list/tuple ``[min, max]``: the remaining arguments are
      name, relationship, age label, gender, tone.
    * anything else (a single minimum): the remaining arguments are
      max budget, name, relationship, age label, gender, tone.

    Missing budget values default to 20 (min) and 60 (max); a reversed range
    is swapped. Empty text fields fall back to "Friend", "friend", "any",
    "any" and "warm and friendly" respectively.

    Returns:
        A 3-tuple ``(top3_html, generated_item_markdown, message)``. If any
        step raises, the first element is an HTML error notice and the other
        two are empty strings, so the UI shows the problem instead of failing.
    """
    import html  # local import: only needed to escape the error text

    try:
        # Disambiguate the two argument layouts.
        if isinstance(budget_rng_or_min, (list, tuple)):
            low, high = budget_rng_or_min[0], budget_rng_or_min[1]
            name, rel, age_label, gender_val, tone_val = (
                maybe_max_or_name, maybe_name_or_rel, rel_or_age,
                age_or_gender, gender_or_tone,
            )
        else:
            low, high = budget_rng_or_min, maybe_max_or_name
            name, rel, age_label, gender_val, tone_val = (
                maybe_name_or_rel, rel_or_age, age_or_gender,
                gender_or_tone, tone_maybe,
            )

        # ``is None`` rather than ``or`` so an explicit 0 is kept.
        budget_min_val = float(20 if low is None else low)
        budget_max_val = float(60 if high is None else high)
        if budget_min_val > budget_max_val:
            budget_min_val, budget_max_val = budget_max_val, budget_min_val

        profile = {
            "recipient_name": str(name or "Friend"),
            "relationship": str(rel or "friend"),
            "interests": interests_list or [],
            "occasion": occasion_val or "birthday",
            "budget_min": budget_min_val,
            "budget_max": budget_max_val,
            "budget_usd": budget_max_val,
            "age_range": AGE_OPTIONS.get(str(age_label or "any"), "any"),
            "gender": str(gender_val or "any"),
            "tone": str(tone_val or "warm and friendly"),
        }

        # Retrieval + generation
        top3 = recommend_topk(profile, k=3)
        gen = llm_generate_item(profile)
        msg = llm_generate_message(profile)

        gen_md = f"**{md_escape(gen['name'])}**\n\n{md_escape(gen['short_desc'])}\n\n~${gen['price_usd']:.0f}"
        return render_top3_html(top3), gen_md, msg
    except Exception as e:
        # UI boundary: report the failure in the HTML output. The message is
        # escaped because it may echo user-provided text.
        return f"<div style='color:#b00;'>Error: {html.escape(str(e))}</div>", "", ""
 
 
 
 
562
 
563
+ # Wire the button
564
+ if RangeSlider:
565
+ go.click(
566
+ ui_predict,
567
+ [interests, occasion, budget_range, recipient_name, relationship, age, gender, tone],
568
+ [out_top3, out_gen, out_msg]
569
+ )
570
+ else:
571
+ go.click(
572
+ ui_predict,
573
+ [interests, occasion, budget_min, budget_max, recipient_name, relationship, age, gender, tone],
574
+ [out_top3, out_gen, out_msg]
575
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
576
 
577
  if __name__ == "__main__":
578
  demo.launch()