Danielos100 committed on
Commit
78ad9fc
·
verified ·
1 Parent(s): 228df34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +897 -611
app.py CHANGED
@@ -1,14 +1,13 @@
1
- # app.py — Gifty (revised)
2
- # ๐ŸŽ GIfty โ€” Smart Gift Recommender
3
- # Data: ckandemir/amazon-products
4
- # Retrieval: MiniLM-L12-v2 embeddings + FAISS (cosine), with simple on-disk cache
5
- # DIY Generation: small instruct LMs via HF pipeline (default: flan-t5-small) with JSON validate+repair (no padding)
6
- # Greeting: short LLM completion
7
- # Image: SD-Turbo (optional)
8
- # UI: Gradio; Quick Examples; Budget RangeSlider; DIY JSON + readable card
9
-
10
- import os, re, json, random, hashlib, pathlib
11
- from typing import Dict, List, Tuple
12
 
13
  import numpy as np
14
  import pandas as pd
@@ -16,31 +15,39 @@ import gradio as gr
16
  from datasets import load_dataset
17
 
18
  from sentence_transformers import SentenceTransformer
19
- import faiss
20
-
21
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, pipeline
22
-
23
  import torch
24
- from diffusers import AutoPipelineForText2Image
 
25
 
26
  # --------------------- Config ---------------------
27
- MAX_ROWS = int(os.getenv("MAX_ROWS", "8000"))
28
- TITLE = "# ๐ŸŽ GIfty โ€” Smart Gift Recommender\n*Top-3 catalog picks + 1 DIY gift (JSON) + personalized message*"
29
 
30
- # Retrieval model (embedding)
 
 
 
31
  EMBED_MODEL_ID = os.getenv("EMBED_MODEL_ID", "sentence-transformers/all-MiniLM-L12-v2")
32
- EMBED_CACHE_DIR = os.getenv("EMBED_CACHE_DIR", "./.gifty_cache")
33
- pathlib.Path(EMBED_CACHE_DIR).mkdir(parents=True, exist_ok=True)
34
 
35
- # DIY generation model (text)
36
- GEN_MODEL_ID = os.getenv("GEN_MODEL_ID", "google/flan-t5-small")
37
- OUTPUT_LANG = os.getenv("OUTPUT_LANG", "en") # "en" or "he"
38
- MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "360"))
39
- MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "260"))
40
- DIY_MAX_ATTEMPTS = int(os.getenv("DIY_MAX_ATTEMPTS", "4"))
 
 
 
 
 
 
 
 
 
 
41
 
42
- # Image gen toggle
43
- ENABLE_IMAGE = os.getenv("ENABLE_IMAGE", "1") == "1"
44
 
45
  # ===== UI options =====
46
  INTEREST_OPTIONS = [
@@ -48,51 +55,24 @@ INTEREST_OPTIONS = [
48
  "Gaming","Photography","Hiking","Movies","Crafts","Pets","Wellness","Collecting","Food",
49
  "Home decor","Science"
50
  ]
51
-
52
  OCCASION_UI = [
53
  "Birthday","Wedding / Engagement","Anniversary","Graduation","New baby","Housewarming",
54
  "Retirement","Holidays","Valentineโ€™s Day","Promotion / New job","Get well soon"
55
  ]
56
- # Canonical tokens used in filtering/query
57
  OCCASION_CANON = {
58
- "Birthday":"birthday",
59
- "Wedding / Engagement":"wedding",
60
- "Anniversary":"anniversary",
61
- "Graduation":"graduation",
62
- "New baby":"new_baby",
63
- "Housewarming":"housewarming",
64
- "Retirement":"retirement",
65
- "Holidays":"holidays",
66
- "Valentineโ€™s Day":"valentines",
67
- "Promotion / New job":"promotion",
68
- "Get well soon":"get_well"
69
  }
70
-
71
  RECIPIENT_RELATIONSHIPS = [
72
- "Family - Parent",
73
- "Family - Sibling",
74
- "Family - Child",
75
- "Family - Other relative",
76
- "Friend",
77
- "Colleague",
78
- "Boss",
79
- "Romantic partner",
80
- "Teacher / Mentor",
81
- "Neighbor",
82
- "Client / Business partner",
83
  ]
84
-
85
  MESSAGE_TONES = [
86
  "Formal","Casual","Funny","Heartfelt","Inspirational","Playful","Romantic","Appreciative","Encouraging",
87
  ]
88
-
89
- AGE_OPTIONS = {
90
- "any":"any",
91
- "kid (3โ€“12)":"kids",
92
- "teen (13โ€“17)":"teens",
93
- "adult (18โ€“64)":"adult",
94
- "senior (65+)":"senior",
95
- }
96
  GENDER_OPTIONS = ["any","female","male","nonbinary"]
97
 
98
  # Query expansion by interest
@@ -118,160 +98,190 @@ SYNONYMS = {
118
  "home decor":["home","decor","wall art","candle"],
119
  "science":["lab","experiment","STEM","microscope"],
120
  }
121
-
122
- # Relationship tokens (soft guidance to retrieval)
123
  REL_TO_TOKENS = {
124
- "Family - Parent": ["parent", "family"],
125
- "Family - Sibling": ["sibling", "family"],
126
- "Family - Child": ["kids", "play", "family"],
127
- "Family - Other relative": ["family", "relative"],
128
  "Friend": ["friendly"],
129
- "Colleague": ["office", "work", "professional"],
130
- "Boss": ["executive", "professional", "premium"],
131
- "Romantic partner": ["romantic", "couple"],
132
- "Teacher / Mentor": ["teacher", "mentor", "thank_you"],
133
- "Neighbor": ["neighbor", "housewarming"],
134
- "Client / Business partner": ["professional", "thank_you", "premium"],
135
  }
136
 
137
  # --------------------- Data loading & schema ---------------------
138
- def _to_price_usd(x):
139
- s = str(x).strip().replace("$","").replace(",","")
140
- try: return float(s)
141
- except: return np.nan
142
-
143
- def _infer_age_from_category(cat: str) -> str:
144
- s = (cat or "").lower()
145
- if any(k in s for k in ["baby","toddler","infant"]): return "kids"
146
- if "toys & games" in s or "board games" in s or "toy" in s: return "kids"
147
- if any(k in s for k in ["teen","young adult","ya"]): return "teens"
148
- return "any"
149
-
150
- def _infer_occasion_tags(cat: str) -> str:
151
- s = (cat or "").lower()
152
- tags = set(["birthday"]) # default
153
- if any(k in s for k in ["home & kitchen","furniture","home dรฉcor","home decor","garden","appliance","cookware","kitchen"]):
154
- tags.update(["housewarming"])
155
- if any(k in s for k in ["beauty","jewelry","watch","fragrance","cosmetic","makeup","skincare"]):
156
- tags.update(["valentines","anniversary"])
157
- if any(k in s for k in ["toys","board game","puzzle","lego","kids"]):
158
- tags.update(["holidays"])
159
- if any(k in s for k in ["office","stationery","notebook","pen","planner"]):
160
- tags.update(["graduation","promotion"])
161
- if any(k in s for k in ["electronics","camera","audio","headphones","gaming","computer"]):
162
- tags.update(["holidays"])
163
- if any(k in s for k in ["book","novel","literature"]):
164
- tags.update(["graduation"])
165
- if any(k in s for k in ["baby","maternity","newborn","stroller"]):
166
- tags.update(["new_baby"])
167
- if any(k in s for k in ["wedding","engagement","bridal"]):
168
- tags.update(["wedding"])
169
- if any(k in s for k in ["retirement","senior gifts"]):
170
- tags.update(["retirement"])
171
- if any(k in s for k in ["health","wellness","get well","recovery"]):
172
- tags.update(["get_well"])
173
- return ",".join(sorted(tags))
174
 
175
  def map_amazon_to_schema(df_raw: pd.DataFrame) -> pd.DataFrame:
176
- cols = {c.lower().strip(): c for c in df_raw.columns}
177
- get = lambda key: df_raw.get(cols.get(key, ""), "")
 
 
 
 
 
 
178
  out = pd.DataFrame({
179
- "name": get("product name"),
180
- "short_desc": get("description"),
181
- "tags": get("category"),
182
- "price_usd": get("selling price").map(_to_price_usd) if "selling price" in cols else np.nan,
183
- "age_range": "",
184
- "gender_tags": "any",
185
- "occasion_tags": "",
186
- "persona_fit": get("category"),
187
- "image_url": get("image") if "image" in cols else "",
188
  })
189
- out["name"] = out["name"].astype(str).str.strip().str.slice(0, 120)
190
- out["short_desc"] = out["short_desc"].astype(str).str.strip().str.slice(0, 500)
191
  out["tags"] = out["tags"].astype(str).str.replace("|", ", ").str.lower()
192
- out["persona_fit"] = out["persona_fit"].astype(str).str.lower()
193
- out["occasion_tags"] = out["tags"].map(_infer_occasion_tags)
194
- out["age_range"] = out["tags"].map(_infer_age_from_category).fillna("any")
195
  return out
196
 
197
- def build_doc(row: pd.Series) -> str:
198
- return " | ".join([
199
- str(row.get("name","")),
200
- str(row.get("short_desc","")),
201
- str(row.get("tags","")),
202
- str(row.get("persona_fit","")),
203
- str(row.get("occasion_tags","")),
204
- str(row.get("age_range","")),
205
- ])
206
 
207
  def load_catalog() -> pd.DataFrame:
208
- try:
209
- ds = load_dataset("ckandemir/amazon-products", split="train")
210
- raw = ds.to_pandas()
211
- except Exception:
212
- raw = pd.DataFrame({
213
- "Product Name": ["Wireless Earbuds","Coffee Sampler","Strategy Board Game"],
214
- "Description": [
215
- "Compact earbuds with noise isolation and long battery life.",
216
- "Four single-origin roasts from small roasters.",
217
- "Modern eurogame for 2โ€“4 players, 45โ€“60 minutes."
218
- ],
219
- "Category": ["Electronics | Audio","Grocery | Coffee","Toys & Games | Board Games"],
220
- "Selling Price": ["$59.00","$34.00","$39.00"],
221
- "Image": ["","",""]
222
- })
223
  df = map_amazon_to_schema(raw).drop_duplicates(subset=["name","short_desc"])
224
- # EDA cleanups: drop missing price, cap to <= 500
225
  df = df[pd.notna(df["price_usd"])].copy()
226
- df = df[df["price_usd"] <= 500].reset_index(drop=True)
227
- # limit rows
228
  if len(df) > MAX_ROWS:
229
  df = df.sample(n=MAX_ROWS, random_state=42).reset_index(drop=True)
230
- df["doc"] = df.apply(build_doc, axis=1)
 
 
 
 
 
231
  return df
232
 
233
  CATALOG = load_catalog()
234
 
235
- # --------------------- Embeddings + FAISS (with simple cache) ---------------------
236
- class EmbeddingIndex:
237
- def __init__(self, docs: List[str], model_id: str):
238
  self.model_id = model_id
 
239
  self.model = SentenceTransformer(model_id)
240
  self.embs = self._load_or_build(docs)
241
- self.index = faiss.IndexFlatIP(self.embs.shape[1]) # cosine via normalized vectors
242
- self.index.add(self.embs)
243
 
244
- def _cache_paths(self, n_docs: int) -> Tuple[str, str]:
245
- h = hashlib.md5((self.model_id + f"|{n_docs}").encode()).hexdigest()[:10]
246
- npy = os.path.join(EMBED_CACHE_DIR, f"emb_{h}.npy")
247
- idx = os.path.join(EMBED_CACHE_DIR, f"faiss_{h}.index")
248
- return npy, idx
249
 
250
  def _load_or_build(self, docs: List[str]) -> np.ndarray:
251
- npy_path, _ = self._cache_paths(len(docs))
252
- if os.path.exists(npy_path):
253
  try:
254
- embs = np.load(npy_path)
255
  if embs.shape[0] == len(docs):
 
256
  return embs
257
  except Exception:
258
  pass
259
- # build
260
  embs = self.model.encode(docs, convert_to_numpy=True, normalize_embeddings=True, show_progress_bar=True)
261
  try:
262
- np.save(npy_path, embs)
 
 
263
  except Exception:
264
- pass
265
  return embs
266
 
267
- def search(self, query: str, topn: int):
268
- qv = self.model.encode([query], convert_to_numpy=True, normalize_embeddings=True)
269
- sims, idxs = self.index.search(qv, topn)
270
- return sims[0], idxs[0]
271
 
272
- EMB_INDEX = EmbeddingIndex(CATALOG["doc"].tolist(), EMBED_MODEL_ID)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
- # --------------------- Query building ---------------------
275
  def expand_with_synonyms(tokens: List[str]) -> List[str]:
276
  out = []
277
  for t in tokens:
@@ -282,321 +292,638 @@ def expand_with_synonyms(tokens: List[str]) -> List[str]:
282
  return out
283
 
284
  def profile_to_query(profile: Dict) -> str:
 
285
  inter = [i.lower() for i in profile.get("interests", []) if i]
286
- expanded = expand_with_synonyms(inter)
287
- expanded = expanded + expanded # weight x2
288
  rel_tokens = REL_TO_TOKENS.get(profile.get("relationship","Friend"), [])
 
 
289
  parts = []
290
- if expanded: parts.append(", ".join(expanded))
291
  if rel_tokens: parts.append(", ".join(rel_tokens))
292
- occ = OCCASION_CANON.get(profile.get("occ_ui","Birthday"), "birthday")
293
  parts.append(occ)
294
- age = profile.get("age_range","any")
295
- if age != "any": parts.append(age)
296
- g = (profile.get("gender","any") or "any").lower()
297
- if g != "any": parts.append("women" if g=="female" else ("men" if g=="male" else "unisex"))
298
- return " | ".join(parts)
299
-
300
- def _contains_ci(series: pd.Series, needle: str) -> pd.Series:
301
- if not needle: return pd.Series(True, index=series.index)
302
- return series.fillna("").str.contains(re.escape(needle), case=False, regex=True)
303
-
304
- def filter_business(df: pd.DataFrame, budget_min=None, budget_max=None,
305
- occasion_canon: str=None, age_range: str="any") -> pd.DataFrame:
306
- m = pd.Series(True, index=df.index)
307
- if budget_min is not None:
308
- m &= df["price_usd"].fillna(0) >= float(budget_min)
309
- if budget_max is not None:
310
- m &= df["price_usd"].fillna(1e9) <= float(budget_max)
311
- if occasion_canon:
312
- m &= _contains_ci(df["occasion_tags"], occasion_canon)
313
- if age_range and age_range != "any":
314
- m &= (df["age_range"].fillna("any").isin([age_range, "any"]))
315
- return df[m]
316
-
317
- def recommend_topk(profile: Dict, k: int=3) -> pd.DataFrame:
318
- query = profile_to_query(profile)
319
- sims, idxs = EMB_INDEX.search(query, topn=min(max(k*80, k), len(CATALOG)))
320
- df_f = filter_business(
321
- CATALOG,
322
- budget_min=profile.get("budget_min"),
323
- budget_max=profile.get("budget_max"),
324
- occasion_canon=OCCASION_CANON.get(profile.get("occ_ui","Birthday"), "birthday"),
325
- age_range=profile.get("age_range","any"),
326
- )
327
- if df_f.empty: df_f = CATALOG
328
- df_f_idx = set(df_f.index.tolist())
329
-
330
- # soft gender boost
331
- def gender_tokens(g: str) -> List[str]:
332
- g = (g or "any").lower()
333
- if g == "female": return ["women","woman","female","her"]
334
- if g == "male": return ["men","man","male","him"]
335
- if g == "nonbinary": return ["unisex","gender neutral","they"]
336
- return ["unisex"]
337
-
338
- gts = gender_tokens(profile.get("gender","any"))
339
- cand = []
340
- for i, sim in zip(idxs, sims):
341
- i = int(i)
342
- if i in df_f_idx:
343
- blob = f"{CATALOG.loc[i,'tags']} {CATALOG.loc[i,'short_desc']}".lower()
344
- boost = 0.08 if any(t in blob for t in gts) else 0.0
345
- cand.append((i, float(sim) + boost))
346
- cand.sort(key=lambda x: -x[1])
347
-
348
- seen, picks = set(), []
349
- for gi, score in cand:
350
- nm = CATALOG.loc[gi, "name"]
351
- if nm in seen: continue
352
- seen.add(nm)
353
- picks.append((gi, score))
354
- if len(picks) >= k: break
355
-
356
- if not picks:
357
- res = df_f.head(k).copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  res["similarity"] = np.nan
359
- return res[["name","short_desc","price_usd","occasion_tags","persona_fit","age_range","image_url","similarity"]]
360
 
361
- sel = [gi for gi,_ in picks]
362
- res = CATALOG.loc[sel].copy()
363
- res["similarity"] = [dict(picks).get(int(i), np.nan) for i in sel]
364
- return res[["name","short_desc","price_usd","occasion_tags","persona_fit","age_range","image_url","similarity"]]
 
365
 
366
- # --------------------- LLM plumbing (DIY + Greeting) ---------------------
 
 
367
 
368
- def load_text_pipeline(model_id: str):
369
- trust=True
370
- if "flan" in model_id or "t5" in model_id:
371
- tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust)
372
- mdl = AutoModelForSeq2SeqLM.from_pretrained(model_id, trust_remote_code=trust)
373
- return pipeline("text2text-generation", model=mdl, tokenizer=tok, device_map="auto", trust_remote_code=trust)
374
- else:
375
- tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust)
376
- mdl = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=trust)
377
- return pipeline("text-generation", model=mdl, tokenizer=tok, device_map="auto", trust_remote_code=trust)
378
 
379
- try:
380
- DIY_PIPE = load_text_pipeline(GEN_MODEL_ID)
381
- except Exception as e:
382
- DIY_PIPE = None
383
- print("DIY LLM load failed:", e)
384
-
385
- # Small greeting model (can reuse DIY_PIPE)
386
- GREETING_PIPE = DIY_PIPE
387
-
388
- # ---- JSON helpers ----
389
- GENERIC_NAMES = {"diy gift","gift","personalized gift","handmade gift","custom gift","ืžืชื ื”","ืžืชื ื” ืื™ืฉื™ืช","ืขืฉื” ื–ืืช ื‘ืขืฆืžืš"}
390
-
391
- def _f(x, fb=0.0):
392
- try: return float(x)
393
- except: return float(fb)
394
-
395
- def try_parse_json(text: str):
396
- if not text: return None
397
- m = re.search(r"(\{[\s\S]*\})", text.strip())
398
- if not m: return None
399
- blob = m.group(1)
400
  try:
401
- return json.loads(blob)
 
 
 
 
 
 
 
 
402
  except Exception:
403
- blob = re.sub(r",\s*}\s*$", "}", blob)
404
- blob = re.sub(r",\s*\]", "]", blob)
405
- try: return json.loads(blob)
406
- except: return None
407
-
408
- def truncate_prompt(pipe, text: str, max_tokens: int) -> str:
409
- tok = pipe.tokenizer
410
- ids = tok(text, truncation=True, max_length=max_tokens, return_tensors=None).get("input_ids", [])
411
- return tok.decode(ids, skip_special_tokens=True) if ids else text
412
-
413
- # ---- DIY prompt, validate & repair (no padding) ----
414
-
415
- def diy_prompt(profile: Dict) -> str:
416
- lang = "English" if OUTPUT_LANG == "en" else "Hebrew"
417
- name = profile.get("recipient_name","Recipient")
418
- rel = profile.get("relationship","Friend")
419
- age = profile.get("age_range","any")
420
- gen = profile.get("gender","any")
421
- ints = ", ".join(profile.get("interests",[])) or "general"
422
- occ = profile.get("occ_ui","Birthday")
423
- lo, hi = int(profile.get("budget_min",10)), int(profile.get("budget_max",100))
424
-
425
- return "\n".join([
426
- f"Invent ONE original DIY gift idea from scratch for this recipient. Write all VALUES in {lang}.",
427
- "Return JSON ONLY with exactly these keys (and nothing else):",
428
- "gift_name, overview, materials_needed, step_by_step_instructions, estimated_cost_usd, estimated_time_minutes",
429
- "",
430
- "Hard requirements:",
431
- "- Strongly reflect the recipient's interests and the occasion.",
432
- "- overview MUST mention the recipient by NAME and include relationship, age_group, gender, and the occasion.",
433
- "- gift_name must be SPECIFIC (not generic), 4โ€“10 words, include at least one interest keyword.",
434
- f"- estimated_cost_usd between ${lo}-${hi}; estimated_time_minutes 20โ€“240.",
435
- "- materials_needed: at least 5 concise items with quantities.",
436
- "- step_by_step_instructions: at least 6 practical, ordered steps.",
437
- "Forbidden gift_name terms: DIY Gift, Gift, Personalized Gift, Handmade Gift, Custom Gift.",
438
- "",
439
- f"Recipient: name={name}; relationship={rel}; age_group={age}; gender={gen}.",
440
- f"Interests: {ints}. Occasion: {occ}.",
441
- "JSON:"
442
- ])
443
-
444
- def diy_validate(g: dict, profile: Dict) -> Tuple[bool, List[str]]:
445
- errs=[]
446
- # keys
447
- req=["gift_name","overview","materials_needed","step_by_step_instructions","estimated_cost_usd","estimated_time_minutes"]
448
- for k in req:
449
- if k not in g: errs.append(f"missing key: {k}")
450
- # name
451
- n=str(g.get("gift_name",""))
452
- if not n.strip(): errs.append("gift_name empty")
453
- if any(b in n.strip().lower() for b in GENERIC_NAMES): errs.append("gift_name generic")
454
- if len(n.split())<3: errs.append("gift_name too short")
455
- # overview mentions
456
- ov=str(g.get("overview",""))
457
- if profile.get("recipient_name","") and profile.get("recipient_name") not in ov: errs.append("overview missing recipient name")
458
- for field,label in [("relationship","relationship"),("age_range","age_group"),("gender","gender"),("occ_ui","occasion")]:
459
- val=str(profile.get(field,""))
460
- if val and (val.split()[0] not in ov): errs.append(f"overview missing {label}")
461
- # lists
462
- mats=g.get("materials_needed", [])
463
- steps=g.get("step_by_step_instructions", [])
464
- if not isinstance(mats, list) or len(mats)<5: errs.append("materials_needed len < 5")
465
- if not isinstance(steps, list) or len(steps)<6: errs.append("steps len < 6")
466
- # numbers
467
- lo, hi = _f(profile.get("budget_min",10),10), _f(profile.get("budget_max",100),100)
468
- cost=_f(g.get("estimated_cost_usd"), -1)
469
- if not (lo <= cost <= hi): errs.append(f"cost not in budget [{lo},{hi}]")
470
- mins=int(_f(g.get("estimated_time_minutes"), -1))
471
- if not (20 <= mins <= 240): errs.append("time not in 20..240")
472
- return (len(errs)==0), errs
473
-
474
- def diy_repair_prompt(profile: Dict, last: dict, errors: List[str]) -> str:
475
- lang = "English" if OUTPUT_LANG == "en" else "Hebrew"
476
- return "\n".join([
477
- f"Fix ONLY the following problems in this JSON. Keep the same idea and style. Return JSON ONLY. Write all VALUES in {lang}.",
478
- "Errors:",
479
- *[f"- {e}" for e in errors],
480
- "JSON to fix:",
481
- json.dumps(last, ensure_ascii=False)
482
- ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
483
 
484
  def diy_generate(profile: Dict) -> Tuple[dict, str]:
485
- if DIY_PIPE is None:
486
- return {}, "DIY model not loaded"
487
- # attempt 1: creative
488
- prompt = diy_prompt(profile)
489
- pr = truncate_prompt(DIY_PIPE, prompt, MAX_INPUT_TOKENS)
490
- out = DIY_PIPE(pr, do_sample=True, temperature=0.9, top_p=0.95, max_new_tokens=MAX_NEW_TOKENS, truncation=True)
491
- if not isinstance(out, list): out=[out]
492
- texts = [o.get("generated_text","") for o in out]
493
- candidates = [try_parse_json(t) or {} for t in texts]
494
-
495
- # pick first valid
496
- for cand in candidates:
497
- ok, errs = diy_validate(cand, profile)
498
- if ok:
499
- return cand, "ok"
500
- last = cand
501
-
502
- # repair loop (deterministic)
503
- attempts = 1
504
- while attempts < DIY_MAX_ATTEMPTS:
505
- ok, errs = diy_validate(last, profile)
506
- if ok:
507
- return last, "ok"
508
- fix_pr = diy_repair_prompt(profile, last, errs)
509
- fix_pr = truncate_prompt(DIY_PIPE, fix_pr, MAX_INPUT_TOKENS)
510
- fixed = DIY_PIPE(fix_pr, do_sample=False, max_new_tokens=MAX_NEW_TOKENS, truncation=True)
511
- fixed = (fixed if isinstance(fixed, list) else [fixed])[0].get("generated_text","")
512
- fixed = try_parse_json(fixed) or last
513
- last = fixed
514
- attempts += 1
515
- return last, "partial"
516
-
517
- # ---- Greeting generation ----
518
-
519
- def greeting_prompt(profile: Dict) -> str:
520
- tone = profile.get('tone','Heartfelt')
521
- name = profile.get('recipient_name','Friend')
522
- rel = profile.get('relationship','Friend')
523
- occ = profile.get('occ_ui','Birthday')
524
- ints = ", ".join(profile.get('interests', []))
525
- age = profile.get('age_range','any')
526
- gen = profile.get('gender','any')
527
- return f"""
528
- Write a short greeting (2โ€“3 sentences) in English for a gift card.
529
- Tone: {tone}
530
- Recipient: {name} ({rel})
531
- Occasion: {occ}
532
- Interests: {ints}
533
- Age group: {age}; Gender: {gen}
534
- Avoid emojis.
535
- """
536
-
537
- def llm_generate_message(profile: Dict) -> str:
538
- if GREETING_PIPE is None:
539
- return (f"Dear {profile.get('recipient_name','Friend')}, happy {profile.get('occ_ui','Birthday').lower()}! "
540
- f"Wishing you joy and wonderful memories.")
541
- pr = truncate_prompt(GREETING_PIPE, greeting_prompt(profile), MAX_INPUT_TOKENS)
542
- out = GREETING_PIPE(pr, do_sample=False, max_new_tokens=90, truncation=True)
543
- out = out if isinstance(out, list) else [out]
544
- txt = out[0].get("generated_text","")
545
- return txt.strip() or (f"Dear {profile.get('recipient_name','Friend')}, happy {profile.get('occ_ui','Birthday').lower()}!")
546
 
547
- # --------------------- Image generation (SD-Turbo) ---------------------
 
 
 
 
 
 
 
548
 
549
- def load_image_pipeline():
550
- if not ENABLE_IMAGE:
551
- return None
552
- try:
553
- device = "cuda" if torch.cuda.is_available() else "cpu"
554
- dtype = torch.float16 if torch.cuda.is_available() else torch.float32
555
- pipe = AutoPipelineForText2Image.from_pretrained("stabilityai/sd-turbo", torch_dtype=dtype)
556
- pipe.to(device)
557
- return pipe
558
- except Exception as e:
559
- print("Image pipeline load failed:", e)
560
- return None
561
 
562
- IMG_PIPE = load_image_pipeline()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
563
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
564
 
565
- def generate_gift_image_from_diy(diy: Dict):
566
- if IMG_PIPE is None or not diy:
567
- return None
568
- name = diy.get('gift_name','gift')
569
- ov = diy.get('overview','product photo of handmade gift')
570
- prompt = (
571
- f"{name}: {ov}. Style: product photo, soft studio lighting, minimal background, realistic, high detail."
572
- )
573
- try:
574
- img = IMG_PIPE(
575
- prompt,
576
- num_inference_steps=2,
577
- guidance_scale=0.0,
578
- width=512, height=512
579
- ).images[0]
580
- return img
581
- except Exception as e:
582
- print("Image generation failed:", e)
583
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
 
585
  # --------------------- Rendering ---------------------
586
-
587
  def md_escape(text: str) -> str:
588
  return str(text).replace("|","\\|").replace("*","\\*").replace("_","\\_")
589
 
590
- def render_top3_html(df: pd.DataFrame) -> str:
 
 
 
 
 
 
 
591
  if df is None or df.empty:
592
- return "<em>No results found.</em>"
593
  rows = []
594
  for _, r in df.iterrows():
595
  name = md_escape(r.get("name",""))
596
- desc = md_escape(r.get("short_desc",""))
597
  price = r.get("price_usd")
598
  sim = r.get("similarity")
599
- age = r.get("age_range","any")
600
  img = r.get("image_url","") or ""
601
  price_str = f"${price:.0f}" if pd.notna(price) else "N/A"
602
  sim_str = f"{sim:.3f}" if pd.notna(sim) else "โ€”"
@@ -607,7 +934,7 @@ def render_top3_html(df: pd.DataFrame) -> str:
607
  <div style="font-weight:700;">{name}</div>
608
  <div style="font-size:0.95em;margin-top:4px;">{desc}</div>
609
  <div style="font-size:0.9em;margin-top:6px;opacity:0.8;">
610
- Price: <b>{price_str}</b> ยท Age: <code>{age}</code> ยท Similarity: <code>{sim_str}</code>
611
  </div>
612
  </div>
613
  {img_html}
@@ -616,174 +943,133 @@ def render_top3_html(df: pd.DataFrame) -> str:
616
  rows.append(card)
617
  return "\n".join(rows)
618
 
619
-
620
def render_diy_md(d: Dict) -> str:
    """Format a DIY gift dict as a Markdown card.

    Reads ``gift_name``, ``overview``, ``estimated_cost_usd``,
    ``estimated_time_minutes``, ``materials_needed`` and
    ``step_by_step_instructions`` from ``d``.

    Args:
        d: DIY gift dict; falsy means generation failed.

    Returns:
        Markdown text, or an HTML failure notice when ``d`` is empty.
    """
    if not d:
        return "<em>DIY generation failed.</em>"

    dash = "\u2014"  # em dash placeholder (was mis-encoded)
    name = md_escape(d.get("gift_name", ""))
    ov = md_escape(d.get("overview", ""))
    cost = d.get("estimated_cost_usd", dash)
    mins = d.get("estimated_time_minutes", dash)
    mats = d.get("materials_needed", [])
    steps = d.get("step_by_step_instructions", [])

    # Fall back to the placeholder for non-lists AND empty lists so the
    # section never renders blank.
    if isinstance(mats, list) and mats:
        mats_md = "\n".join(f"- {md_escape(str(m))}" for m in mats)
    else:
        mats_md = f"- {dash}"
    if isinstance(steps, list) and steps:
        steps_md = "\n".join(f"{i + 1}. {md_escape(str(s))}" for i, s in enumerate(steps))
    else:
        steps_md = f"1. {dash}"

    lines = [
        "",
        f"### DIY Gift {dash} {name}",
        "",
        ov,
        "",
        f"**Estimated cost:** ${cost} \u00b7 **Estimated time:** {mins} min",
        "",
        "**Materials needed:**",
        mats_md,
        "",
        "**Step-by-step:**",
        steps_md,
        "",
    ]
    return "\n".join(lines)
644
-
645
  # --------------------- Gradio UI ---------------------
646
- CSS = """
647
- #examples { order: 1; }
648
- #form { order: 2; }
 
 
 
 
649
  """
650
-
651
- with gr.Blocks(css=CSS) as demo:
652
  gr.Markdown(TITLE)
653
 
654
- with gr.Column(elem_id="examples"):
655
- gr.Markdown("### Quick examples")
656
-
657
- with gr.Column(elem_id="form"):
658
- with gr.Row():
659
- recipient_name = gr.Textbox(label="Recipient name", value="Rotem")
660
- relationship = gr.Dropdown(label="Relationship", choices=RECIPIENT_RELATIONSHIPS, value="Romantic partner")
661
-
662
- with gr.Row():
663
- interests = gr.CheckboxGroup(
664
- label="Interests (select a few)", choices=INTEREST_OPTIONS,
665
- value=["Reading","Fashion","Home decor"], interactive=True
666
- )
667
-
668
- with gr.Row():
669
- occasion = gr.Dropdown(label="Occasion", choices=OCCASION_UI, value="Valentineโ€™s Day")
670
- age = gr.Dropdown(label="Age group", choices=list(AGE_OPTIONS.keys()), value="adult (18โ€“64)")
671
- gender = gr.Dropdown(label="Recipient gender", choices=GENDER_OPTIONS, value="female")
672
-
673
- RangeSlider = getattr(gr, "RangeSlider", None)
674
- if RangeSlider is not None:
675
- budget_range = RangeSlider(label="Budget range (USD)", minimum=5, maximum=500, step=1, value=[30, 60])
676
- budget_min, budget_max = None, None
677
- else:
678
- with gr.Row():
679
- budget_min = gr.Slider(label="Min budget (USD)", minimum=5, maximum=500, step=1, value=30)
680
- budget_max = gr.Slider(label="Max budget (USD)", minimum=5, maximum=500, step=1, value=60)
681
- budget_range = gr.State(value=None)
682
-
683
- tone = gr.Dropdown(label="Message tone", choices=MESSAGE_TONES, value="Romantic")
684
-
685
- go = gr.Button("Get GIfty ๐ŸŽฏ")
686
-
687
- out_top3 = gr.HTML(label="Top-3 recommendations")
688
- out_diy_json = gr.JSON(label="DIY Gift (JSON)")
689
- out_diy_md = gr.Markdown(label="DIY Gift (readable)")
690
- out_gen_img = gr.Image(label="DIY Gift image", type="pil")
691
- out_msg = gr.Markdown(label="Personalized message")
692
-
693
- # examples (render on top via CSS)
694
- if RangeSlider:
695
- example_inputs = [interests, occasion, budget_range, recipient_name, relationship, age, gender, tone]
696
- EXAMPLES = [
697
- [["Reading","Fashion","Home decor"], "Valentineโ€™s Day", [30,60], "Rotem", "Romantic partner", "adult (18โ€“64)", "female", "Romantic"],
698
- [["Technology","Movies"], "Birthday", [25,45], "Daniel", "Friend", "adult (18โ€“64)", "male", "Funny"],
699
- [["Gaming","Photography"], "Birthday", [30,120], "Omer", "Family - Sibling", "teen (13โ€“17)", "male", "Playful"],
700
- [["Home decor","Cooking"], "Housewarming", [25,45], "Noa", "Neighbor", "adult (18โ€“64)", "any", "Appreciative"],
701
- ]
702
- else:
703
- example_inputs = [interests, occasion, budget_min, budget_max, recipient_name, relationship, age, gender, tone]
704
- EXAMPLES = [
705
- [["Reading","Fashion","Home decor"], "Valentineโ€™s Day", 30, 60, "Rotem", "Romantic partner", "adult (18โ€“64)", "female", "Romantic"],
706
- [["Technology","Movies"], "Birthday", 25, 45, "Daniel", "Friend", "adult (18โ€“64)", "male", "Funny"],
707
- [["Gaming","Photography"], "Birthday", 30, 120, "Omer", "Family - Sibling", "teen (13โ€“17)", "male", "Playful"],
708
- [["Home decor","Cooking"], "Housewarming", 25, 45, "Noa", "Neighbor", "adult (18โ€“64)", "any", "Appreciative"],
709
- ]
710
-
711
- with gr.Column(elem_id="examples"):
712
- gr.Examples(EXAMPLES, inputs=example_inputs)
713
-
714
- # --- predict wiring ---
715
- def ui_predict(
716
- interests_list, occasion_val,
717
- budget_rng_or_min,
718
- maybe_max_or_name,
719
- maybe_name_or_rel,
720
- rel_or_age,
721
- age_or_gender,
722
- gender_or_tone,
723
- tone_maybe=None
724
- ):
725
- # Disambiguate RangeSlider vs two Sliders
726
- use_range = isinstance(budget_rng_or_min, (list, tuple))
727
- if use_range:
728
- bmin = float(budget_rng_or_min[0]); bmax = float(budget_rng_or_min[1])
729
- name = str(maybe_max_or_name or "Friend")
730
- rel = str(maybe_name_or_rel or "Friend")
731
- age_label = str(rel_or_age or "any")
732
- gender_val = str(age_or_gender or "any")
733
- tone_val = str(gender_or_tone or "Heartfelt")
734
- else:
735
- bmin = float(budget_rng_or_min if budget_rng_or_min is not None else 20)
736
- bmax = float(maybe_max_or_name if maybe_max_or_name is not None else 60)
737
- name = str(maybe_name_or_rel or "Friend")
738
- rel = str(rel_or_age or "Friend")
739
- age_label = str(age_or_gender or "any")
740
- gender_val = str(gender_or_tone or "any")
741
- tone_val = str(tone_maybe or "Heartfelt")
742
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
743
  if bmin > bmax: bmin, bmax = bmax, bmin
744
 
745
- age_range = AGE_OPTIONS.get(age_label, "any")
746
  profile = {
747
- "recipient_name": name,
748
- "relationship": rel,
749
  "interests": interests_list or [],
750
  "occ_ui": occasion_val or "Birthday",
751
  "budget_min": bmin,
752
  "budget_max": bmax,
753
- "budget_usd": bmax,
754
- "age_range": age_range,
755
- "gender": gender_val or "any",
756
  "tone": tone_val or "Heartfelt",
757
  }
758
 
759
- # retrieval
760
- top3 = recommend_topk(profile, k=3)
761
- top3_html = render_top3_html(top3)
762
 
763
- # DIY gift (generate-from-scratch, JSON)
764
- diy_json, diy_status = diy_generate(profile)
765
  diy_md = render_diy_md(diy_json)
766
 
767
- # DIY image (optional)
768
- diy_img = generate_gift_image_from_diy(diy_json)
769
-
770
- # greeting
771
- msg = llm_generate_message(profile)
772
 
773
- return top3_html, diy_json, diy_md, diy_img, msg
774
 
775
- if RangeSlider:
776
- go.click(
777
- ui_predict,
778
- [interests, occasion, budget_range, recipient_name, relationship, age, gender, tone],
779
- [out_top3, out_diy_json, out_diy_md, out_gen_img, out_msg]
780
- )
781
- else:
782
- go.click(
783
- ui_predict,
784
- [interests, occasion, budget_min, budget_max, recipient_name, relationship, age, gender, tone],
785
- [out_top3, out_diy_json, out_diy_md, out_gen_img, out_msg]
786
- )
787
 
788
  if __name__ == "__main__":
789
  demo.launch()
 
1
+ # app.py
2
+ # 🎁 GIfty+ — Smart Gift Recommender (original Spaces app + Hybrid Ranker v2)
3
+ # Dataset default: Danielos100/Amazon_products_clean (override via DATASET_ID env)
4
+ # Retrieval: sentence-transformers/all-MiniLM-L12-v2 (cosine on budget-filtered subset)
5
+ # DIY: FLAN-only (strict prompts + sanitizers)
6
+ # Message: FLAN-based, non-generic — stochastic with validation and anti-duplication
7
+ # UI: Examples table on top (click-to-fill), open form laid out in rows, no JSON
8
+
9
+ import os, re, json, hashlib, pathlib, random
10
+ from typing import Dict, List, Tuple, Optional, Any
 
11
 
12
  import numpy as np
13
  import pandas as pd
 
15
  from datasets import load_dataset
16
 
17
  from sentence_transformers import SentenceTransformer
18
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 
 
19
  import torch
20
+
21
+ print(f"===== Application Startup at {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
22
 
23
  # --------------------- Config ---------------------
24
+ TITLE = "# ๐ŸŽ GIfty+ โ€” Smart Gift Recommender\n*Top-3 catalog picks + 1 DIY gift + personalized message*"
 
25
 
26
+ DATASET_ID = os.getenv("DATASET_ID", "Danielos100/Amazon_products_clean")
27
+ DATASET_SPLIT = os.getenv("DATASET_SPLIT", "train")
28
+
29
+ MAX_ROWS = int(os.getenv("MAX_ROWS", "12000"))
30
  EMBED_MODEL_ID = os.getenv("EMBED_MODEL_ID", "sentence-transformers/all-MiniLM-L12-v2")
 
 
31
 
32
+ # ---- Writable cache dir (no /data requirement) ----
33
def resolve_cache_dir() -> str:
    """Return the first directory that can actually be written to.

    Candidates, in order: ``$EMBED_CACHE_DIR`` (if set), ``./.gifty_cache``
    and ``/tmp/.gifty_cache``. Each one is created if needed and probed by
    writing and deleting a small marker file.

    Returns:
        The first writable candidate, or the current working directory when
        none of them is usable.
    """
    candidates = []
    env_dir = os.getenv("EMBED_CACHE_DIR")
    if env_dir:
        candidates.append(env_dir)
    candidates += [os.path.join(os.getcwd(), ".gifty_cache"), "/tmp/.gifty_cache"]

    for p in candidates:
        probe = pathlib.Path(p) / ".write_test"
        try:
            probe.parent.mkdir(parents=True, exist_ok=True)
            probe.write_text("ok")
            probe.unlink(missing_ok=True)
        except OSError:
            # Read-only or missing mount: try the next candidate.
            continue
        return p
    return os.getcwd()
48
 
49
+ EMBED_CACHE_DIR = resolve_cache_dir()
50
+ print(f"[CACHE] Using EMBED_CACHE_DIR={EMBED_CACHE_DIR}")
51
 
52
  # ===== UI options =====
53
  INTEREST_OPTIONS = [
 
55
  "Gaming","Photography","Hiking","Movies","Crafts","Pets","Wellness","Collecting","Food",
56
  "Home decor","Science"
57
  ]
 
58
  OCCASION_UI = [
59
  "Birthday","Wedding / Engagement","Anniversary","Graduation","New baby","Housewarming",
60
  "Retirement","Holidays","Valentineโ€™s Day","Promotion / New job","Get well soon"
61
  ]
 
62
  OCCASION_CANON = {
63
+ "Birthday":"birthday","Wedding / Engagement":"wedding","Anniversary":"anniversary",
64
+ "Graduation":"graduation","New baby":"new_baby","Housewarming":"housewarming",
65
+ "Retirement":"retirement","Holidays":"holidays","Valentineโ€™s Day":"valentines",
66
+ "Promotion / New job":"promotion","Get well soon":"get_well"
 
 
 
 
 
 
 
67
  }
 
68
  RECIPIENT_RELATIONSHIPS = [
69
+ "Family - Parent","Family - Sibling","Family - Child","Family - Other relative",
70
+ "Friend","Colleague","Boss","Romantic partner","Teacher / Mentor","Neighbor","Client / Business partner",
 
 
 
 
 
 
 
 
 
71
  ]
 
72
  MESSAGE_TONES = [
73
  "Formal","Casual","Funny","Heartfelt","Inspirational","Playful","Romantic","Appreciative","Encouraging",
74
  ]
75
+ AGE_OPTIONS = {"any":"any","kid (3โ€“12)":"kids","teen (13โ€“17)":"teens","adult (18โ€“64)":"adult","senior (65+)":"senior"}
 
 
 
 
 
 
 
76
  GENDER_OPTIONS = ["any","female","male","nonbinary"]
77
 
78
  # Query expansion by interest
 
98
  "home decor":["home","decor","wall art","candle"],
99
  "science":["lab","experiment","STEM","microscope"],
100
  }
 
 
101
  REL_TO_TOKENS = {
102
+ "Family - Parent": ["parent","family"],
103
+ "Family - Sibling": ["sibling","family"],
104
+ "Family - Child": ["kids","play","family"],
105
+ "Family - Other relative": ["family","relative"],
106
  "Friend": ["friendly"],
107
+ "Colleague": ["office","work","professional"],
108
+ "Boss": ["executive","professional","premium"],
109
+ "Romantic partner": ["romantic","couple"],
110
+ "Teacher / Mentor": ["teacher","mentor","thank_you"],
111
+ "Neighbor": ["neighbor","housewarming"],
112
+ "Client / Business partner": ["professional","thank_you","premium"],
113
  }
114
 
115
  # --------------------- Data loading & schema ---------------------
116
_CURRENCY_RE = re.compile(r"[^\d.,\-]+")
_NUM_RE = re.compile(r"(\d+(?:[.,]\d+)?)")
# Range separators: hyphen, en dash, em dash, or the word "to". The
# look-arounds keep "to" from matching inside words such as "total".
_RANGE_SEP = re.compile(r"\s*(?:-|\u2013|\u2014|(?<![a-z])to(?![a-z]))\s*")
# A run of digits with optional thousands/decimal separators.
_PRICE_TOKEN_RE = re.compile(r"\d[\d.,]*")


def _normalize_decimal(token: str) -> str:
    """Turn a digit run with ``,``/``.`` separators into ``float()`` syntax."""
    token = token.rstrip(".,")
    has_comma, has_dot = "," in token, "." in token
    if has_comma and has_dot:
        # Whichever separator comes last is the decimal mark.
        if token.rfind(",") > token.rfind("."):
            return token.replace(".", "").replace(",", ".")
        return token.replace(",", "")
    if has_comma:
        # "1,299" / "12,345,678" are thousands groups; "12,50" is a decimal.
        if re.fullmatch(r"\d{1,3}(?:,\d{3})+", token):
            return token.replace(",", "")
        return token.replace(",", ".")
    return token


def _to_price_usd(x) -> float:
    """Parse a raw price cell into a float.

    Handles currency symbols, thousands separators ("$1,299.00"), decimal
    commas ("12,50") and ranges ("$10 - $20", lower bound is used).

    Args:
        x: Raw cell value (string, number, or missing).

    Returns:
        The parsed price, or ``np.nan`` when no number can be extracted.
    """
    if pd.isna(x):
        return np.nan
    s = str(x).strip().lower()
    if _RANGE_SEP.search(s):
        s = _RANGE_SEP.split(s)[0]
    s = _CURRENCY_RE.sub(" ", s)
    m = _PRICE_TOKEN_RE.search(s)
    if m is None:
        return np.nan
    try:
        return float(_normalize_decimal(m.group(0)))
    except ValueError:
        return np.nan
132
+
133
def _first_present(df: pd.DataFrame, candidates: List[str]) -> Optional[str]:
    """Return the first candidate that names a column of ``df``.

    An exact match wins; otherwise the comparison is case-insensitive.
    Column labels are stringified before lower-casing so frames with
    non-string labels (e.g. integers) do not raise.

    Args:
        df: Frame whose columns are searched.
        candidates: Column names in priority order.

    Returns:
        The actual column label, or ``None`` when nothing matches.
    """
    cols_lower = {str(c).lower(): c for c in df.columns}
    for c in candidates:
        if c in df.columns:
            return c
        hit = cols_lower.get(str(c).lower())
        if hit is not None:
            return hit
    return None
139
+
140
def _auto_price_col(df: pd.DataFrame) -> Optional[str]:
    """Guess which column holds prices when no known name is present.

    Pass 1 picks the first numeric (non-boolean) column where more than 60%
    of the non-null values lie in [0.5, 10000]. Pass 2 picks the first
    column where more than half of the first 200 values, as text, contain a
    currency marker or a digit.

    Args:
        df: Raw catalog frame.

    Returns:
        The column label, or ``None`` if nothing looks like a price.
    """
    for c in df.columns:
        s = df[c]
        # Booleans count as numeric in pandas but are never prices.
        if pd.api.types.is_bool_dtype(s) or not pd.api.types.is_numeric_dtype(s):
            continue
        values = s.dropna()
        if not values.empty and values.between(0.5, 10000).mean() > 0.6:
            return c

    # $, shekel, "eur", "usd", pound, euro, or any digit.
    price_hint = r"\$|\u20aa|eur|usd|\u00a3|\u20ac|\d"
    for c in df.columns:
        # Sample first, then stringify: same result, far less work.
        sample = df[c].head(200).astype(str).str.lower().str.contains(price_hint, regex=True)
        if sample.mean() > 0.5:
            return c
    return None
152
 
153
def map_amazon_to_schema(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Project a raw product frame onto the app's catalog schema.

    Output columns: ``name``, ``short_desc``, ``tags``, ``price_usd``,
    ``image_url``. Source columns are located by name (case-insensitive);
    the price column falls back to a heuristic guess.

    Args:
        df_raw: Dataset rows as loaded from the hub.

    Returns:
        A new frame sharing ``df_raw``'s index.
    """
    name_c = _first_present(df_raw, ["product name", "title", "name", "product_title"])
    desc_c = _first_present(df_raw, ["description", "product_description", "feature", "about"])
    cat_c = _first_present(df_raw, ["category", "categories", "main_cat", "product_category"])
    price_c = _first_present(
        df_raw,
        ["selling price", "price", "current_price", "list_price", "price_amount", "actual_price", "price_usd"],
    )
    if price_c is None:
        price_c = _auto_price_col(df_raw)
    img_c = _first_present(df_raw, ["image", "image_url", "imageurl", "imUrl", "img", "img_url"])

    def _col(col, fill):
        # Missing source column -> constant filler aligned to the raw index.
        if col is None:
            return pd.Series(fill, index=df_raw.index)
        return df_raw[col]

    out = pd.DataFrame({
        "name": _col(name_c, ""),
        "short_desc": _col(desc_c, ""),
        "tags": _col(cat_c, ""),
        "price_usd": _col(price_c, np.nan).map(_to_price_usd),
        "image_url": _col(img_c, ""),
    })
    # fillna BEFORE astype(str): otherwise missing text becomes the literal
    # "nan", which would be displayed and embedded.
    out["name"] = out["name"].fillna("").astype(str).str.strip().str.slice(0, 160)
    out["short_desc"] = out["short_desc"].fillna("").astype(str).str.strip().str.slice(0, 600)
    # regex=False: "|" is a literal separator, not a regex alternation.
    out["tags"] = out["tags"].fillna("").astype(str).str.replace("|", ", ", regex=False).str.lower()
    return out
173
 
174
def extract_top_cat(tags: str) -> str:
    """Return the top-level category from a tag string.

    The string is lower-cased and split on the first ``|``, else the first
    ``>``, else the first ``,``; the leading piece is returned stripped.

    Args:
        tags: Category string; ``None``/NaN are treated as empty.

    Returns:
        The top-level category, or ``""`` when there is none.
    """
    if tags is None or (isinstance(tags, float) and tags != tags):
        return ""
    s = str(tags).lower()
    for sep in ("|", ">"):
        if sep in s:
            return s.split(sep, 1)[0].strip()
    return s.split(",", 1)[0].strip()
 
 
 
 
179
 
180
def load_catalog() -> pd.DataFrame:
    """Download the dataset and build the in-memory catalog.

    Rows are mapped to the app schema, de-duplicated on name+description,
    restricted to prices in (0, 500], and down-sampled to ``MAX_ROWS`` with
    a fixed seed. Three derived columns are added: ``doc`` (text that gets
    embedded), ``top_cat`` and ``blob`` (lower-cased search text).

    Returns:
        The catalog frame with a fresh 0..n-1 index.
    """
    raw = load_dataset(DATASET_ID, split=DATASET_SPLIT).to_pandas()

    catalog = map_amazon_to_schema(raw).drop_duplicates(subset=["name", "short_desc"])
    catalog = catalog[pd.notna(catalog["price_usd"])].copy()
    in_budget_range = (catalog["price_usd"] > 0) & (catalog["price_usd"] <= 500)
    catalog = catalog[in_budget_range].reset_index(drop=True)

    if len(catalog) > MAX_ROWS:
        catalog = catalog.sample(n=MAX_ROWS, random_state=42).reset_index(drop=True)

    name = catalog["name"].fillna("")
    tags = catalog["tags"].fillna("")
    desc = catalog["short_desc"].fillna("")
    catalog["doc"] = (name + " | " + tags + " | " + desc).str.strip()
    catalog["top_cat"] = catalog["tags"].map(extract_top_cat)
    catalog["blob"] = (name + " " + tags + " " + desc).str.lower()

    print(f"[DATA] dataset={DATASET_ID} split={DATASET_SPLIT} rows_final={len(catalog)}")
    return catalog
197
 
198
  CATALOG = load_catalog()
199
 
200
+ # --------------------- Embeddings (with dataset-aware cache) ---------------------
201
class EmbeddingBank:
    """Sentence embeddings for the catalog documents, cached on disk.

    The cache file name is derived from the dataset tag, the model id, the
    number of documents and a digest of the documents themselves, so a
    changed catalog with the same row count never reuses stale vectors.
    """

    def __init__(self, docs: List[str], model_id: str, dataset_tag: str):
        """Load the encoder and load-or-build embeddings for ``docs``."""
        self.model_id = model_id
        self.dataset_tag = dataset_tag
        self.model = SentenceTransformer(model_id)
        self.embs = self._load_or_build(docs)

    @staticmethod
    def _docs_digest(docs: List[str]) -> str:
        """Return an MD5 hex digest over the ordered document texts."""
        h = hashlib.md5()
        for d in docs:
            h.update(str(d).encode("utf-8", "replace"))
            h.update(b"\x00")  # separator so ["ab","c"] != ["a","bc"]
        return h.hexdigest()

    def _cache_path(self, n_docs: int, docs_digest: str = "") -> str:
        """Return the ``.npy`` cache path for this dataset/model/doc set.

        With ``docs_digest`` omitted the key matches the legacy scheme
        (dataset tag, model id and document count only).
        """
        key = self.dataset_tag + "|" + self.model_id + f"|{n_docs}"
        if docs_digest:
            key += "|" + docs_digest
        h = hashlib.md5(key.encode()).hexdigest()[:10]
        return os.path.join(EMBED_CACHE_DIR, f"emb_{h}.npy")

    def _load_or_build(self, docs: List[str]) -> np.ndarray:
        """Return embeddings for ``docs``, memory-mapped from cache if possible.

        Encoding uses ``normalize_embeddings=True``. If the cache cannot be
        written, the in-RAM array is returned instead.
        """
        path = self._cache_path(len(docs), self._docs_digest(docs))
        if os.path.exists(path):
            try:
                embs = np.load(path, mmap_mode="r")
                if embs.shape[0] == len(docs):
                    print("[EMB] mmap-loaded from cache")
                    return embs
            except (OSError, ValueError) as e:
                # Corrupt/unreadable cache: rebuild below.
                print(f"[EMB] Cache load failed ({e}); rebuilding")
        print("[EMB] Building embeddings\u2026")
        embs = self.model.encode(docs, convert_to_numpy=True, normalize_embeddings=True, show_progress_bar=True)
        try:
            np.save(path, embs)
            embs = np.load(path, mmap_mode="r")
            print(f"[EMB] Saved & mmap-loaded: {embs.shape}")
        except (OSError, ValueError):
            print("[EMB] Cache save failed; using RAM only")
        return embs

    def query_vec(self, text: str) -> np.ndarray:
        """Encode one query string into a normalized embedding vector."""
        return self.model.encode([text], convert_to_numpy=True, normalize_embeddings=True)[0]
234
+
235
+ EMB = EmbeddingBank(CATALOG["doc"].tolist(), EMBED_MODEL_ID, DATASET_ID)
236
 
237
+ # ---- tokens per item for fast overlap (used by recommender bonuses) ----
238
_tok_rx = re.compile(r"[a-z0-9][a-z0-9\-']*")


def _tok_set(text: str) -> set:
    """Return the set of lower-cased word tokens found in ``text``.

    A token starts with a letter or digit and may continue with letters,
    digits, hyphens or apostrophes.
    """
    lowered = str(text).lower()
    return {match.group(0) for match in _tok_rx.finditer(lowered)}
241
+ if "tok_set" not in CATALOG.columns:
242
+ CATALOG["tok_set"] = (
243
+ CATALOG["name"].fillna("") + " " +
244
+ CATALOG["tags"].fillna("") + " " +
245
+ CATALOG["short_desc"].fillna("")
246
+ ).map(_tok_set)
247
+
248
+ # ====================== Recommendations โ€” Hybrid Ranker v2 ======================
249
+ # Stronger emphasis on interests; gender/age act as filters only; occasion boost; optional reranker; diversity (MMR)
250
+ try:
251
+ from sentence_transformers import CrossEncoder
252
+ except Exception:
253
+ CrossEncoder = None
254
+
255
+ RERANK_MODEL_ID = os.getenv("RERANK_MODEL_ID", "cross-encoder/ms-marco-MiniLM-L-6-v2")
256
+ _CE_MODEL = None
257
+
258
def _load_cross_encoder():
    """Return the shared cross-encoder reranker, loading it on first use.

    Returns:
        The cached ``CrossEncoder`` instance, or ``None`` when the class is
        unavailable or loading failed (a later call will try again).
    """
    global _CE_MODEL
    if _CE_MODEL is None and CrossEncoder is not None:
        try:
            _CE_MODEL = CrossEncoder(RERANK_MODEL_ID, device="cpu")
            print(f"[RERANK] Loaded: {RERANK_MODEL_ID}")
        except Exception as e:
            print(f"[RERANK] Failed to load CE: {e}")
            _CE_MODEL = None
    return _CE_MODEL
271
+
272
+ OCCASION_PRIORS = {
273
+ "valentines": [("jewelry",0.12),("chocolate",0.10),("candle",0.08),("romantic",0.08),("couple",0.08),("heart",0.06)],
274
+ "birthday": [("fun",0.06),("game",0.06),("personalized",0.06),("gift set",0.05),("surprise",0.04)],
275
+ "anniversary":[("couple",0.10),("jewelry",0.10),("photo",0.08),("frame",0.06),("memory",0.06),("candle",0.06)],
276
+ "graduation": [("journal",0.10),("planner",0.08),("office",0.08),("coffee",0.06),("motivation",0.06)],
277
+ "housewarming":[("home",0.10),("kitchen",0.08),("decor",0.10),("candle",0.06),("serving",0.06)],
278
+ "new_baby": [("baby",0.12),("nursery",0.10),("soft",0.06),("blanket",0.06)],
279
+ "retirement": [("relax",0.08),("hobby",0.08),("travel",0.06),("book",0.06)],
280
+ "holidays": [("holiday",0.10),("winter",0.08),("chocolate",0.08),("cozy",0.06),("family",0.06)],
281
+ "promotion": [("desk",0.10),("office",0.10),("premium",0.08),("organizer",0.06)],
282
+ "get_well": [("cozy",0.10),("tea",0.08),("soothing",0.06),("care",0.06)],
283
+ }
284
 
 
285
  def expand_with_synonyms(tokens: List[str]) -> List[str]:
286
  out = []
287
  for t in tokens:
 
292
  return out
293
 
294
def profile_to_query(profile: Dict) -> str:
    """Build the retrieval query text for a recipient profile.

    The synonym-expanded interests are repeated three times to weight them
    more heavily; age and gender are deliberately left out of the text.

    Args:
        profile: Reads ``interests``, ``relationship`` and ``occ_ui``.

    Returns:
        " | "-joined segments: interests, relationship tokens, occasion
        slug, and a natural-language tail sentence.
    """
    relationship = profile.get("relationship", "Friend")
    occasion = OCCASION_CANON.get(profile.get("occ_ui", "Birthday"), "birthday")
    interests = [item.lower() for item in profile.get("interests", []) if item]

    weighted = expand_with_synonyms(interests) * 3
    segments = [
        ", ".join(weighted) if weighted else "",
        ", ".join(REL_TO_TOKENS.get(relationship, [])),
        occasion,
    ]
    head = " | ".join(seg for seg in segments if seg)
    tail = f"gift ideas for a {profile.get('relationship','Friend')} for {occasion}; likes {', '.join(interests) or 'general'}"
    return head + " | " + tail
307
+
308
def _gender_ok_mask(gender: str) -> np.ndarray:
    """Boolean mask over ``CATALOG`` rows that are acceptable for ``gender``.

    For "female" rows mentioning male-targeted terms are dropped, and vice
    versa, unless the row is also marked unisex / gender neutral. Any other
    value (including "any", "nonbinary", ``None``) keeps every row.

    Args:
        gender: Recipient gender label, case-insensitive.

    Returns:
        Array of ``len(CATALOG)`` booleans.
    """
    g = (gender or "any").lower()
    blob = CATALOG["blob"]
    if g not in ("female", "male"):
        return np.ones(len(blob), dtype=bool)

    # Non-capturing groups: avoids pandas' "match groups" warning and scopes
    # the word boundaries to every alternative.
    if g == "female":
        opposite_rx = r"\b(?:men|man's|mens|male|for men)\b"
    else:
        opposite_rx = r"\b(?:women|woman's|womens|female|for women|dress)\b"
    has_opposite = blob.str.contains(opposite_rx, regex=True, na=False)
    has_unisex = blob.str.contains(r"\b(?:unisex|gender neutral)\b", regex=True, na=False)
    return (~has_opposite | has_unisex).to_numpy()
319
+
320
def _mask_by_age(age: str, blob: pd.Series) -> np.ndarray:
    """Boolean mask of rows suitable for an age group (filter only, no score).

    - "adult" / "senior": drop rows with kid-oriented wording.
    - "teens": drop kid-oriented rows unless they also mention teens.
    - "kids": keep only rows with kid-oriented wording.
    - anything else: keep everything.

    Args:
        age: Canonical age label.
        blob: Lower-cased searchable text, one entry per catalog row.

    Returns:
        Array of ``len(blob)`` booleans.
    """
    KIDS_RX = r"\b(?:kid|kids|child|children|toddler|baby|boys?|girls?|kid\'s|children\'s)\b"
    TEEN_RX = r"\b(?:teen|teens|young adult|ya)\b"

    if age not in ("adult", "senior", "teens", "kids"):
        return np.ones(len(blob), dtype=bool)

    kid_oriented = blob.str.contains(KIDS_RX, regex=True, na=False)
    if age == "kids":
        # (kid | (~teen & kid)) reduces to kid.
        return kid_oriented.to_numpy()
    if age == "teens":
        teen_oriented = blob.str.contains(TEEN_RX, regex=True, na=False)
        return (~kid_oriented | teen_oriented).to_numpy()
    return (~kid_oriented).to_numpy()
333
+
334
def _interest_bonus(profile: Dict, idx: np.ndarray) -> np.ndarray:
    """Score bonus for catalog rows whose tokens overlap the interests.

    The vocabulary is built from the profile's interests plus their
    ``SYNONYMS``. Every phrase is lower-cased and split into words, because
    ``tok_set`` holds single lower-case tokens; without this, entries such
    as "STEM", "home decor" or "wall art" could never match.

    Args:
        profile: Reads ``interests``.
        idx: Positional row indices into ``CATALOG``.

    Returns:
        float32 array aligned with ``idx``: 0.10 per overlapping token,
        overlap capped at 6.
    """
    if idx.size == 0:
        return np.zeros(len(idx), dtype="float32")

    interests = [str(i).lower() for i in profile.get("interests", []) if i]
    synonyms = [s for it in interests for s in SYNONYMS.get(it, [])]
    vocab = {word for phrase in interests + synonyms for word in str(phrase).lower().split()}
    if not vocab:
        return np.zeros(len(idx), dtype="float32")

    tok_sets = CATALOG["tok_set"]
    counts = np.array([len(tok_sets.iat[i] & vocab) for i in idx], dtype="float32")
    counts = np.clip(counts, 0, 6)
    return 0.10 * counts  # strong weight for interests
343
+
344
def _occasion_bonus(idx: np.ndarray, occ_ui: str) -> np.ndarray:
    """Score bonus for rows whose text contains occasion keywords.

    Each ``(keyword, weight)`` prior of the occasion adds its weight when
    the keyword is a substring of the row's ``blob``; the per-row total is
    capped at 0.15.

    Args:
        idx: Positional row indices into ``CATALOG``.
        occ_ui: Occasion label from the UI; falsy or unknown means birthday.

    Returns:
        float32 array aligned with ``idx``.
    """
    priors = OCCASION_PRIORS.get(OCCASION_CANON.get(occ_ui or "Birthday", "birthday"), [])
    if idx.size == 0 or not priors:
        return np.zeros(len(idx), dtype="float32")

    texts = CATALOG["blob"].to_numpy()
    bonuses = [
        min(sum((weight for keyword, weight in priors if keyword in texts[row]), 0.0), 0.15)
        for row in idx
    ]
    return np.array(bonuses, dtype="float32")
359
+
360
def _minmax(x: np.ndarray) -> np.ndarray:
    """Rescale ``x`` linearly to [0, 1].

    Empty input is returned unchanged; a (near-)constant array, with spread
    of at most 1e-9, maps to all zeros.
    """
    if x.size == 0:
        return x
    lowest = float(np.min(x))
    highest = float(np.max(x))
    is_flat = highest <= lowest + 1e-9
    return np.zeros_like(x) if is_flat else (x - lowest) / (highest - lowest)
365
+
366
def _mmr_select(cand_idx: np.ndarray, scores: np.ndarray, k: int, lambda_: float=0.7) -> np.ndarray:
    """Pick ``k`` candidates by Maximal Marginal Relevance.

    Greedy selection: the first pick is the highest-scoring candidate; each
    later pick maximizes ``lambda_ * relevance - (1 - lambda_) * max cosine
    similarity to the items already picked``. Relevance is the min-max
    normalized ``scores``.

    Args:
        cand_idx: Positional catalog indices of the candidates.
        scores: Relevance score per candidate, aligned with ``cand_idx``.
        k: Number of items to return.
        lambda_: Relevance/diversity trade-off; 1.0 means pure relevance.

    Returns:
        Selected catalog indices in pick order. When there are at most
        ``k`` candidates they are all returned, sorted by descending score.
    """
    if cand_idx.size <= k:
        return cand_idx[np.argsort(-scores)][:k]

    rel = _minmax(scores)
    # Index first, then convert: only the candidate rows are copied instead
    # of the whole (possibly memory-mapped) embedding matrix.
    V = np.asarray(EMB.embs[cand_idx], dtype="float32")
    V = V / (np.linalg.norm(V, axis=1, keepdims=True) + 1e-8)

    picked = []
    rest = list(range(len(cand_idx)))
    # Running max similarity of every candidate to the picked set.
    max_sim = np.full(len(cand_idx), -np.inf, dtype="float32")
    while len(picked) < k and rest:
        if picked:
            mmr = lambda_ * rel[rest] - (1 - lambda_) * max_sim[rest]
        else:
            mmr = rel[rest]
        chosen = rest.pop(int(np.argmax(mmr)))
        picked.append(chosen)
        max_sim = np.maximum(max_sim, V @ V[chosen])
    return cand_idx[np.array(picked, dtype=int)]
390
+
391
def recommend_top3_budget_first(profile: Dict) -> pd.DataFrame:
    """Recommend up to three catalog items for a recipient profile.

    Pipeline: hard filters (budget, age, gender) with progressive
    relaxation, then embedding similarity plus price/interest/occasion
    bonuses, then the top 64 candidates, optional cross-encoder rerank,
    weighted final score, and MMR diversification.

    Args:
        profile: Reads ``budget_min``, ``budget_max``, ``age_range``,
            ``gender``, ``occ_ui`` and, via helpers, ``interests`` and
            ``relationship``.

    Returns:
        Frame with columns ``name``, ``short_desc``, ``price_usd``,
        ``image_url``, ``similarity``. ``similarity`` is the final blended
        score, or NaN when the cheapest-items fallback was used.
    """
    out_cols = ["name", "short_desc", "price_usd", "image_url", "similarity"]

    # 1) Hard filters: budget + age + gender (filters only, no scoring).
    lo = float(profile.get("budget_min", 0))
    hi = float(profile.get("budget_max", 1e9))
    all_prices = CATALOG["price_usd"].values
    m_price = (all_prices >= lo) & (all_prices <= hi)
    m_age = _mask_by_age(profile.get("age_range", "any"), CATALOG["blob"])
    m_gender_ok = _gender_ok_mask(profile.get("gender", "any"))

    idx = np.where(m_price & m_age & m_gender_ok)[0]
    if idx.size == 0:
        # Relax the age filter first.
        idx = np.where(m_price & m_gender_ok)[0]
    if idx.size == 0:
        # Then widen the budget by 20% each way.
        lo2, hi2 = max(0, lo * 0.8), (hi * 1.2 if hi < 1e8 else hi)
        m_price2 = (all_prices >= lo2) & (all_prices <= hi2)
        idx = np.where(m_price2 & m_gender_ok)[0]
    if idx.size == 0:
        # Last resort: the three cheapest items overall.
        cheapest = np.argsort(all_prices)[:3]
        res = CATALOG.iloc[cheapest].copy()
        res["similarity"] = np.nan
        return res[out_cols].reset_index(drop=True)

    # 2) Embedding similarity + price / interest / occasion bonuses.
    q = profile_to_query(profile)
    qv = EMB.query_vec(q).astype("float32")
    # Index before converting so only the filtered rows are copied.
    X = np.asarray(EMB.embs[idx], dtype="float32")
    emb_sims = X @ qv

    target_price = (lo + hi) / 2.0 if hi > lo else hi
    prices = CATALOG.iloc[idx]["price_usd"].to_numpy()
    price_bonus = np.clip(0.12 - np.abs(prices - target_price) / max(target_price, 1.0), 0, 0.12).astype("float32")

    int_bonus = _interest_bonus(profile, idx)
    occ_bonus = _occasion_bonus(idx, profile.get("occ_ui", "Birthday"))

    pre_score = emb_sims + price_bonus + int_bonus + occ_bonus

    # 3) Candidate pool: top K1 by preliminary score.
    K1 = min(64, idx.size)
    top_local = np.argpartition(-pre_score, K1 - 1)[:K1]
    cand_idx = idx[top_local]

    emb_norm = _minmax(emb_sims[top_local])
    price_norm = _minmax(price_bonus[top_local])
    int_norm = _minmax(int_bonus[top_local])
    occ_norm = _minmax(occ_bonus[top_local])

    # 4) Optional cross-encoder rerank; contributes zeros when unavailable.
    ce_norm = np.zeros_like(emb_norm)
    ce = _load_cross_encoder()
    if ce is not None:
        try:
            # cand_idx holds positions, so use positional indexing.
            docs = CATALOG.iloc[cand_idx]["doc"].tolist()
            ce_raw = np.array(ce.predict([(q, d) for d in docs]), dtype="float32")
            ce_norm = _minmax(ce_raw)
        except Exception as e:
            print(f"[RERANK] Scoring failed; using embeddings only: {e}")
            ce_norm = np.zeros_like(emb_norm)

    # 5) Final blended score.
    final = (
        0.56 * emb_norm +
        0.26 * ce_norm +
        0.10 * int_norm +
        0.05 * occ_norm +
        0.03 * price_norm
    ).astype("float32")

    # 6) MMR diversification and top-3 selection.
    pick_idx = _mmr_select(cand_idx, final, k=min(3, cand_idx.size), lambda_=0.7)

    res = CATALOG.iloc[pick_idx].copy()
    pos = {int(c): i for i, c in enumerate(cand_idx)}
    res["similarity"] = [float(final[pos[int(i)]]) for i in pick_idx]
    return res[out_cols].reset_index(drop=True)
467
+
468
+ # ====================== END Recommendations ======================
469
+
470
+
471
+ # --------------------- DIY (FLAN-only) โ€” strict prompts + sanitizers ---------------------
472
+ print("transformers:", __import__("transformers").__version__, "| torch:", torch.__version__)
473
+ DIY_MODEL_ID = os.getenv("DIY_MODEL_ID", "google/flan-t5-small")
474
+ DIY_DEVICE = torch.device("cpu")
475
+ MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "384"))
476
+ DIY_MAX_NEW_TOKENS = int(os.getenv("DIY_MAX_NEW_TOKENS", "120"))
477
+
478
+ INTEREST_ALIASES = {
479
+ "Reading": ["book", "novel", "literary"],
480
+ "Fashion": ["style", "chic", "silk"],
481
+ "Home decor": ["candle", "wall", "jar"],
482
+ "Technology": ["tech", "gadget", "usb"],
483
+ "Movies": ["film", "cinema", "poster"],
484
+ }
485
+ FALLBACK_NOUNS = ["Kit", "Set", "Bundle", "Box", "Pack"]
486
+
487
+ _diy_cache = {}
488
def _load_flan(mid: str):
    """Return ``(tokenizer, model)`` for a seq2seq model id, cached per id.

    The model is moved to ``DIY_DEVICE`` and put in eval mode on first load.
    """
    try:
        return _diy_cache[mid]
    except KeyError:
        pass

    # NOTE(review): trust_remote_code=True executes code shipped in the model
    # repo; the model id is env-overridable, so confirm this is really needed.
    tokenizer = AutoTokenizer.from_pretrained(mid, use_fast=True, trust_remote_code=True)
    model = AutoModelForSeq2SeqLM.from_pretrained(mid, trust_remote_code=True, use_safetensors=True)
    model = model.to(DIY_DEVICE).eval()

    _diy_cache[mid] = (tokenizer, model)
    print(f"[DIY] Loaded FLAN: {mid}")
    return _diy_cache[mid]
495
+
496
@torch.inference_mode()
def _gen(tok, mdl, prompt: str, max_new_tokens=64, do_sample=False, temperature=0.9, top_p=0.95, seed=None) -> str:
    """Generate one completion for ``prompt`` and return the decoded text.

    Args:
        tok: Tokenizer (callable, with ``eos_token_id`` and ``decode``).
        mdl: Model exposing ``generate``.
        prompt: Input text, truncated to ``MAX_INPUT_TOKENS`` tokens.
        max_new_tokens: Generation length cap.
        do_sample: Nucleus sampling when True, greedy decoding otherwise.
        temperature: Sampling temperature (sampling only).
        top_p: Nucleus threshold (sampling only).
        seed: RNG seed; a random one is drawn when omitted. Seeds both the
            global ``random`` and ``torch`` generators.

    Returns:
        The first generated sequence, special tokens removed, stripped.
    """
    if seed is None:
        seed = random.randint(1, 10_000_000)
    random.seed(seed)
    torch.manual_seed(seed)

    encoded = tok(prompt, truncation=True, max_length=MAX_INPUT_TOKENS, return_tensors="pt")
    inputs = {key: tensor.to(DIY_DEVICE) for key, tensor in encoded.items()}

    if do_sample:
        decoding = {"do_sample": True, "temperature": temperature, "top_p": top_p}
    else:
        decoding = {"do_sample": False, "num_beams": 1}

    generated = mdl.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        eos_token_id=tok.eos_token_id,
        pad_token_id=tok.eos_token_id,
        **decoding,
    )
    return tok.decode(generated[0], skip_special_tokens=True).strip()
509
+
510
def _choose_interest_token(interests: List[str]) -> str:
    """Pick a short keyword that represents the recipient's interests.

    The first interest with an entry in ``INTEREST_ALIASES`` yields a
    random alias. Otherwise the first word of the first non-blank interest
    is used, lower-cased.

    Args:
        interests: Interest labels; may be empty or ``None``.

    Returns:
        The keyword, or "gift" when no usable interest exists.
    """
    interests = interests or []
    for it in interests:
        cand = INTEREST_ALIASES.get(it, [])
        if cand:
            return random.choice(cand)
    # Blank entries are skipped instead of raising IndexError.
    for it in interests:
        words = str(it).split()
        if words:
            return words[0].lower()
    return "gift"
515
+
516
def _title_case(s: str) -> str:
    """Collapse whitespace, drop quote characters, and capitalize each word.

    Straight double quotes and curly single/double quotes are removed.
    ``str.capitalize`` lower-cases the rest of each word ("USB" -> "Usb").
    """
    s = re.sub(r"\s+", " ", s).strip()
    # " plus the four curly quotes (the original class was mis-encoded).
    s = re.sub("[\"\u201c\u201d\u2018\u2019]+", "", s)
    return " ".join(w.capitalize() for w in s.split())
520
+
521
def _sanitize_name(name: str, interests: List[str]) -> str:
    """Clean a model-generated gift name and tie it to an interest keyword.

    Steps: strip boilerplate prefixes ("The name", "Name", "Title", ...)
    and stray leading/trailing punctuation; make sure an interest keyword
    is present; collapse repeated "Home Decor"; title-case and cap at 80
    characters. Results shorter than three words are replaced by
    "<Keyword> <fallback noun>".

    Args:
        name: Raw model output.
        interests: Recipient interests used to pick the keyword.

    Returns:
        The sanitized name.
    """
    bad = [r"^the name\b", r"\bmember of the family\b", r"^name\b", r"^title\b"]
    for b in bad:
        name = re.sub(b, "", name, flags=re.I).strip()
    # Drop punctuation left at either end, e.g. "The name: X" -> ": X" -> "X".
    name = re.sub(r"^[\s:\-\u2013\u2014]+|[:\-\u2013\u2014]+$", "", name).strip()

    alias = _choose_interest_token(interests)
    if alias not in name.lower():
        tokens = [t for t in re.split(r"[\s\-]+", name) if t]
        if not tokens:
            name = f"{alias.capitalize()} {random.choice(FALLBACK_NOUNS)}"
        elif len(tokens) < 4:
            name = f"{alias.capitalize()} " + " ".join(t.capitalize() for t in tokens)
        else:
            tokens.insert(1, alias.capitalize())
            name = " ".join(tokens)

    name = re.sub(r'\b(Home Decor:?\s*){2,}', 'Home Decor ', name, flags=re.I)
    name = _title_case(name)[:80]
    if len(name.split()) < 3:
        noun = random.choice(FALLBACK_NOUNS)
        name = f"{alias.capitalize()} {noun}"
    return name
541
+
542
def _split_list_text(s: str, seps: List[str]) -> List[str]:
    """Split free-form list text into clean items.

    Separators are tried in order; the first that yields at least two
    non-empty items wins. Otherwise the text is split on newlines and
    semicolons. Items are stripped of surrounding bullets and punctuation.

    Args:
        s: Raw text (``None`` is treated as empty).
        seps: Candidate separators in priority order; empty ones are
            ignored.

    Returns:
        The non-empty items, in order.
    """
    strip_chars = " -\u2022*.,;:"  # includes the bullet (was mis-encoded)

    def _clean(chunks):
        stripped = (c.strip(strip_chars) for c in chunks)
        return [c for c in stripped if c]

    s = (s or "").strip()
    for sep in seps:
        # Guard: str.split("") raises ValueError.
        if sep and sep in s:
            parts = _clean(s.split(sep))
            if len(parts) >= 2:
                return parts
    return _clean(re.split(r"[\n\r;]+", s))
552
+
553
def _coerce_materials(items: List[str]) -> List[str]:
    """Normalize a materials list to 6-8 unique, quantity-bearing entries.

    Each item is whitespace-collapsed, stripped of bullets/punctuation,
    de-stuttered (a word repeated three or more times in a row is kept
    once), truncated to 59 characters, and given " x1" when it has no
    digit. Duplicates are dropped case-insensitively. Lists shorter than
    six are topped up with generic craft supplies.

    Args:
        items: Raw material strings (``None`` is treated as empty).

    Returns:
        Between 6 and 8 cleaned entries.
    """
    strip_chars = " -\u2022*.,;:"  # includes the bullet (was mis-encoded)
    out = []
    seen = set()  # lower-cased entries already in ``out``
    for it in items or []:
        it = re.sub(r'\s+', ' ', str(it)).strip(strip_chars)
        if not it:
            continue
        it = re.sub(r'(\b\w+\b)(?:\s+\1){2,}', r'\1', it, flags=re.I)
        if len(it) > 60:
            it = it[:58] + "\u2026"
        if not re.search(r"\d", it):
            it = it + " x1"
        if it.lower() not in seen:
            seen.add(it.lower())
            out.append(it)
        if len(out) >= 8:
            break

    base = ["Small gift box x1","Decorative paper x2","Twine 2 m","Cardstock sheets x2","Double-sided tape x1","Stickers x8","Ribbon 1 m","Fine-tip marker x1"]
    for extra in base:
        if len(out) >= 6:
            break
        if extra.lower() not in seen:
            seen.add(extra.lower())
            out.append(extra)
    return out[:8]
572
+
573
def _coerce_steps(items: List[str]) -> List[str]:
    """Normalize instruction steps to a list of 6-8 clean sentences.

    Each step is stripped of bullets/punctuation and leading numbering
    ("1.", "2)", "3 -"), whitespace-collapsed, truncated to 119 characters,
    and capitalized. Steps that are empty after cleaning, such as a bare
    "3.", are skipped. Short lists are padded with generic steps.

    Args:
        items: Raw step strings (``None`` is treated as empty).

    Returns:
        Between 6 and 8 steps.
    """
    strip_chars = " -\u2022*.,;:"  # includes the bullet (was mis-encoded)
    out = []
    for it in items or []:
        it = re.sub(r'\s+', ' ', str(it).strip(strip_chars))
        it = re.sub(r'^(?:\d+[\).\s-]*)', '', it).strip(strip_chars)
        if not it:
            continue
        if len(it) > 120:
            it = it[:118] + "\u2026"
        out.append(it[0].upper() + it[1:])
        if len(out) >= 8:
            break
    while len(out) < 6:
        out.append(f"Refine and decorate step {len(out)+1}")
    return out[:8]
587
+
588
+ def _only_int(s: str) -> Optional[int]:
589
+ m = re.search(r"-?\d+", s)
590
+ return int(m.group()) if m else None
591
+
592
+ def _clamp_num(v, lo, hi, default):
593
+ try:
594
+ x = float(v)
595
+ return int(min(max(x, lo), hi))
596
+ except:
597
+ return int((lo + hi) / 2 if default is None else default)
598
+
599
def _build_json(profile: Dict, fields: Dict[str, Any]) -> Dict[str, Any]:
    """Assemble the final DIY idea dict from raw generated fields.

    Materials and steps go through ``_coerce_materials`` / ``_coerce_steps``;
    cost is clamped to the profile budget and time to 20-180 minutes via
    ``_clamp_num``. Missing, ``None`` or blank text fields fall back to
    generic defaults instead of raising.

    Args:
        profile: Must contain ``budget_min`` and ``budget_max``.
        fields: Raw values keyed by ``gift_name``, ``overview``,
            ``materials_needed``, ``steps``, ``estimated_cost_usd`` and
            ``estimated_time_minutes``.

    Returns:
        A dict with those same six keys holding cleaned values.
    """
    gift_name = str(fields.get("gift_name") or "").strip() or "DIY Gift Idea"
    overview = (
        str(fields.get("overview") or "").strip()
        or "A practical handmade gift tailored to the recipient."
    )
    return {
        "gift_name": gift_name[:80],
        "overview": overview,
        "materials_needed": _coerce_materials(fields.get("materials_needed") or []),
        "steps": _coerce_steps(fields.get("steps") or []),
        # default=None makes an unusable cost fall back to the budget midpoint.
        "estimated_cost_usd": _clamp_num(
            fields.get("estimated_cost_usd"),
            profile["budget_min"],
            profile["budget_max"],
            default=None,
        ),
        "estimated_time_minutes": _clamp_num(
            fields.get("estimated_time_minutes"), 20, 180, default=60
        ),
    }
612
 
613
def diy_generate(profile: Dict) -> Tuple[dict, str]:
    """Generate one DIY gift idea for ``profile``, one field per model call.

    Six prompts are sent through ``_gen`` (name, overview, materials, steps,
    cost, minutes). The raw answers are cleaned by ``_sanitize_name``,
    ``_split_list_text`` and ``_only_int`` and merged by ``_build_json``.

    Args:
        profile: Recipient profile. Keys read here: ``recipient_name``,
            ``relationship``, ``occ_ui`` (falling back to ``occasion``),
            ``interests``, ``budget_min``, ``budget_max``, ``age_range`` and
            ``gender``. Missing or ``None`` values use the defaults below.

    Returns:
        ``(idea, status)`` where ``idea`` is the dict from ``_build_json`` and
        ``status`` is always ``"ok"``.
    """
    tok, mdl = _load_flan(DIY_MODEL_ID)

    # Resolve the occasion the same way _build_prompt does (occ_ui first).
    occasion = profile.get("occ_ui") or profile.get("occasion") or "Birthday"

    raw_lo = profile.get("budget_min")
    raw_hi = profile.get("budget_max")
    budget_lo = int(float(10 if raw_lo is None else raw_lo))
    budget_hi = int(float(100 if raw_hi is None else raw_hi))
    if budget_lo > budget_hi:
        # Keep the range well-formed for the prompts and for cost clamping.
        budget_lo, budget_hi = budget_hi, budget_lo

    p = {
        "recipient_name": profile.get("recipient_name") or "Recipient",
        "relationship": profile.get("relationship") or "Friend",
        "occ_ui": occasion,
        "occasion": occasion,
        "interests": list(profile.get("interests") or []),
        "budget_min": budget_lo,
        "budget_max": budget_hi,
        "age_range": profile.get("age_range") or "any",
        "gender": profile.get("gender") or "any",
    }
    lang = "English"
    ints_str = ", ".join(p["interests"]) or "general"

    # Each interest followed by its aliases, flattened into one token list.
    interest_tokens = [
        token
        for interest in p["interests"]
        for token in (interest, *INTEREST_ALIASES.get(interest, []))
    ]
    tokens_str = ", ".join(interest_tokens) or "gift"

    # 1) NAME
    prompt_name = (
        "Return ONLY a DIY gift NAME in Title Case (4\u20138 words). "
        f"Must include at least one interest token from: {tokens_str}. "
        f"Occasion: {p['occ_ui']}. Relationship: {p['relationship']}. Language: {lang}. "
        "Forbidden: the words 'name', 'title', 'family'. No quotes, no trailing punctuation.\n"
        "Examples:\n"
        "Reading \u2192 Literary Candle Bookmark Kit\n"
        "Technology \u2192 Gadget Cable Organizer Set\n"
        "Home decor \u2192 Rustic Jar Candle Bundle\n"
        "Output:"
    )
    raw_name = _gen(tok, mdl, prompt_name, max_new_tokens=24, do_sample=False)
    name = _sanitize_name(raw_name, p["interests"])

    # 2) OVERVIEW
    prompt_over = (
        f"Write EXACTLY 2 sentences in {lang} for a handmade gift called '{name}'. "
        f"Mention {p['recipient_name']} ({p['relationship']}) and the occasion ({p['occ_ui']}). "
        f"Explain how it reflects the interests: {ints_str}. "
        "No lists, no emojis. Output only the two sentences."
    )
    overview = _gen(tok, mdl, prompt_over, max_new_tokens=80, do_sample=True,
                    temperature=0.9, top_p=0.95)

    # 3) MATERIALS
    prompt_mat = (
        f"List 6 concise materials with quantities to make '{name}' cheaply. "
        f"Keep total within {p['budget_min']}-{p['budget_max']} USD. "
        "Output ONLY a comma-separated list (e.g., 'glass jar x2, soy tealights x4, ...')."
    )
    mats_txt = _gen(tok, mdl, prompt_mat, max_new_tokens=96, do_sample=False)
    materials = _split_list_text(mats_txt, [",", ";"])

    # 4) STEPS
    prompt_steps = (
        f"Write 6 short imperative steps to make '{name}'. "
        "Output ONLY a semicolon-separated list."
    )
    steps_txt = _gen(tok, mdl, prompt_steps, max_new_tokens=120, do_sample=True,
                     temperature=0.9, top_p=0.95)
    steps = _split_list_text(steps_txt, [";", "\n"])

    # 5) COST
    prompt_cost = (
        f"Return ONE integer total cost in USD between {p['budget_min']}-{p['budget_max']}. "
        "Output NUMBER only."
    )
    cost_txt = _gen(tok, mdl, prompt_cost, max_new_tokens=6, do_sample=False)
    cost = _only_int(cost_txt)

    # 6) MINUTES
    time_txt = _gen(tok, mdl, "Return ONE integer minutes between 20 and 180. Output NUMBER only.",
                    max_new_tokens=6, do_sample=False)
    minutes = _only_int(time_txt)

    # _build_json clamps cost/minutes and pads materials/steps, so unusable
    # model answers still produce a complete idea.
    idea = _build_json(p, {
        "gift_name": name,
        "overview": overview,
        "materials_needed": materials,
        "steps": steps,
        "estimated_cost_usd": cost,
        "estimated_time_minutes": minutes,
    })
    return idea, "ok"
691
+
692
+ # --------------------- Personalized Message (FLAN, ืžื’ื•ื•ืŸ + ื•ืœื™ื“ืฆื™ื”) ---------------------
693
+ # ืžื‘ื•ืกืก ืื—ื“-ืœืื—ื“ ืขืœ ื”ืงื•ื“ ืžื”ืงื•ืœืื‘ ืฉืœืš, ืžื•ืชืื ืœืฉื™ืžื•ืฉ ื™ืฉื™ืจ ื‘ืืคืœื™ืงืฆื™ื”
694
+ MSG_MODEL_ID = "google/flan-t5-small"
695
+ MSG_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
696
+ TEMP_RANGE = (0.88, 1.10)
697
+ TOPP_RANGE = (0.90, 0.96)
698
+ REP_PENALTY = 1.12
699
+ MSG_MAX_NEW_TOKENS = 90
700
+ MSG_MAX_TRIES = 4
701
+ _last_msg: Optional[str] = None
702
+ _msg_tok, _msg_mdl = None, None
703
+
704
# Per-tone prompt fragments. Each entry has:
#   "system": one instruction sentence (str)
#   "rules":  extra bullet rules (list of str); "{name}" in the Formal rule is
#             literal prompt text, it is not formatted anywhere in this table.
# The value type is Any because "system" is a str while "rules" is a list.
TONE_STYLES: Dict[str, Dict[str, Any]] = {
    "Formal": {
        "system": "Write 2\u20133 refined sentences with professional courtesy and clarity.",
        "rules": [
            "You may begin with 'Dear {name},' but keep it concise.",
            "Use precise vocabulary; avoid colloquialisms.",
            "Conclude with a dignified line.",
        ],
    },
    "Casual": {
        "system": "Write 2\u20133 relaxed sentences with natural, friendly language.",
        "rules": [
            "Keep it light and conversational.",
            "Reference one concrete interest detail.",
            "End upbeat without clich\u00e9s.",
        ],
    },
    "Funny": {
        "system": "Write 2\u20133 witty sentences with playful humor.",
        "rules": [
            "Add one subtle pun linked to the occasion or interests.",
            "No slapstick; keep it tasteful.",
            "End with a cheeky nudge.",
        ],
    },
    "Heartfelt": {
        "system": "Write 2\u20133 warm, sincere sentences with genuine sentiment.",
        "rules": [
            "Open with an image or specific detail; avoid templates.",
            "Let one verb carry the energy; minimal adjectives.",
            "Close with a crisp, personal wish.",
        ],
    },
    "Inspirational": {
        "system": "Write 2\u20133 uplifting sentences with forward-looking energy.",
        "rules": [
            "Honor a trait or effort implied by the interests.",
            "Use a subtle metaphor; avoid grandiose platitudes.",
            "Finish with a compact, future-facing line.",
        ],
    },
    "Playful": {
        "system": "Write 2\u20133 lively sentences with bounce and rhythm.",
        "rules": [
            "Sneak a gentle internal rhyme or alliteration.",
            "Keep syntax varied and musical.",
            "Land on a spirited close.",
        ],
    },
    "Romantic": {
        "system": "Write 2\u20133 intimate sentences, warm and elegant.",
        "rules": [
            "Reference a shared moment or interest; keep it subtle.",
            "No clich\u00e9s or over-sweet phrasing.",
            "End with a soft, affectionate note.",
        ],
    },
    "Appreciative": {
        "system": "Write 2\u20133 sentences that express genuine appreciation.",
        "rules": [
            "Name a specific quality or habit tied to the interests.",
            "Avoid business thank-you clich\u00e9s.",
            "Close with concise gratitude.",
        ],
    },
    "Encouraging": {
        "system": "Write 2\u20133 supportive sentences that motivate gently.",
        "rules": [
            "Acknowledge progress or perseverance (hinted by interests).",
            "Offer one practical, hopeful sentiment.",
            "Finish with a compact encouragement.",
        ],
    },
}
778
# Lower-case cliches; a generated message containing any of them is rejected.
BAN_PHRASES: List[str] = [
    "wishing you all the best",
    "may your day be filled",
    "on this special day",
    "from the bottom of my heart",
    "best wishes on your",
    "warm wishes",
]

# Opening lines offered to the model; one is picked at random per prompt.
OPENERS: List[str] = [
    "Here\u2019s to a moment that fits you perfectly:",
    "A note made just for you:",
    "Because you make celebrations easy to love:",
    "For a day that sounds like you:",
]

# Closing lines the model is asked to paraphrase; one is picked per prompt.
CLOSERS: List[str] = [
    "Enjoy every bit\u2014you\u2019ve earned it.",
    "Keep doing the things that light you up.",
    "Here\u2019s to more of what makes you, you.",
    "Let this be a spark for the year ahead.",
]
798
 
799
def _msg_load():
    """Return the cached ``(tokenizer, model)`` pair, loading it on first use.

    The pair is stored in the module globals ``_msg_tok`` / ``_msg_mdl``. The
    model is moved to ``MSG_DEVICE`` and switched to eval mode when loaded.
    """
    global _msg_tok, _msg_mdl
    if _msg_tok is not None and _msg_mdl is not None:
        return _msg_tok, _msg_mdl
    _msg_tok = AutoTokenizer.from_pretrained(MSG_MODEL_ID)
    _msg_mdl = AutoModelForSeq2SeqLM.from_pretrained(MSG_MODEL_ID)
    _msg_mdl.to(MSG_DEVICE).eval()
    return _msg_tok, _msg_mdl
806
+
807
+ def _norm(s: str) -> str:
808
+ return re.sub(r"\s+", " ", s or "").strip()
809
+
810
+ def _sentences_n(s: str) -> int:
811
+ return len([p for p in re.split(r"(?<=[.!?])\s+", s.strip()) if p])
812
+
813
+ def _contains_any(text: str, terms: List[str]) -> bool:
814
+ t = text.lower()
815
+ return any(term and term.lower() in t for term in terms)
816
+
817
+ def _too_similar(a: str, b: str, n=3, thr=0.85) -> bool:
818
+ def ngrams(txt):
819
+ toks = re.findall(r"[a-zA-Z']+", txt.lower())
820
+ return set(tuple(toks[i:i+n]) for i in range(max(0, len(toks)-n+1)))
821
+ A, B = ngrams(a), ngrams(b)
822
+ if not A or not B: return False
823
+ j = len(A & B) / max(1, len(A | B))
824
+ return j >= thr
825
+
826
+ def _clean_occasion(occ: str) -> str:
827
+ return (occ or "").replace("โ€™","'").strip()
828
+
829
def _build_prompt(profile: Dict[str, Any]) -> Tuple[str, Dict[str, str]]:
    """Build the gift-card message prompt for ``profile``.

    Args:
        profile: Keys read: ``recipient_name``, ``relationship``, ``occ_ui``
            (falling back to ``occasion``), ``tone`` and ``interests``.
            Missing, ``None`` or blank values use defaults; an unknown tone
            uses the "Heartfelt" style.

    Returns:
        ``(prompt, need)`` where ``need`` holds the ``name`` and ``occ``
        strings the caller checks for in the generated message. ``occ`` is
        never empty.
    """
    name = profile.get("recipient_name") or "Friend"
    rel = profile.get("relationship") or "Friend"
    # Fall back AFTER cleaning so a whitespace-only occasion cannot produce an
    # empty string (the caller splits this value into words).
    occ = _clean_occasion(profile.get("occ_ui") or profile.get("occasion") or "") or "Birthday"
    tone = profile.get("tone") or "Heartfelt"
    ints = ", ".join(profile.get("interests") or []) or "general interests"

    style = TONE_STYLES.get(tone, TONE_STYLES["Heartfelt"])
    # The draw order (opener, closer, spice) is kept fixed so that a seeded
    # global RNG produces the same prompt as before.
    opener = random.choice(OPENERS)
    closer = random.choice(CLOSERS)
    spice = random.choice([
        "Use one concrete visual detail.",
        "Shift the rhythm slightly in the second sentence.",
        "Let one verb carry most of the energy; keep adjectives minimal.",
        "Add a gentle internal rhyme.",
    ])

    lines = [
        "You are a skilled copywriter. Generate a short gift-card message in English (2\u20133 sentences).",
        f"Recipient: {name} ({rel}). Occasion: {occ}. Interests: {ints}. Tone: {tone}.",
        style["system"],
        "Rules:",
        *[f"- {r}" for r in style["rules"]],
        f"- Avoid clich\u00e9s such as: {', '.join(BAN_PHRASES)}.",
        "- No emojis. No bullet points.",
        "- Do not start with 'Dear' unless Tone is Formal.",
        f"- Start with: \"{opener}\" (continue naturally, not as a header).",
        f"- End with a natural line similar to: \"{closer}\" (rephrase; do not quote).",
        f"- {spice}",
        "Output only the message; no extra commentary.",
    ]
    return "\n".join(lines), dict(name=name, occ=occ)
861
+
862
@torch.inference_mode()
def generate_personal_message(profile: Dict[str, Any], seed: Optional[int]=None, previous_message: Optional[str]=None) -> Dict[str, Any]:
    """Generate a short personalised gift-card message with validation.

    Up to ``MSG_MAX_TRIES`` samples are drawn. A sample is accepted when it
    has 1-3 sentences, mentions the recipient name and the occasion (or its
    first word), contains no banned phrase and is not a near-duplicate of the
    previous message. The seed is advanced by 17 after each rejected attempt.

    Side effects: reseeds the global ``random`` and ``torch`` RNGs on every
    attempt and stores the returned message in the module global ``_last_msg``.

    Args:
        profile: Recipient profile passed to ``_build_prompt``.
        seed: Base seed; a random one is drawn when ``None``.
        previous_message: Message to avoid repeating; defaults to the last
            message this function returned.

    Returns:
        ``{"message": str, "meta": dict}``. On success ``meta`` records tone,
        sampling parameters, seed, attempt and model; on failure it carries
        ``"failed": True`` and the message is the last non-empty rejected
        sample or, if there is none, a fixed "Happy <occasion>, <name>!" line.
    """
    global _last_msg
    tok, mdl = _msg_load()
    if seed is None:
        seed = random.randint(1, 10_000_000)

    tone = profile.get("tone", "Heartfelt")
    prev = previous_message or _last_msg
    last_text = ""  # most recent non-empty rejected sample
    for attempt in range(1, MSG_MAX_TRIES + 1):
        random.seed(seed)
        torch.manual_seed(seed)
        prompt, need = _build_prompt(profile)
        temp = random.uniform(*TEMP_RANGE)
        topp = random.uniform(*TOPP_RANGE)

        enc = tok(prompt, truncation=True, max_length=512, return_tensors="pt").to(MSG_DEVICE)
        out_ids = mdl.generate(
            **enc,
            do_sample=True,
            temperature=temp,
            top_p=topp,
            max_new_tokens=MSG_MAX_NEW_TOKENS,
            repetition_penalty=REP_PENALTY,
            pad_token_id=tok.eos_token_id,
            eos_token_id=tok.eos_token_id,
        )
        text = _norm(tok.decode(out_ids[0], skip_special_tokens=True))

        # The full occasion or just its first word counts as a mention; the
        # slice avoids an IndexError when the occasion string is empty.
        occ_terms = [need["occ"], *need["occ"].split()[:1]]
        checks = (
            1 <= _sentences_n(text) <= 3,
            _contains_any(text, [need["name"]]),
            _contains_any(text, occ_terms),
            not _contains_any(text, BAN_PHRASES),
            prev is None or not _too_similar(text, prev, n=3, thr=0.85),
        )
        if all(checks):
            _last_msg = text
            return {"message": text, "meta": {"tone": tone,
                                              "temperature": round(temp, 2), "top_p": round(topp, 2),
                                              "seed": seed, "attempt": attempt, "model": MSG_MODEL_ID}}
        if text:
            last_text = text
        seed += 17

    # Never hand back an empty card: use the template when every sample was blank.
    occ_label = _clean_occasion(profile.get("occ_ui") or "day").lower()
    fallback = last_text or f"Happy {occ_label}, {profile.get('recipient_name') or 'Friend'}!"
    _last_msg = fallback
    return {"message": fallback, "meta": {"failed": True, "model": MSG_MODEL_ID, "tone": tone}}
906
 
907
  # --------------------- Rendering ---------------------
 
908
def md_escape(text: str) -> str:
    """Return ``str(text)`` with ``|``, ``*`` and ``_`` backslash-escaped."""
    escaped = str(text)
    for ch in ("|", "*", "_"):
        escaped = escaped.replace(ch, "\\" + ch)
    return escaped
910
 
911
def first_sentence(s: str, max_chars: int = 140) -> str:
    """Return the text before the first ``". "`` in ``s``, length-limited.

    Args:
        s: Source text. ``None`` and float NaN (a missing DataFrame cell) are
            treated as empty; other non-strings are converted with ``str``.
        max_chars: Maximum length of the result. Longer text is cut to
            ``max_chars - 1`` characters plus an ellipsis.

    Returns:
        The shortened first sentence (without its trailing period), or ``""``.
    """
    if s is None or (isinstance(s, float) and s != s):  # NaN != NaN
        return ""
    if not isinstance(s, str):
        s = str(s)
    s = s.strip()
    if not s:
        return ""
    cut = s.split(". ")[0]
    if len(cut) > max_chars:
        cut = cut[:max_chars - 1] + "\u2026"
    return cut
917
+
918
+ def render_top3_html(df: pd.DataFrame, age_label: str) -> str:
919
  if df is None or df.empty:
920
+ return "<em>No results found within the current filters.</em>"
921
  rows = []
922
  for _, r in df.iterrows():
923
  name = md_escape(r.get("name",""))
924
+ desc = md_escape(first_sentence(r.get("short_desc","")))
925
  price = r.get("price_usd")
926
  sim = r.get("similarity")
 
927
  img = r.get("image_url","") or ""
928
  price_str = f"${price:.0f}" if pd.notna(price) else "N/A"
929
  sim_str = f"{sim:.3f}" if pd.notna(sim) else "โ€”"
 
934
  <div style="font-weight:700;">{name}</div>
935
  <div style="font-size:0.95em;margin-top:4px;">{desc}</div>
936
  <div style="font-size:0.9em;margin-top:6px;opacity:0.8;">
937
+ Price: <b>{price_str}</b> ยท Age: <code>{age_label}</code> ยท Score: <code>{sim_str}</code>
938
  </div>
939
  </div>
940
  {img_html}
 
943
  rows.append(card)
944
  return "\n".join(rows)
945
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
946
  # --------------------- Gradio UI ---------------------
947
# Builds the whole Gradio interface: an examples table, the input form, the
# three output panes, and the two event handlers (example click, "Get GIfty").
with gr.Blocks(
    title="\U0001F381 GIfty \u2014 Recommender + DIY",
    # The Hebrew CSS comment below reads:
    # "Hide the orange border/highlight of the examples table".
    css="""
#explain {opacity:.85;font-size:.92em;margin-bottom:8px;}
/* הסתרת מסגרת/היילייט כתומים של טבלת הדוגמאות */
.handsontable .wtBorder, .handsontable .htBorders, .handsontable .wtBorder.current { display: none !important; }
.gr-dataframe table td:focus { outline: none !important; box-shadow: none !important; }
"""
) as demo:
    gr.Markdown(TITLE)

    # ===== EXAMPLES TABLE (Top, clickable) =====
    gr.Markdown("### Quick examples (click a row to auto-fill)", elem_id="explain")

    EXAMPLES = [
        # interests, occasion, bmin, bmax, name, relationship, age_label, gender, tone
        (["Technology", "Movies"], "Birthday", 25, 45, "Daniel", "Friend", "adult (18\u201364)", "male", "Funny"),
        (["Art", "Reading", "Home decor"], "Anniversary", 30, 60, "Rotem", "Romantic partner", "adult (18\u201364)", "female", "Romantic"),
        (["Gaming", "Photography"], "Birthday", 30, 120, "Omer", "Family - Sibling", "teen (13\u201317)", "male", "Playful"),
        (["Reading", "Art"], "Graduation", 15, 35, "Maya", "Friend", "adult (18\u201364)", "female", "Heartfelt"),
        (["Science", "Crafts"], "Holidays", 15, 30, "Adam", "Family - Child", "kid (3\u201312)", "any", "Encouraging"),
    ]
    EX_COLS = ["Recipient", "Relationship", "Interests", "Occasion", "Age group", "Gender", "Min $", "Max $", "Tone"]
    EX_DF = pd.DataFrame([
        [ex_name, ex_rel, " + ".join(ex_ints), ex_occ, ex_age, ex_gender, ex_min, ex_max, ex_tone]
        for (ex_ints, ex_occ, ex_min, ex_max, ex_name, ex_rel, ex_age, ex_gender, ex_tone) in EXAMPLES
    ], columns=EX_COLS)

    ex_df = gr.Dataframe(value=EX_DF, interactive=False, wrap=True)
    gr.Markdown("---")

    # === Inputs (always open) ===
    with gr.Row():
        recipient_name = gr.Textbox(label="Recipient name", value="Daniel")
        relationship = gr.Dropdown(label="Relationship", choices=RECIPIENT_RELATIONSHIPS, value="Friend")
    with gr.Row():
        occasion = gr.Dropdown(label="Occasion", choices=OCCASION_UI, value="Birthday")
        age = gr.Dropdown(label="Age group", choices=list(AGE_OPTIONS.keys()), value="adult (18\u201364)")
        gender = gr.Dropdown(label="Recipient gender", choices=GENDER_OPTIONS, value="male")
    interests = gr.CheckboxGroup(label="Interests (select a few)", choices=INTEREST_OPTIONS,
                                 value=["Technology", "Movies"], interactive=True)
    with gr.Row():
        budget_min = gr.Slider(label="Min budget (USD)", minimum=5, maximum=500, step=1, value=25)
        budget_max = gr.Slider(label="Max budget (USD)", minimum=5, maximum=500, step=1, value=45)
    tone = gr.Dropdown(label="Message tone", choices=MESSAGE_TONES, value="Funny")

    # Action button and outputs
    go = gr.Button("Get GIfty")
    gr.Markdown("### \U0001F3AF Recommendations")
    out_top3 = gr.HTML()
    gr.Markdown("### \U0001F6E0\uFE0F DIY Gift")
    out_diy_md = gr.Markdown()
    gr.Markdown("### \U0001F48C Personalized Message")
    out_msg = gr.Markdown()

    # ---- row click handler (fill form) ----
    def _on_example_select(evt: gr.SelectData):
        """Fill the form from the clicked example row.

        ``evt.index`` may be a bare row index or a (row, col) pair, possibly
        nested; only the row is used. Values come straight from ``EXAMPLES``
        so the interests list does not have to be re-parsed from the table's
        " + " joined display string.
        """
        r = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
        if isinstance(r, (list, tuple)):
            r = r[0]
        sel_ints, sel_occ, sel_min, sel_max, sel_name, sel_rel, sel_age, sel_gender, sel_tone = EXAMPLES[int(r)]
        return (
            list(sel_ints), sel_occ, int(sel_min), int(sel_max),
            sel_name, sel_rel, sel_age, sel_gender, sel_tone,
        )

    ex_df.select(
        _on_example_select,
        outputs=[interests, occasion, budget_min, budget_max, recipient_name, relationship, age, gender, tone]
    )

    # ---- UI predict ----
    def render_diy_md(j: dict) -> str:
        """Render a DIY idea dict as Markdown; an empty dict gives a failure note.

        Steps are read from ``step_by_step_instructions`` when present,
        otherwise from ``steps``. ``None`` text or list fields render as empty.
        """
        if not j:
            return "_DIY generation failed._"
        steps = j.get("step_by_step_instructions", j.get("steps", [])) or []
        materials = j.get("materials_needed") or []
        parts = [
            f"**{j.get('gift_name','(no name)')}**",
            "",
            (j.get("overview") or "").strip(),
            "",
            "**Materials**",
            "\n".join(f"- {m}" for m in materials),
            "",
            "**Steps**",
            "\n".join(f"{i+1}. {s}" for i, s in enumerate(steps)),
            "",
            f"**Estimated cost:** ${j.get('estimated_cost_usd','?')} \u00b7 **Time:** {j.get('estimated_time_minutes','?')} min"
        ]
        return "\n".join(parts)

    def ui_predict(interests_list, occasion_val, bmin, bmax, name, rel, age_label, gender_val, tone_val):
        """Run the full pipeline for the form values.

        Returns a 3-tuple: top-3 HTML, DIY Markdown, and the message text.
        """
        try:
            bmin = float(bmin)
            bmax = float(bmax)
        except (TypeError, ValueError):
            # Unusable slider values: fall back to the sliders' full range.
            bmin, bmax = 5.0, 500.0
        if bmin > bmax:
            bmin, bmax = bmax, bmin

        profile = {
            "recipient_name": name or "Friend",
            "relationship": rel or "Friend",
            "interests": interests_list or [],
            "occ_ui": occasion_val or "Birthday",
            "budget_min": bmin,
            "budget_max": bmax,
            "age_range": AGE_OPTIONS.get(age_label, "any"),
            "gender": (gender_val or "any").lower(),
            "tone": tone_val or "Heartfelt",
        }

        top3 = recommend_top3_budget_first(profile)
        top3_html = render_top3_html(top3, age_label)

        diy_json, _status = diy_generate(profile)
        diy_md = render_diy_md(diy_json)

        msg_obj = generate_personal_message(profile)
        msg = msg_obj["message"]

        return top3_html, diy_md, msg

    go.click(
        ui_predict,
        [interests, occasion, budget_min, budget_max, recipient_name, relationship, age, gender, tone],
        [out_top3, out_diy_md, out_msg]
    )
 
 
 
 
 
 
 
1073
 
1074
# Start the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()