rairo committed on
Commit
2c5e6a5
·
verified ·
1 Parent(s): add7275

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +95 -100
main.py CHANGED
@@ -1,12 +1,11 @@
1
  """
2
- main.py — Pricelyst Shopping Advisor (Jessica Edition 2026 - Upgrade v2.6)
3
 
4
- ✅ Fixed: Search Regression (Now searches Brand, Category, & Description).
5
- Feature: Store Preference Detection ("Price at OK Mart?").
6
- ✅ Logic: Single Item (Best First) vs Basket (Cheapest Total).
7
  ✅ "Analyst Engine": Enhanced Data Flattening & Comparison Logic.
8
  ✅ "Visual Engine": Lists, Products, & Meal-to-Recipe recognition.
9
- ✅ Memory Logic: Short-Term Sliding Window.
10
 
11
  ENV VARS:
12
  - GOOGLE_API_KEY=...
@@ -115,7 +114,7 @@ app = Flask(__name__)
115
  CORS(app)
116
 
117
  # =========================
118
- # 1. ETL Layer (Ingestion - Deep Flattening)
119
  # =========================
120
 
121
  def _norm(s: Any) -> str:
@@ -140,10 +139,6 @@ def _safe_json_loads(s: str, fallback: Any):
140
  return fallback
141
 
142
  def fetch_and_flatten_data() -> pd.DataFrame:
143
- """
144
- Fetches product data and creates a 'search_vector' for deep fuzzy matching.
145
- Includes: Name, Brand, Category Strings.
146
- """
147
  all_products = []
148
  page = 1
149
 
@@ -174,19 +169,15 @@ def fetch_and_flatten_data() -> pd.DataFrame:
174
  p_id = int(p.get("id") or 0)
175
  p_name = str(p.get("name") or "Unknown")
176
 
177
- # --- Deep Metadata Extraction ---
178
  brand_obj = p.get("brand") or {}
179
  brand_name = str(brand_obj.get("brand_name") or "")
180
 
181
- # Extract ALL category names (parent, sub, etc.)
182
  cats = p.get("categories") or []
183
  cat_names = [str(c.get("name") or "") for c in cats]
184
  cat_str = " ".join(cat_names)
185
-
186
- # Base Category (for grouping)
187
  primary_cat = cat_names[0] if cat_names else "General"
188
 
189
- # Create a Search Vector: "Top Chef Jasmine Rice Rice & Pasta Groceries"
190
  search_vector = _norm(f"{p_name} {brand_name} {cat_str}")
191
 
192
  views = int(p.get("view_count") or 0)
@@ -195,11 +186,10 @@ def fetch_and_flatten_data() -> pd.DataFrame:
195
  prices = p.get("prices") or []
196
 
197
  if not prices:
198
- # No Price? Still index for "Out of Stock" awareness
199
  rows.append({
200
  "product_id": p_id,
201
  "product_name": p_name,
202
- "search_vector": search_vector, # KEY UPGRADE
203
  "brand": brand_name,
204
  "category": primary_cat,
205
  "retailer": "Listing",
@@ -219,7 +209,7 @@ def fetch_and_flatten_data() -> pd.DataFrame:
219
  rows.append({
220
  "product_id": p_id,
221
  "product_name": p_name,
222
- "search_vector": search_vector, # KEY UPGRADE
223
  "brand": brand_name,
224
  "category": primary_cat,
225
  "retailer": r_name,
@@ -246,21 +236,18 @@ def get_market_index(force_refresh: bool = False) -> pd.DataFrame:
246
  return _data_cache["df"]
247
 
248
  # =========================
249
- # 2. Analyst Engine (Smart Search & Logic)
250
  # =========================
251
 
252
  def search_products_deep(df: pd.DataFrame, query: str, limit: int = 15) -> pd.DataFrame:
253
- """
254
- Searches against the 'search_vector' (Name + Brand + Categories).
255
- """
256
  if df.empty or not query: return df
257
  q_norm = _norm(query)
258
 
259
- # 1. Direct match in vector
260
  mask = df['search_vector'].str.contains(q_norm, regex=False)
261
  matches = df[mask].copy()
262
 
263
- # 2. Token overlap fallback (if query is "Cheap Rice", matches "Rice")
264
  if matches.empty:
265
  q_tokens = set(q_norm.split())
266
  def token_score(text):
@@ -275,26 +262,13 @@ def search_products_deep(df: pd.DataFrame, query: str, limit: int = 15) -> pd.Da
275
 
276
  if matches.empty: return matches
277
 
278
- # 3. Sort: Views (Popularity) -> Price (Low)
279
  matches = matches.sort_values(by=['views', 'price'], ascending=[False, True])
280
  return matches.head(limit)
281
 
282
- def detect_retailer_preference(query: str) -> Optional[str]:
283
- """Detects if user asked for a specific store."""
284
- query = query.lower()
285
- # Hardcoded known retailers for robustness
286
- known_stores = ["ok mart", "ok supermarket", "tm pick n pay", "pick n pay", "spar", "food lovers", "choppies", "gains"]
287
- for store in known_stores:
288
- if store in query:
289
- return store # Return the detected string to match loosely
290
- return None
291
-
292
  def calculate_basket_optimization(item_names: List[str], preferred_retailer: str = None) -> Dict[str, Any]:
293
  """
294
- The Core Logic:
295
- - Single Item: Returns 'Best Option' + 'Others'.
296
- - Basket: Returns 'Best Basket' + 'Breakdown'.
297
- - Preference: Filters for specific store if requested.
298
  """
299
  df = get_market_index()
300
  if df.empty:
@@ -303,7 +277,7 @@ def calculate_basket_optimization(item_names: List[str], preferred_retailer: str
303
  found_items = []
304
  missing_global = []
305
 
306
- # 1. Resolve Items
307
  for item in item_names:
308
  hits = search_products_deep(df[df['is_offer']==True], item, limit=10)
309
 
@@ -311,58 +285,83 @@ def calculate_basket_optimization(item_names: List[str], preferred_retailer: str
311
  missing_global.append(item)
312
  continue
313
 
314
- # Group hits by Product Name to aggregate offers
315
- # We take the most popular product match
316
- best_product_name = hits.iloc[0]['product_name']
317
- product_offers = hits[hits['product_name'] == best_product_name]
318
 
319
- # Sort offers: Price Ascending
320
- product_offers = product_offers.sort_values('price', ascending=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
321
 
322
  offers_list = []
323
  for _, r in product_offers.iterrows():
324
- offers_list.append({
325
- "retailer": r['retailer'],
326
- "price": float(r['price'])
327
- })
328
 
329
  found_items.append({
330
  "query": item,
331
- "product_name": best_product_name,
332
- "category": str(hits.iloc[0]['category']),
333
- "offers": offers_list, # All available prices for this item
334
- "best_price": offers_list[0]['price'],
335
- "best_retailer": offers_list[0]['retailer']
336
  })
337
 
338
  if not found_items:
339
  return {"actionable": True, "found_items": [], "global_missing": missing_global}
340
 
341
- # 2. Logic: Single vs Multi
342
- is_basket = len(found_items) > 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
 
344
- result = {
345
  "actionable": True,
346
- "is_basket": is_basket,
347
  "found_items": found_items,
348
  "global_missing": missing_global,
 
 
349
  "preferred_retailer": preferred_retailer
350
  }
351
 
352
- # 3. Store Preference Logic (User asked: "Rice at OK Mart?")
353
- if preferred_retailer and not is_basket:
354
- item = found_items[0]
355
- # Find the offer from the preferred store
356
- pref_offer = next((o for o in item['offers'] if preferred_retailer.lower() in o['retailer'].lower()), None)
357
- result['preferred_offer'] = pref_offer
358
- result['comparison_vs_best'] = None
359
-
360
- if pref_offer:
361
- diff = pref_offer['price'] - item['best_price']
362
- result['comparison_vs_best'] = diff # +ve means preferred is expensive, 0 means best
363
-
364
- return result
365
-
366
  def calculate_zesa_units(amount_usd: float) -> Dict[str, Any]:
367
  remaining = amount_usd / 1.06
368
  units = 0.0
@@ -489,32 +488,32 @@ def gemini_chat_response(transcript: str, intent: Dict, analyst_data: Dict, chat
489
  PROMPT = f"""
490
  You are Jessica, Pricelyst's Shopping Advisor (Zimbabwe).
491
  Role: Intelligent Shopping Companion.
492
- Goal: Shortest path to value. Give answers, not promises.
493
 
494
  INPUT: "{transcript}"
495
  INTENT: {intent.get('intent')}
496
  CONTEXT:
497
  {context_str}
498
 
499
- LOGIC RULES (Strict Adherence):
500
 
501
- 1. **SINGLE ITEM QUERY** (e.g. "Price of Rice"):
502
- - **Primary**: State the CHEAPEST option immediately. "I found [Product] at [Retailer] for **$[Price]**."
503
- - **Comparison**: List 1-2 other options. "Also available at [Store B] ($X) and [Store C] ($Y)."
504
- - **Store Preference**: If user asked "Rice at OK Mart?", state that price FIRST, then say if it's cheaper elsewhere.
505
-
506
- 2. **BASKET QUERY** (e.g. "Rice, Oil, and Soap"):
507
- - Provide the **Total Basket Cost** at the cheapest single store.
508
- - Provide the Breakdown.
509
- - Mention if splitting stores saves significant money.
 
 
 
510
 
511
- 3. **MISSING ITEMS**:
512
- - Be honest. "I couldn't find a current price for [Item]."
513
-
514
  4. **CASUAL**:
515
- - Reset context if user says "Hi".
516
 
517
- TONE: Helpful, direct, Zimbabwean. Use Markdown for prices.
518
  """
519
 
520
  try:
@@ -535,9 +534,9 @@ def gemini_generate_4step_plan(transcript: str, analyst_result: Dict) -> str:
535
  DATA: {json.dumps(analyst_result, indent=2, default=str)}
536
  SECTIONS:
537
  1. **Catalogue Found ✅** (Table: Item | Store | Price)
538
- 2. **Missing 😔** (Estimates)
539
- 3. **Recommendation 💡**
540
- 4. **Budget Tips**
541
  """
542
  try:
543
  resp = _gemini_client.models.generate_content(model=GEMINI_MODEL, contents=PROMPT)
@@ -556,7 +555,7 @@ def health():
556
  "ok": True,
557
  "offers_indexed": len(df),
558
  "api_source": PRICE_API_BASE,
559
- "persona": "Jessica v2.6 (Deep Search)"
560
  })
561
 
562
  @app.post("/chat")
@@ -581,12 +580,8 @@ def chat():
581
  intent_data = gemini_detect_intent(msg)
582
  intent_type = intent_data.get("intent", "CASUAL_CHAT")
583
  items = intent_data.get("items", [])
584
- store_pref = intent_data.get("store_preference") # Extracted from Gemini
585
 
586
- # Store Preference Override (RegEx backup)
587
- if not store_pref:
588
- store_pref = detect_retailer_preference(msg)
589
-
590
  analyst_data = {}
591
 
592
  if items or intent_type in ["SHOPPING_BASKET", "STORE_DECISION", "TRUST_CHECK"]:
@@ -630,7 +625,7 @@ def analyze_image():
630
  analyst_data = {}
631
 
632
  if img_type == "IRRELEVANT" and not items:
633
- prompt = f"User uploaded photo of {description}. Compliment it if appropriate (pet/nature), then explain you are a shopping bot."
634
  response_text = gemini_chat_response(prompt, {"intent": "CASUAL_CHAT"}, {}, "")
635
 
636
  elif items:
@@ -656,7 +651,7 @@ def analyze_image():
656
 
657
  @app.post("/api/call-briefing")
658
  def call_briefing():
659
- # ... (Same as before, abbreviated for length but logic remains)
660
  body = request.get_json(silent=True) or {}
661
  pid = body.get("profile_id")
662
  username = body.get("username")
 
1
  """
2
+ main.py — Pricelyst Shopping Advisor (Jessica Edition 2026 - Upgrade v2.7)
3
 
4
+ ✅ Fixed: Basket Comparison (Compares totals across ALL stores, showing missing items).
5
+ Fixed: Brand Loyalty (Explicitly states if exact brand is missing & suggests closest).
6
+ ✅ Logic: "Market Matrix" calculates basket cost for every retailer found.
7
  ✅ "Analyst Engine": Enhanced Data Flattening & Comparison Logic.
8
  ✅ "Visual Engine": Lists, Products, & Meal-to-Recipe recognition.
 
9
 
10
  ENV VARS:
11
  - GOOGLE_API_KEY=...
 
114
  CORS(app)
115
 
116
  # =========================
117
+ # 1. ETL Layer (Deep Search Indexing)
118
  # =========================
119
 
120
  def _norm(s: Any) -> str:
 
139
  return fallback
140
 
141
  def fetch_and_flatten_data() -> pd.DataFrame:
 
 
 
 
142
  all_products = []
143
  page = 1
144
 
 
169
  p_id = int(p.get("id") or 0)
170
  p_name = str(p.get("name") or "Unknown")
171
 
 
172
  brand_obj = p.get("brand") or {}
173
  brand_name = str(brand_obj.get("brand_name") or "")
174
 
 
175
  cats = p.get("categories") or []
176
  cat_names = [str(c.get("name") or "") for c in cats]
177
  cat_str = " ".join(cat_names)
 
 
178
  primary_cat = cat_names[0] if cat_names else "General"
179
 
180
+ # Deep Search Vector
181
  search_vector = _norm(f"{p_name} {brand_name} {cat_str}")
182
 
183
  views = int(p.get("view_count") or 0)
 
186
  prices = p.get("prices") or []
187
 
188
  if not prices:
 
189
  rows.append({
190
  "product_id": p_id,
191
  "product_name": p_name,
192
+ "search_vector": search_vector,
193
  "brand": brand_name,
194
  "category": primary_cat,
195
  "retailer": "Listing",
 
209
  rows.append({
210
  "product_id": p_id,
211
  "product_name": p_name,
212
+ "search_vector": search_vector,
213
  "brand": brand_name,
214
  "category": primary_cat,
215
  "retailer": r_name,
 
236
  return _data_cache["df"]
237
 
238
  # =========================
239
+ # 2. Analyst Engine (Matrix & Fallbacks)
240
  # =========================
241
 
242
  def search_products_deep(df: pd.DataFrame, query: str, limit: int = 15) -> pd.DataFrame:
 
 
 
243
  if df.empty or not query: return df
244
  q_norm = _norm(query)
245
 
246
+ # 1. Exact/Partial Vector Match
247
  mask = df['search_vector'].str.contains(q_norm, regex=False)
248
  matches = df[mask].copy()
249
 
250
+ # 2. Token Overlap Fallback
251
  if matches.empty:
252
  q_tokens = set(q_norm.split())
253
  def token_score(text):
 
262
 
263
  if matches.empty: return matches
264
 
 
265
  matches = matches.sort_values(by=['views', 'price'], ascending=[False, True])
266
  return matches.head(limit)
267
 
 
 
 
 
 
 
 
 
 
 
268
  def calculate_basket_optimization(item_names: List[str], preferred_retailer: str = None) -> Dict[str, Any]:
269
  """
270
+ Generates a FULL MARKET MATRIX.
271
+ Returns best store, plus how EVERY other store performed.
 
 
272
  """
273
  df = get_market_index()
274
  if df.empty:
 
277
  found_items = []
278
  missing_global = []
279
 
280
+ # 1. Resolve Items & Check Brand Fidelity
281
  for item in item_names:
282
  hits = search_products_deep(df[df['is_offer']==True], item, limit=10)
283
 
 
285
  missing_global.append(item)
286
  continue
287
 
288
+ best_match = hits.iloc[0]
 
 
 
289
 
290
+ # --- Brand Fidelity Check ---
291
+ # Did the user ask for "Top Chef" but we got "Mega Basmati"?
292
+ q_norm = _norm(item)
293
+ res_norm = _norm(best_match['product_name'] + " " + best_match['brand'])
294
+
295
+ # Simple heuristic: If query has 2+ words, and <50% of them are in result, it's a sub.
296
+ q_tokens = q_norm.split()
297
+ is_substitute = False
298
+ if len(q_tokens) > 1:
299
+ found_tokens = sum(1 for t in q_tokens if t in res_norm)
300
+ if found_tokens < len(q_tokens) / 2: # Loose threshold
301
+ is_substitute = True
302
+
303
+ # Aggregate all offers that share the best match's product name
304
+ product_offers = hits[hits['product_name'] == best_match['product_name']].sort_values('price')
305
 
306
  offers_list = []
307
  for _, r in product_offers.iterrows():
308
+ offers_list.append({"retailer": r['retailer'], "price": float(r['price'])})
 
 
 
309
 
310
  found_items.append({
311
  "query": item,
312
+ "product_name": str(best_match['product_name']),
313
+ "is_substitute": is_substitute, # KEY FEATURE
314
+ "offers": offers_list,
315
+ "best_price": offers_list[0]['price']
 
316
  })
317
 
318
  if not found_items:
319
  return {"actionable": True, "found_items": [], "global_missing": missing_global}
320
 
321
+ # 2. MARKET MATRIX (Comparison across all stores)
322
+ # Get unique retailers involved in these products
323
+ all_involved_retailers = set()
324
+ for f in found_items:
325
+ for o in f['offers']:
326
+ all_involved_retailers.add(o['retailer'])
327
+
328
+ store_comparison = []
329
+
330
+ for retailer in all_involved_retailers:
331
+ total_price = 0.0
332
+ found_count = 0
333
+ missing_in_store = []
334
+
335
+ for item in found_items:
336
+ # Find price at this retailer
337
+ price = next((o['price'] for o in item['offers'] if o['retailer'] == retailer), None)
338
+ if price:
339
+ total_price += price
340
+ found_count += 1
341
+ else:
342
+ missing_in_store.append(item['product_name'])
343
+
344
+ store_comparison.append({
345
+ "retailer": retailer,
346
+ "total_price": total_price,
347
+ "found_count": found_count,
348
+ "total_items": len(found_items),
349
+ "missing_items": missing_in_store
350
+ })
351
+
352
+ # Sort Matrix: Most Items Found -> Lowest Price
353
+ store_comparison.sort(key=lambda x: (-x['found_count'], x['total_price']))
354
 
355
+ return {
356
  "actionable": True,
357
+ "is_basket": len(found_items) > 1,
358
  "found_items": found_items,
359
  "global_missing": missing_global,
360
+ "market_matrix": store_comparison[:4], # Top 4 comparison
361
+ "best_store": store_comparison[0] if store_comparison else None,
362
  "preferred_retailer": preferred_retailer
363
  }
364
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  def calculate_zesa_units(amount_usd: float) -> Dict[str, Any]:
366
  remaining = amount_usd / 1.06
367
  units = 0.0
 
488
  PROMPT = f"""
489
  You are Jessica, Pricelyst's Shopping Advisor (Zimbabwe).
490
  Role: Intelligent Shopping Companion.
491
+ Goal: Shortest path to value. Complete Transparency.
492
 
493
  INPUT: "{transcript}"
494
  INTENT: {intent.get('intent')}
495
  CONTEXT:
496
  {context_str}
497
 
498
+ LOGIC RULES:
499
 
500
+ 1. **BASKET COMPARISON (Transparency)**:
501
+ - If `market_matrix` has multiple stores, **COMPARE THEM**.
502
+ - Example: "Spar is **$6.95** (All items). OK Mart is **$4.00**, but misses Cooking Oil."
503
+ - Don't just show the winner. Show the ecosystem.
504
+
505
+ 2. **BRAND LOYALTY (Graceful Fallback)**:
506
+ - If `is_substitute` is TRUE for an item, say:
507
+ "I couldn't find **[Query Brand]** exactly, so I've used **[Found Product]** ($Price) as a placeholder."
508
+ - Be honest about brand mismatches.
509
+
510
+ 3. **SINGLE ITEMS**:
511
+ - Best price first, then list 1-2 others.
512
 
 
 
 
513
  4. **CASUAL**:
514
+ - Reset if user says "Hi".
515
 
516
+ TONE: Helpful, direct, Zimbabwean. Use Markdown.
517
  """
518
 
519
  try:
 
534
  DATA: {json.dumps(analyst_result, indent=2, default=str)}
535
  SECTIONS:
536
  1. **Catalogue Found ✅** (Table: Item | Store | Price)
537
+ 2. **Missing/Substitutes ⚠️** (Be clear about brand swaps)
538
+ 3. **Store Comparison 📊** (List the Top 3 stores totals)
539
+ 4. **Recommendation 💡**
540
  """
541
  try:
542
  resp = _gemini_client.models.generate_content(model=GEMINI_MODEL, contents=PROMPT)
 
555
  "ok": True,
556
  "offers_indexed": len(df),
557
  "api_source": PRICE_API_BASE,
558
+ "persona": "Jessica v2.7 (Matrix & Loyalty)"
559
  })
560
 
561
  @app.post("/chat")
 
580
  intent_data = gemini_detect_intent(msg)
581
  intent_type = intent_data.get("intent", "CASUAL_CHAT")
582
  items = intent_data.get("items", [])
583
+ store_pref = intent_data.get("store_preference")
584
 
 
 
 
 
585
  analyst_data = {}
586
 
587
  if items or intent_type in ["SHOPPING_BASKET", "STORE_DECISION", "TRUST_CHECK"]:
 
625
  analyst_data = {}
626
 
627
  if img_type == "IRRELEVANT" and not items:
628
+ prompt = f"User uploaded photo of {description}. Compliment it if appropriate, then explain you are a shopping bot."
629
  response_text = gemini_chat_response(prompt, {"intent": "CASUAL_CHAT"}, {}, "")
630
 
631
  elif items:
 
651
 
652
  @app.post("/api/call-briefing")
653
  def call_briefing():
654
+ # ... (Same as before)
655
  body = request.get_json(silent=True) or {}
656
  pid = body.get("profile_id")
657
  username = body.get("username")