rairo committed on
Commit
540c3fc
·
verified ·
1 Parent(s): 9f8e288

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +73 -42
main.py CHANGED
@@ -1,10 +1,10 @@
1
  """
2
- main.py — Pricelyst Shopping Advisor (Jessica Edition 2026 - Upgrade v3.0)
3
 
4
- ✅ Feature: "Concept Exploder" (Converts "Plan a Braai" -> Shopping List).
5
- ✅ Feature: "Hybrid Valuation" (Estimates prices for missing items in Plans).
6
- ✅ Feature: "Market Intelligence" (Pre-calculated Voice Context).
7
- ✅ UI Match: Restored v1 Markdown Tables & Creative Tips.
8
  ✅ Core: Deep Vector Search + Market Matrix + Store Preferences.
9
 
10
  ENV VARS:
@@ -236,39 +236,60 @@ def get_market_index(force_refresh: bool = False) -> pd.DataFrame:
236
  return _data_cache["df"]
237
 
238
  # =========================
239
- # 2. Analyst Engine (Matrix & Calculations)
240
  # =========================
241
 
242
  def search_products_deep(df: pd.DataFrame, query: str, limit: int = 15) -> pd.DataFrame:
 
 
 
 
 
 
 
243
  if df.empty or not query: return df
244
  q_norm = _norm(query)
 
245
 
246
- # 1. Exact/Partial Vector Match
247
- mask = df['search_vector'].str.contains(q_norm, regex=False)
248
- matches = df[mask].copy()
249
-
250
- # 2. Token Overlap Fallback
251
- if matches.empty:
252
- q_tokens = set(q_norm.split())
253
- def token_score(text):
254
- if not isinstance(text, str): return 0
255
- text_tokens = set(text.split())
256
- if not text_tokens: return 0
257
- return len(q_tokens.intersection(text_tokens))
 
 
 
 
 
 
 
 
258
 
259
- df_scored = df.copy()
260
- df_scored['score'] = df_scored['search_vector'].apply(token_score)
261
- matches = df_scored[df_scored['score'] > 0]
262
 
 
 
 
 
 
 
263
  if matches.empty: return matches
264
 
265
- matches = matches.sort_values(by=['views', 'price'], ascending=[False, True])
 
 
266
  return matches.head(limit)
267
 
268
  def calculate_basket_optimization(item_names: List[str], preferred_retailer: str = None) -> Dict[str, Any]:
269
  """
270
- Generates a FULL MARKET MATRIX.
271
- Returns best store, plus how EVERY other store performed.
272
  """
273
  df = get_market_index()
274
  if df.empty:
@@ -291,11 +312,13 @@ def calculate_basket_optimization(item_names: List[str], preferred_retailer: str
291
  q_norm = _norm(item)
292
  res_norm = _norm(best_match['product_name'] + " " + best_match['brand'])
293
  q_tokens = q_norm.split()
 
294
  is_substitute = False
295
- if len(q_tokens) > 1:
296
- found_tokens = sum(1 for t in q_tokens if t in res_norm)
297
- if found_tokens < len(q_tokens) / 2:
298
- is_substitute = True
 
299
 
300
  # Aggregate all offers
301
  product_offers = hits[hits['product_name'] == best_match['product_name']].sort_values('price')
@@ -385,7 +408,7 @@ def calculate_zesa_units(amount_usd: float) -> Dict[str, Any]:
385
  }
386
 
387
  # =========================
388
- # 3. Gemini Helpers (The Intelligence)
389
  # =========================
390
 
391
  def gemini_detect_intent(transcript: str) -> Dict[str, Any]:
@@ -401,10 +424,11 @@ def gemini_detect_intent(transcript: str) -> Dict[str, Any]:
401
  - EVENT_PLANNING: "Plan a braai", "Wedding list", "Dinner for 5" (Implicit lists).
402
 
403
  Extract:
404
- - items: list of specific products found.
405
  - utility_amount: number
406
  - store_preference: if a specific store is named (e.g. "at OK Mart").
407
  - is_event_planning: boolean (true if user asks to plan an event but lists no items).
 
408
 
409
  JSON Schema:
410
  {
@@ -413,7 +437,8 @@ def gemini_detect_intent(transcript: str) -> Dict[str, Any]:
413
  "items": ["string"],
414
  "utility_amount": number,
415
  "store_preference": "string",
416
- "is_event_planning": boolean
 
417
  }
418
  """
419
  try:
@@ -422,10 +447,10 @@ def gemini_detect_intent(transcript: str) -> Dict[str, Any]:
422
  contents=PROMPT + "\nTranscript: " + transcript,
423
  config=types.GenerateContentConfig(response_mime_type="application/json")
424
  )
425
- return _safe_json_loads(resp.text, {"actionable": False, "intent": "CASUAL_CHAT"})
426
  except Exception as e:
427
  logger.error(f"Intent Detect Error: {e}")
428
- return {"actionable": False, "intent": "CASUAL_CHAT"}
429
 
430
  def gemini_explode_concept(transcript: str) -> List[str]:
431
  """
@@ -436,7 +461,7 @@ def gemini_explode_concept(transcript: str) -> List[str]:
436
  PROMPT = f"""
437
  User wants to plan an event: "{transcript}".
438
  Generate a STRICT list of 10-15 essential Zimbabwean shopping items for this.
439
- Use local terms (e.g., 'Boerewors', 'Maize Meal', 'Mazoe', 'Charcoal').
440
  Return ONLY a JSON list of strings.
441
  """
442
  try:
@@ -492,29 +517,35 @@ def gemini_chat_response(transcript: str, intent: Dict, analyst_data: Dict, chat
492
  if analyst_data:
493
  context_str += f"ANALYST DATA: {json.dumps(analyst_data, default=str)}\n"
494
 
 
 
495
  PROMPT = f"""
496
  You are Jessica, Pricelyst's Shopping Advisor (Zimbabwe).
497
  Role: Intelligent Shopping Companion.
498
  Goal: Shortest path to value. Complete Transparency.
499
 
500
  INPUT: "{transcript}"
 
501
  INTENT: {intent.get('intent')}
502
  CONTEXT:
503
  {context_str}
504
 
505
  LOGIC RULES:
506
 
507
- 1. **BASKET COMPARISON**:
 
 
508
  - If `market_matrix` has multiple stores, compare totals.
509
  - "Spar is **$6.95**, OK Mart is **$4.00** (but missing Oil)."
510
 
511
- 2. **BRAND SUBSTITUTES**:
512
- - If `is_substitute` is TRUE: "I couldn't find **[Query]**, so I used **[Found]** ($Price) as a placeholder."
 
513
 
514
- 3. **SINGLE ITEMS**:
515
  - Best price first, then others.
516
 
517
- 4. **CASUAL**:
518
  - Reset if user says "Hi".
519
 
520
  TONE: Helpful, direct, Zimbabwean. Use Markdown.
@@ -579,7 +610,7 @@ def health():
579
  "ok": True,
580
  "offers_indexed": len(df),
581
  "api_source": PRICE_API_BASE,
582
- "persona": "Jessica v3.0 (Event Planner)"
583
  })
584
 
585
  @app.post("/chat")
@@ -747,7 +778,7 @@ def call_briefing():
747
  def log_call_usage():
748
  """
749
  Post-Call Orchestrator.
750
- v3.0 Upgrade: Handles Concept Explosion for Event Planning.
751
  """
752
  body = request.get_json(silent=True) or {}
753
  pid = body.get("profile_id")
@@ -780,7 +811,7 @@ def log_call_usage():
780
  if target_items:
781
  analyst_result = calculate_basket_optimization(target_items)
782
 
783
- # v3.0: Even if missing items, we generate plan because prompt will ESTIMATE them
784
  md_content = gemini_generate_4step_plan(transcript, analyst_result)
785
 
786
  plan_data = {
 
1
  """
2
+ main.py — Pricelyst Shopping Advisor (Jessica Edition 2026 - Upgrade v3.1)
3
 
4
+ ✅ Feature: "Vernacular Engine" (Shona/Ndebele/English Input -> Native Response).
5
+ ✅ Feature: "Precision Search" (Prioritizes exact phrase matches over popularity).
6
+ ✅ Feature: "Concept Exploder" (Event Planning -> Shopping List).
7
+ ✅ UI/UX: "Nearest Match" phrasing for substitutions.
8
  ✅ Core: Deep Vector Search + Market Matrix + Store Preferences.
9
 
10
  ENV VARS:
 
236
  return _data_cache["df"]
237
 
238
  # =========================
239
+ # 2. Analyst Engine (Precision Search & Matrix)
240
  # =========================
241
 
242
  def search_products_deep(df: pd.DataFrame, query: str, limit: int = 15) -> pd.DataFrame:
243
+ """
244
+ Precision Search Algorithm.
245
+ Prioritizes:
246
+ 1. Exact sequential match in Name/Vector (Highest Score)
247
+ 2. Token overlap (Medium Score)
248
+ 3. Views/Popularity (Tie-breaker)
249
+ """
250
  if df.empty or not query: return df
251
  q_norm = _norm(query)
252
+ q_tokens = set(q_norm.split())
253
 
254
+ def scoring_algo(row):
255
+ score = 0
256
+ vector = row['search_vector']
257
+
258
+ # 1. Exact Name Match (Highest)
259
+ if q_norm == _norm(row['product_name']):
260
+ score += 1000
261
+
262
+ # 2. Sequential Vector Match (High)
263
+ if q_norm in vector:
264
+ score += 500
265
+
266
+ # 3. Brand Match
267
+ if row['brand'].lower() in q_norm:
268
+ score += 200
269
+
270
+ # 4. Token Overlap
271
+ text_tokens = set(vector.split())
272
+ overlap = len(q_tokens.intersection(text_tokens))
273
+ score += (overlap * 50)
274
 
275
+ return score
 
 
276
 
277
+ df_scored = df.copy()
278
+ df_scored['match_score'] = df_scored.apply(scoring_algo, axis=1)
279
+
280
+ # Filter out zero matches
281
+ matches = df_scored[df_scored['match_score'] > 0]
282
+
283
  if matches.empty: return matches
284
 
285
+ # Sort: Match Score (Desc) -> Views (Desc) -> Price (Asc)
286
+ matches = matches.sort_values(by=['match_score', 'views', 'price'], ascending=[False, False, True])
287
+
288
  return matches.head(limit)
289
 
290
  def calculate_basket_optimization(item_names: List[str], preferred_retailer: str = None) -> Dict[str, Any]:
291
  """
292
+ Generates a FULL MARKET MATRIX with Precision Search.
 
293
  """
294
  df = get_market_index()
295
  if df.empty:
 
312
  q_norm = _norm(item)
313
  res_norm = _norm(best_match['product_name'] + " " + best_match['brand'])
314
  q_tokens = q_norm.split()
315
+
316
  is_substitute = False
317
+ # If query has brand/spec but result score is low-ish (not exact name match), flag it.
318
+ # Using a simple heuristic for now based on token overlap vs query length
319
+ found_tokens = sum(1 for t in q_tokens if t in res_norm)
320
+ if len(q_tokens) > 1 and found_tokens < len(q_tokens):
321
+ is_substitute = True
322
 
323
  # Aggregate all offers
324
  product_offers = hits[hits['product_name'] == best_match['product_name']].sort_values('price')
 
408
  }
409
 
410
  # =========================
411
+ # 3. Gemini Helpers (Vernacular & Intelligence)
412
  # =========================
413
 
414
  def gemini_detect_intent(transcript: str) -> Dict[str, Any]:
 
424
  - EVENT_PLANNING: "Plan a braai", "Wedding list", "Dinner for 5" (Implicit lists).
425
 
426
  Extract:
427
+ - items: list of specific products found. **TRANSLATE ALL ITEMS TO ENGLISH** (e.g. 'Hupfu' -> 'Maize Meal').
428
  - utility_amount: number
429
  - store_preference: if a specific store is named (e.g. "at OK Mart").
430
  - is_event_planning: boolean (true if user asks to plan an event but lists no items).
431
+ - language: Detected user language (e.g., "Shona", "Ndebele", "English").
432
 
433
  JSON Schema:
434
  {
 
437
  "items": ["string"],
438
  "utility_amount": number,
439
  "store_preference": "string",
440
+ "is_event_planning": boolean,
441
+ "language": "string"
442
  }
443
  """
444
  try:
 
447
  contents=PROMPT + "\nTranscript: " + transcript,
448
  config=types.GenerateContentConfig(response_mime_type="application/json")
449
  )
450
+ return _safe_json_loads(resp.text, {"actionable": False, "intent": "CASUAL_CHAT", "language": "English"})
451
  except Exception as e:
452
  logger.error(f"Intent Detect Error: {e}")
453
+ return {"actionable": False, "intent": "CASUAL_CHAT", "language": "English"}
454
 
455
  def gemini_explode_concept(transcript: str) -> List[str]:
456
  """
 
461
  PROMPT = f"""
462
  User wants to plan an event: "{transcript}".
463
  Generate a STRICT list of 10-15 essential Zimbabwean shopping items for this.
464
+ Use English terms for database lookup (e.g. 'Maize Meal', 'Cooking Oil').
465
  Return ONLY a JSON list of strings.
466
  """
467
  try:
 
517
  if analyst_data:
518
  context_str += f"ANALYST DATA: {json.dumps(analyst_data, default=str)}\n"
519
 
520
+ language = intent.get("language", "English")
521
+
522
  PROMPT = f"""
523
  You are Jessica, Pricelyst's Shopping Advisor (Zimbabwe).
524
  Role: Intelligent Shopping Companion.
525
  Goal: Shortest path to value. Complete Transparency.
526
 
527
  INPUT: "{transcript}"
528
+ USER LANGUAGE: {language}
529
  INTENT: {intent.get('intent')}
530
  CONTEXT:
531
  {context_str}
532
 
533
  LOGIC RULES:
534
 
535
+ 1. **LANGUAGE**: Reply in **{language}**. If Shona, use Shona. If English, use English.
536
+
537
+ 2. **BASKET COMPARISON**:
538
  - If `market_matrix` has multiple stores, compare totals.
539
  - "Spar is **$6.95**, OK Mart is **$4.00** (but missing Oil)."
540
 
541
+ 3. **BRAND SUBSTITUTES (Phrasing)**:
542
+ - If `is_substitute` is TRUE for an item, say:
543
+ "I couldn't find **[Query]**, but the **nearest match is** **[Found]** ($Price)."
544
 
545
+ 4. **SINGLE ITEMS**:
546
  - Best price first, then others.
547
 
548
+ 5. **CASUAL**:
549
  - Reset if user says "Hi".
550
 
551
  TONE: Helpful, direct, Zimbabwean. Use Markdown.
 
610
  "ok": True,
611
  "offers_indexed": len(df),
612
  "api_source": PRICE_API_BASE,
613
+ "persona": "Jessica v3.1 (Babel Fish)"
614
  })
615
 
616
  @app.post("/chat")
 
778
  def log_call_usage():
779
  """
780
  Post-Call Orchestrator.
781
+ v3.1: Handles Concept Explosion & Plan Generation.
782
  """
783
  body = request.get_json(silent=True) or {}
784
  pid = body.get("profile_id")
 
811
  if target_items:
812
  analyst_result = calculate_basket_optimization(target_items)
813
 
814
+ # v3.1: Generate Plan with Estimates & Creative Tips
815
  md_content = gemini_generate_4step_plan(transcript, analyst_result)
816
 
817
  plan_data = {