PriceLystAI-API

Running

App Files Files Community

rairo committed on Jan 27

Commit

2c5e6a5

verified ·

1 Parent(s): add7275

Update main.py

Browse files

Files changed (1) hide show

main.py +95 -100

main.py CHANGED Viewed

@@ -1,12 +1,11 @@
 """
-main.py — Pricelyst Shopping Advisor (Jessica Edition 2026 - Upgrade v2.6)
-✅ Fixed: Search Regression (Now searches Brand, Category, & Description).
-✅ Feature: Store Preference Detection ("Price at OK Mart?").
-✅ Logic: Single Item (Best First) vs Basket (Cheapest Total).
 ✅ "Analyst Engine": Enhanced Data Flattening & Comparison Logic.
 ✅ "Visual Engine": Lists, Products, & Meal-to-Recipe recognition.
-✅ Memory Logic: Short-Term Sliding Window.
 ENV VARS:
 - GOOGLE_API_KEY=...
@@ -115,7 +114,7 @@ app = Flask(__name__)
 CORS(app)
 # =========================
-# 1. ETL Layer (Ingestion - Deep Flattening)
 # =========================
 def _norm(s: Any) -> str:
@@ -140,10 +139,6 @@ def _safe_json_loads(s: str, fallback: Any):
         return fallback
 def fetch_and_flatten_data() -> pd.DataFrame:
-    """
-    Fetches product data and creates a 'search_vector' for deep fuzzy matching.
-    Includes: Name, Brand, Category Strings.
-    """
     all_products = []
     page = 1
@@ -174,19 +169,15 @@ def fetch_and_flatten_data() -> pd.DataFrame:
             p_id = int(p.get("id") or 0)
             p_name = str(p.get("name") or "Unknown")
-            # --- Deep Metadata Extraction ---
             brand_obj = p.get("brand") or {}
             brand_name = str(brand_obj.get("brand_name") or "")
-            # Extract ALL category names (parent, sub, etc.)
             cats = p.get("categories") or []
             cat_names = [str(c.get("name") or "") for c in cats]
             cat_str = " ".join(cat_names)
-            # Base Category (for grouping)
             primary_cat = cat_names[0] if cat_names else "General"
-            # Create a Search Vector: "Top Chef Jasmine Rice Rice & Pasta Groceries"
             search_vector = _norm(f"{p_name} {brand_name} {cat_str}")
             views = int(p.get("view_count") or 0)
@@ -195,11 +186,10 @@ def fetch_and_flatten_data() -> pd.DataFrame:
             prices = p.get("prices") or []
             if not prices:
-                # No Price? Still index for "Out of Stock" awareness
                 rows.append({
                     "product_id": p_id,
                     "product_name": p_name,
-                    "search_vector": search_vector, # KEY UPGRADE
                     "brand": brand_name,
                     "category": primary_cat,
                     "retailer": "Listing",
@@ -219,7 +209,7 @@ def fetch_and_flatten_data() -> pd.DataFrame:
                     rows.append({
                         "product_id": p_id,
                         "product_name": p_name,
-                        "search_vector": search_vector, # KEY UPGRADE
                         "brand": brand_name,
                         "category": primary_cat,
                         "retailer": r_name,
@@ -246,21 +236,18 @@ def get_market_index(force_refresh: bool = False) -> pd.DataFrame:
     return _data_cache["df"]
 # =========================
-# 2. Analyst Engine (Smart Search & Logic)
 # =========================
 def search_products_deep(df: pd.DataFrame, query: str, limit: int = 15) -> pd.DataFrame:
-    """
-    Searches against the 'search_vector' (Name + Brand + Categories).
-    """
     if df.empty or not query: return df
     q_norm = _norm(query)
-    # 1. Direct match in vector
     mask = df['search_vector'].str.contains(q_norm, regex=False)
     matches = df[mask].copy()
-    # 2. Token overlap fallback (if query is "Cheap Rice", matches "Rice")
     if matches.empty:
         q_tokens = set(q_norm.split())
         def token_score(text):
@@ -275,26 +262,13 @@ def search_products_deep(df: pd.DataFrame, query: str, limit: int = 15) -> pd.Da
     if matches.empty: return matches
-    # 3. Sort: Views (Popularity) -> Price (Low)
     matches = matches.sort_values(by=['views', 'price'], ascending=[False, True])
     return matches.head(limit)
-def detect_retailer_preference(query: str) -> Optional[str]:
-    """Detects if user asked for a specific store."""
-    query = query.lower()
-    # Hardcoded known retailers for robustness
-    known_stores = ["ok mart", "ok supermarket", "tm pick n pay", "pick n pay", "spar", "food lovers", "choppies", "gains"]
-    for store in known_stores:
-        if store in query:
-            return store # Return the detected string to match loosely
-    return None
 def calculate_basket_optimization(item_names: List[str], preferred_retailer: str = None) -> Dict[str, Any]:
     """
-    The Core Logic:
-    - Single Item: Returns 'Best Option' + 'Others'.
-    - Basket: Returns 'Best Basket' + 'Breakdown'.
-    - Preference: Filters for specific store if requested.
     """
     df = get_market_index()
     if df.empty:
@@ -303,7 +277,7 @@ def calculate_basket_optimization(item_names: List[str], preferred_retailer: str
     found_items = []
     missing_global = []
-    # 1. Resolve Items
     for item in item_names:
         hits = search_products_deep(df[df['is_offer']==True], item, limit=10)
@@ -311,58 +285,83 @@ def calculate_basket_optimization(item_names: List[str], preferred_retailer: str
             missing_global.append(item)
             continue
-        # Group hits by Product Name to aggregate offers
-        # We take the most popular product match
-        best_product_name = hits.iloc[0]['product_name']
-        product_offers = hits[hits['product_name'] == best_product_name]
-        # Sort offers: Price Ascending
-        product_offers = product_offers.sort_values('price', ascending=True)
         offers_list = []
         for _, r in product_offers.iterrows():
-            offers_list.append({
-                "retailer": r['retailer'],
-                "price": float(r['price'])
-            })
         found_items.append({
             "query": item,
-            "product_name": best_product_name,
-            "category": str(hits.iloc[0]['category']),
-            "offers": offers_list, # All available prices for this item
-            "best_price": offers_list[0]['price'],
-            "best_retailer": offers_list[0]['retailer']
         })
     if not found_items:
         return {"actionable": True, "found_items": [], "global_missing": missing_global}
-    # 2. Logic: Single vs Multi
-    is_basket = len(found_items) > 1
-    result = {
         "actionable": True,
-        "is_basket": is_basket,
         "found_items": found_items,
         "global_missing": missing_global,
         "preferred_retailer": preferred_retailer
     }
-    # 3. Store Preference Logic (User asked: "Rice at OK Mart?")
-    if preferred_retailer and not is_basket:
-        item = found_items[0]
-        # Find the offer from the preferred store
-        pref_offer = next((o for o in item['offers'] if preferred_retailer.lower() in o['retailer'].lower()), None)
-        result['preferred_offer'] = pref_offer
-        result['comparison_vs_best'] = None
-        if pref_offer:
-            diff = pref_offer['price'] - item['best_price']
-            result['comparison_vs_best'] = diff # +ve means preferred is expensive, 0 means best
-    return result
 def calculate_zesa_units(amount_usd: float) -> Dict[str, Any]:
     remaining = amount_usd / 1.06
     units = 0.0
@@ -489,32 +488,32 @@ def gemini_chat_response(transcript: str, intent: Dict, analyst_data: Dict, chat
     PROMPT = f"""
     You are Jessica, Pricelyst's Shopping Advisor (Zimbabwe).
     Role: Intelligent Shopping Companion.
-    Goal: Shortest path to value. Give answers, not promises.
     INPUT: "{transcript}"
     INTENT: {intent.get('intent')}
     CONTEXT:
     {context_str}
-    LOGIC RULES (Strict Adherence):
-    1. **SINGLE ITEM QUERY** (e.g. "Price of Rice"):
-       - **Primary**: State the CHEAPEST option immediately. "I found [Product] at [Retailer] for **$[Price]**."
-       - **Comparison**: List 1-2 other options. "Also available at [Store B] ($X) and [Store C] ($Y)."
-       - **Store Preference**: If user asked "Rice at OK Mart?", state that price FIRST, then say if it's cheaper elsewhere.
-    2. **BASKET QUERY** (e.g. "Rice, Oil, and Soap"):
-       - Provide the **Total Basket Cost** at the cheapest single store.
-       - Provide the Breakdown.
-       - Mention if splitting stores saves significant money.
-    3. **MISSING ITEMS**:
-       - Be honest. "I couldn't find a current price for [Item]."
     4. **CASUAL**:
-       - Reset context if user says "Hi".
-    TONE: Helpful, direct, Zimbabwean. Use Markdown for prices.
     """
     try:
@@ -535,9 +534,9 @@ def gemini_generate_4step_plan(transcript: str, analyst_result: Dict) -> str:
     DATA: {json.dumps(analyst_result, indent=2, default=str)}
     SECTIONS:
     1. **Catalogue Found ✅** (Table: Item | Store | Price)
-    2. **Missing 😔** (Estimates)
-    3. **Recommendation 💡**
-    4. **Budget Tips**
     """
     try:
         resp = _gemini_client.models.generate_content(model=GEMINI_MODEL, contents=PROMPT)
@@ -556,7 +555,7 @@ def health():
         "ok": True,
         "offers_indexed": len(df),
         "api_source": PRICE_API_BASE,
-        "persona": "Jessica v2.6 (Deep Search)"
     })
 @app.post("/chat")
@@ -581,12 +580,8 @@ def chat():
     intent_data = gemini_detect_intent(msg)
     intent_type = intent_data.get("intent", "CASUAL_CHAT")
     items = intent_data.get("items", [])
-    store_pref = intent_data.get("store_preference") # Extracted from Gemini
-    # Store Preference Override (RegEx backup)
-    if not store_pref:
-        store_pref = detect_retailer_preference(msg)
     analyst_data = {}
     if items or intent_type in ["SHOPPING_BASKET", "STORE_DECISION", "TRUST_CHECK"]:
@@ -630,7 +625,7 @@ def analyze_image():
     analyst_data = {}
     if img_type == "IRRELEVANT" and not items:
-        prompt = f"User uploaded photo of {description}. Compliment it if appropriate (pet/nature), then explain you are a shopping bot."
         response_text = gemini_chat_response(prompt, {"intent": "CASUAL_CHAT"}, {}, "")
     elif items:
@@ -656,7 +651,7 @@ def analyze_image():
 @app.post("/api/call-briefing")
 def call_briefing():
-    # ... (Same as before, abbreviated for length but logic remains)
     body = request.get_json(silent=True) or {}
     pid = body.get("profile_id")
     username = body.get("username")

 """
+main.py — Pricelyst Shopping Advisor (Jessica Edition 2026 - Upgrade v2.7)
+✅ Fixed: Basket Comparison (Compares totals across ALL stores, showing missing items).
+✅ Fixed: Brand Loyalty (Explicitly states if exact brand is missing & suggests closest).
+✅ Logic: "Market Matrix" calculates basket cost for every retailer found.
 ✅ "Analyst Engine": Enhanced Data Flattening & Comparison Logic.
 ✅ "Visual Engine": Lists, Products, & Meal-to-Recipe recognition.
 ENV VARS:
 - GOOGLE_API_KEY=...
 CORS(app)
 # =========================
+# 1. ETL Layer (Deep Search Indexing)
 # =========================
 def _norm(s: Any) -> str:
         return fallback
 def fetch_and_flatten_data() -> pd.DataFrame:
     all_products = []
     page = 1
             p_id = int(p.get("id") or 0)
             p_name = str(p.get("name") or "Unknown")
             brand_obj = p.get("brand") or {}
             brand_name = str(brand_obj.get("brand_name") or "")
             cats = p.get("categories") or []
             cat_names = [str(c.get("name") or "") for c in cats]
             cat_str = " ".join(cat_names)
             primary_cat = cat_names[0] if cat_names else "General"
+            # Deep Search Vector
             search_vector = _norm(f"{p_name} {brand_name} {cat_str}")
             views = int(p.get("view_count") or 0)
             prices = p.get("prices") or []
             if not prices:
                 rows.append({
                     "product_id": p_id,
                     "product_name": p_name,
+                    "search_vector": search_vector,
                     "brand": brand_name,
                     "category": primary_cat,
                     "retailer": "Listing",
                     rows.append({
                         "product_id": p_id,
                         "product_name": p_name,
+                        "search_vector": search_vector,
                         "brand": brand_name,
                         "category": primary_cat,
                         "retailer": r_name,
     return _data_cache["df"]
 # =========================
+# 2. Analyst Engine (Matrix & Fallbacks)
 # =========================
 def search_products_deep(df: pd.DataFrame, query: str, limit: int = 15) -> pd.DataFrame:
     if df.empty or not query: return df
     q_norm = _norm(query)
+    # 1. Exact/Partial Vector Match
     mask = df['search_vector'].str.contains(q_norm, regex=False)
     matches = df[mask].copy()
+    # 2. Token Overlap Fallback
     if matches.empty:
         q_tokens = set(q_norm.split())
         def token_score(text):
     if matches.empty: return matches
     matches = matches.sort_values(by=['views', 'price'], ascending=[False, True])
     return matches.head(limit)
 def calculate_basket_optimization(item_names: List[str], preferred_retailer: str = None) -> Dict[str, Any]:
     """
+    Generates a FULL MARKET MATRIX.
+    Returns best store, plus how EVERY other store performed.
     """
     df = get_market_index()
     if df.empty:
     found_items = []
     missing_global = []
+    # 1. Resolve Items & Check Brand Fidelity
     for item in item_names:
         hits = search_products_deep(df[df['is_offer']==True], item, limit=10)
             missing_global.append(item)
             continue
+        best_match = hits.iloc[0]
+        # --- Brand Fidelity Check ---
+        # Did the user ask for "Top Chef" but we got "Mega Basmati"?
+        q_norm = _norm(item)
+        res_norm = _norm(best_match['product_name'] + " " + best_match['brand'])
+        # Simple heuristic: If query has 2+ words, and <50% of them are in result, it's a sub.
+        q_tokens = q_norm.split()
+        is_substitute = False
+        if len(q_tokens) > 1:
+            found_tokens = sum(1 for t in q_tokens if t in res_norm)
+            if found_tokens < len(q_tokens) / 2: # Loose threshold
+                is_substitute = True
+        # Aggregate all offers that share the best match's product name, cheapest first
+        product_offers = hits[hits['product_name'] == best_match['product_name']].sort_values('price')
         offers_list = []
         for _, r in product_offers.iterrows():
+            offers_list.append({"retailer": r['retailer'], "price": float(r['price'])})
         found_items.append({
             "query": item,
+            "product_name": str(best_match['product_name']),
+            "is_substitute": is_substitute, # KEY FEATURE
+            "offers": offers_list,
+            "best_price": offers_list[0]['price']
         })
     if not found_items:
         return {"actionable": True, "found_items": [], "global_missing": missing_global}
+    # 2. MARKET MATRIX (Comparison across all stores)
+    # Get unique retailers involved in these products
+    all_involved_retailers = set()
+    for f in found_items:
+        for o in f['offers']:
+            all_involved_retailers.add(o['retailer'])
+    store_comparison = []
+    for retailer in all_involved_retailers:
+        total_price = 0.0
+        found_count = 0
+        missing_in_store = []
+        for item in found_items:
+            # Find price at this retailer
+            price = next((o['price'] for o in item['offers'] if o['retailer'] == retailer), None)
+            if price:
+                total_price += price
+                found_count += 1
+            else:
+                missing_in_store.append(item['product_name'])
+        store_comparison.append({
+            "retailer": retailer,
+            "total_price": total_price,
+            "found_count": found_count,
+            "total_items": len(found_items),
+            "missing_items": missing_in_store
+        })
+    # Sort Matrix: Most Items Found -> Lowest Price
+    store_comparison.sort(key=lambda x: (-x['found_count'], x['total_price']))
+    return {
         "actionable": True,
+        "is_basket": len(found_items) > 1,
         "found_items": found_items,
         "global_missing": missing_global,
+        "market_matrix": store_comparison[:4], # Top 4 comparison
+        "best_store": store_comparison[0] if store_comparison else None,
         "preferred_retailer": preferred_retailer
     }
 def calculate_zesa_units(amount_usd: float) -> Dict[str, Any]:
     remaining = amount_usd / 1.06
     units = 0.0
     PROMPT = f"""
     You are Jessica, Pricelyst's Shopping Advisor (Zimbabwe).
     Role: Intelligent Shopping Companion.
+    Goal: Shortest path to value. Complete Transparency.
     INPUT: "{transcript}"
     INTENT: {intent.get('intent')}
     CONTEXT:
     {context_str}
+    LOGIC RULES:
+    1. **BASKET COMPARISON (Transparency)**:
+       - If `market_matrix` has multiple stores, **COMPARE THEM**.
+       - Example: "Spar is **$6.95** (All items). OK Mart is **$4.00**, but misses Cooking Oil."
+       - Don't just show the winner. Show the ecosystem.
+    2. **BRAND LOYALTY (Graceful Fallback)**:
+       - If `is_substitute` is TRUE for an item, say:
+         "I couldn't find **[Query Brand]** exactly, so I've used **[Found Product]** ($Price) as a placeholder."
+       - Be honest about brand mismatches.
+    3. **SINGLE ITEMS**:
+       - Best price first, then list 1-2 others.
     4. **CASUAL**:
+       - Reset if user says "Hi".
+    TONE: Helpful, direct, Zimbabwean. Use Markdown.
     """
     try:
     DATA: {json.dumps(analyst_result, indent=2, default=str)}
     SECTIONS:
     1. **Catalogue Found ✅** (Table: Item | Store | Price)
+    2. **Missing/Substitutes ⚠️** (Be clear about brand swaps)
+    3. **Store Comparison 📊** (List the Top 3 stores totals)
+    4. **Recommendation 💡**
     """
     try:
         resp = _gemini_client.models.generate_content(model=GEMINI_MODEL, contents=PROMPT)
         "ok": True,
         "offers_indexed": len(df),
         "api_source": PRICE_API_BASE,
+        "persona": "Jessica v2.7 (Matrix & Loyalty)"
     })
 @app.post("/chat")
     intent_data = gemini_detect_intent(msg)
     intent_type = intent_data.get("intent", "CASUAL_CHAT")
     items = intent_data.get("items", [])
+    store_pref = intent_data.get("store_preference")
     analyst_data = {}
     if items or intent_type in ["SHOPPING_BASKET", "STORE_DECISION", "TRUST_CHECK"]:
     analyst_data = {}
     if img_type == "IRRELEVANT" and not items:
+        prompt = f"User uploaded photo of {description}. Compliment it if appropriate, then explain you are a shopping bot."
         response_text = gemini_chat_response(prompt, {"intent": "CASUAL_CHAT"}, {}, "")
     elif items:
 @app.post("/api/call-briefing")
 def call_briefing():
+    # ... (Same as before)
     body = request.get_json(silent=True) or {}
     pid = body.get("profile_id")
     username = body.get("username")