Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
"""
|
| 2 |
-
main.py — Pricelyst Shopping Advisor (
|
| 3 |
|
| 4 |
✅ Flask API
|
| 5 |
-
✅ Firebase Admin
|
| 6 |
-
✅ Gemini via google-genai SDK (
|
| 7 |
-
✅
|
| 8 |
-
✅
|
| 9 |
-
✅ Real
|
| 10 |
|
| 11 |
ENV VARS:
|
| 12 |
- GOOGLE_API_KEY=...
|
|
@@ -20,10 +20,10 @@ import os
|
|
| 20 |
import re
|
| 21 |
import json
|
| 22 |
import time
|
| 23 |
-
import
|
| 24 |
import logging
|
| 25 |
from datetime import datetime, timezone
|
| 26 |
-
from typing import Any, Dict, List, Optional
|
| 27 |
|
| 28 |
import requests
|
| 29 |
import pandas as pd
|
|
@@ -38,7 +38,7 @@ logging.basicConfig(
|
|
| 38 |
)
|
| 39 |
logger = logging.getLogger("pricelyst-advisor")
|
| 40 |
|
| 41 |
-
# ––––– Gemini
|
| 42 |
|
| 43 |
try:
|
| 44 |
from google import genai
|
|
@@ -65,19 +65,17 @@ from firebase_admin import credentials, firestore
|
|
| 65 |
|
| 66 |
FIREBASE_ENV = os.environ.get("FIREBASE", "")
|
| 67 |
|
| 68 |
-
def init_firestore_from_env() -> firestore.Client:
|
| 69 |
if firebase_admin._apps:
|
| 70 |
return firestore.client()
|
| 71 |
-
|
| 72 |
if not FIREBASE_ENV:
|
| 73 |
logger.warning("FIREBASE env var missing. Persistence disabled.")
|
| 74 |
return None
|
| 75 |
-
|
| 76 |
try:
|
| 77 |
sa_info = json.loads(FIREBASE_ENV)
|
| 78 |
cred = credentials.Certificate(sa_info)
|
| 79 |
firebase_admin.initialize_app(cred)
|
| 80 |
-
logger.info("Firebase initialized
|
| 81 |
return firestore.client()
|
| 82 |
except Exception as e:
|
| 83 |
logger.critical("Failed to initialize Firebase: %s", e)
|
|
@@ -85,543 +83,587 @@ def init_firestore_from_env() -> firestore.Client:
|
|
| 85 |
|
| 86 |
db = init_firestore_from_env()
|
| 87 |
|
| 88 |
-
# ––––– External API
|
| 89 |
|
| 90 |
PRICE_API_BASE = os.environ.get("PRICE_API_BASE", "https://api.pricelyst.co.zw").rstrip("/")
|
| 91 |
-
HTTP_TIMEOUT =
|
| 92 |
-
|
| 93 |
-
# –––––
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
-
# –––––
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
"ts": 0,
|
| 103 |
-
"
|
| 104 |
-
"raw_count": 0
|
| 105 |
}
|
| 106 |
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
ZIM_ESSENTIALS = {
|
| 110 |
-
"fuel": {"price": 1.58, "unit": "L", "retailer": "Pump Price"},
|
| 111 |
-
"petrol": {"price": 1.58, "unit": "L", "retailer": "Pump Price"},
|
| 112 |
-
"diesel": {"price": 1.65, "unit": "L", "retailer": "Pump Price"},
|
| 113 |
-
"bread": {"price": 1.00, "unit": "loaf", "retailer": "Standard"},
|
| 114 |
-
"gas": {"price": 2.00, "unit": "kg", "retailer": "LPG Market"},
|
| 115 |
-
"electricity": {"price": 20.00, "unit": "est. month", "retailer": "ZESA"},
|
| 116 |
-
"zesa": {"price": 20.00, "unit": "est. month", "retailer": "ZESA"},
|
| 117 |
-
}
|
| 118 |
|
| 119 |
# =========================
|
| 120 |
-
#
|
| 121 |
# =========================
|
| 122 |
|
| 123 |
-
def
|
| 124 |
-
|
|
|
|
|
|
|
| 125 |
|
| 126 |
-
def
|
| 127 |
try:
|
| 128 |
-
if v is None
|
| 129 |
-
|
| 130 |
-
except Exception:
|
| 131 |
return 0.0
|
| 132 |
|
| 133 |
-
def
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
def _safe_json_loads(s: str, fallback: Any):
|
| 140 |
-
try:
|
| 141 |
-
# Clean potential markdown wrapping
|
| 142 |
-
if "```json" in s:
|
| 143 |
-
s = s.split("```json")[1].split("```")[0]
|
| 144 |
-
elif "```" in s:
|
| 145 |
-
s = s.split("```")[0]
|
| 146 |
-
return json.loads(s)
|
| 147 |
-
except Exception:
|
| 148 |
-
return fallback
|
| 149 |
-
|
| 150 |
-
# =========================
|
| 151 |
-
# Firestore
|
| 152 |
-
# =========================
|
| 153 |
-
|
| 154 |
-
def profile_ref(profile_id: str):
|
| 155 |
-
if not db: return None
|
| 156 |
-
return db.collection("pricelyst_profiles").document(profile_id)
|
| 157 |
-
|
| 158 |
-
def get_profile(profile_id: str) -> Dict[str, Any]:
|
| 159 |
-
if not db: return {}
|
| 160 |
-
try:
|
| 161 |
-
ref = profile_ref(profile_id)
|
| 162 |
-
doc = ref.get()
|
| 163 |
-
if doc.exists:
|
| 164 |
-
return doc.to_dict() or {}
|
| 165 |
-
|
| 166 |
-
data = {
|
| 167 |
-
"profile_id": profile_id,
|
| 168 |
-
"created_at": now_utc_iso(),
|
| 169 |
-
"updated_at": now_utc_iso(),
|
| 170 |
-
"username": None,
|
| 171 |
-
"memory_summary": "",
|
| 172 |
-
"preferences": {},
|
| 173 |
-
"counters": {"chats": 0, "calls": 0}
|
| 174 |
-
}
|
| 175 |
-
ref.set(data)
|
| 176 |
-
return data
|
| 177 |
-
except Exception as e:
|
| 178 |
-
logger.error("DB Error get_profile: %s", e)
|
| 179 |
-
return {}
|
| 180 |
-
|
| 181 |
-
def update_profile(profile_id: str, patch: Dict[str, Any]) -> None:
|
| 182 |
-
if not db: return
|
| 183 |
-
try:
|
| 184 |
-
patch = dict(patch)
|
| 185 |
-
patch["updated_at"] = now_utc_iso()
|
| 186 |
-
profile_ref(profile_id).set(patch, merge=True)
|
| 187 |
-
except Exception as e:
|
| 188 |
-
logger.error("DB Error update_profile: %s", e)
|
| 189 |
-
|
| 190 |
-
def log_call(profile_id: str, payload: Dict[str, Any]) -> str:
|
| 191 |
-
if not db: return str(int(time.time()))
|
| 192 |
-
try:
|
| 193 |
-
ref = db.collection("pricelyst_profiles").document(profile_id).collection("call_logs").document()
|
| 194 |
-
ref.set({
|
| 195 |
-
**payload,
|
| 196 |
-
"ts": now_utc_iso()
|
| 197 |
-
})
|
| 198 |
-
return ref.id
|
| 199 |
-
except Exception as e:
|
| 200 |
-
logger.error("DB Error log_call: %s", e)
|
| 201 |
-
return ""
|
| 202 |
-
|
| 203 |
-
# =========================
|
| 204 |
-
# Data Ingestion (ETL)
|
| 205 |
-
# =========================
|
| 206 |
-
|
| 207 |
-
def fetch_products(max_pages: int = 10, per_page: int = 50) -> List[Dict[str, Any]]:
|
| 208 |
-
"""Fetch raw products from Pricelyst API."""
|
| 209 |
all_products = []
|
| 210 |
-
|
|
|
|
|
|
|
| 211 |
try:
|
| 212 |
-
|
| 213 |
-
|
|
|
|
| 214 |
r.raise_for_status()
|
| 215 |
-
|
|
|
|
| 216 |
if not data: break
|
|
|
|
| 217 |
all_products.extend(data)
|
| 218 |
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
if p >= (meta.get("totalPages") or 999):
|
| 222 |
break
|
|
|
|
| 223 |
except Exception as e:
|
| 224 |
-
logger.
|
| 225 |
break
|
| 226 |
-
return all_products
|
| 227 |
|
| 228 |
-
|
| 229 |
-
"""
|
| 230 |
-
Strict mapping of the nested JSON structure to a flat search index.
|
| 231 |
-
Structure: product -> prices[] -> retailer
|
| 232 |
-
"""
|
| 233 |
rows = []
|
| 234 |
-
for p in
|
| 235 |
try:
|
| 236 |
p_id = p.get("id")
|
| 237 |
p_name = p.get("name") or "Unknown"
|
| 238 |
-
|
| 239 |
|
| 240 |
-
#
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
|
|
|
|
|
|
| 248 |
|
| 249 |
-
#
|
| 250 |
prices = p.get("prices") or []
|
| 251 |
|
| 252 |
-
#
|
| 253 |
if not prices:
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
})
|
| 267 |
continue
|
| 268 |
|
| 269 |
for offer in prices:
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
price_val =
|
| 273 |
|
| 274 |
if price_val > 0:
|
| 275 |
rows.append({
|
| 276 |
"product_id": p_id,
|
| 277 |
"product_name": p_name,
|
| 278 |
-
"clean_name":
|
| 279 |
-
"description": p_desc,
|
| 280 |
-
"category": cat_name,
|
| 281 |
"brand": brand_name,
|
| 282 |
-
"
|
|
|
|
| 283 |
"price": price_val,
|
| 284 |
-
"
|
|
|
|
|
|
|
| 285 |
})
|
| 286 |
-
|
| 287 |
-
except Exception as e:
|
| 288 |
continue
|
| 289 |
|
| 290 |
df = pd.DataFrame(rows)
|
| 291 |
return df
|
| 292 |
|
| 293 |
-
def
|
| 294 |
-
"""Singleton
|
| 295 |
-
global
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
_product_cache["ts"] = time.time()
|
| 305 |
-
_product_cache["df_offers"] = df
|
| 306 |
-
_product_cache["raw_count"] = len(raw_products)
|
| 307 |
-
logger.info(f"Index Refreshed: {len(df)} offers from {len(raw_products)} products.")
|
| 308 |
-
except Exception as e:
|
| 309 |
-
logger.error(f"Failed to refresh index: {e}")
|
| 310 |
-
if isinstance(_product_cache["df_offers"], pd.DataFrame):
|
| 311 |
-
return _product_cache["df_offers"]
|
| 312 |
-
return pd.DataFrame()
|
| 313 |
-
|
| 314 |
-
return _product_cache["df_offers"]
|
| 315 |
|
| 316 |
# =========================
|
| 317 |
-
#
|
| 318 |
# =========================
|
| 319 |
|
| 320 |
-
def
|
| 321 |
-
"""
|
| 322 |
-
|
| 323 |
-
"""
|
| 324 |
-
if df.empty: return []
|
| 325 |
|
| 326 |
-
q_norm =
|
| 327 |
q_tokens = set(q_norm.split())
|
| 328 |
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
temp_df = df.copy()
|
| 337 |
-
temp_df['score'] = temp_df['clean_name'].apply(score_text)
|
| 338 |
|
| 339 |
-
#
|
| 340 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
|
| 342 |
-
|
| 343 |
-
# Fallback: Try searching category
|
| 344 |
-
matches = temp_df[temp_df['category'].str.lower().str.contains(q_norm, na=False)]
|
| 345 |
|
| 346 |
-
|
| 347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 348 |
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
#
|
| 353 |
-
|
| 354 |
-
seen_ids = set()
|
| 355 |
|
| 356 |
-
for
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
|
|
|
|
|
|
| 360 |
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
"
|
| 365 |
-
"
|
| 366 |
-
"
|
| 367 |
-
"image": row['image']
|
| 368 |
})
|
| 369 |
-
if len(unique_products) >= limit: break
|
| 370 |
-
|
| 371 |
-
return unique_products
|
| 372 |
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
# =========================
|
| 376 |
-
|
| 377 |
-
def gemini_generate_text(system_prompt: str, user_prompt: str) -> str:
|
| 378 |
-
"""Standard text generation."""
|
| 379 |
-
if not _gemini_client: return ""
|
| 380 |
-
try:
|
| 381 |
-
# Simplified call using contents string directly
|
| 382 |
-
response = _gemini_client.models.generate_content(
|
| 383 |
-
model=GEMINI_MODEL,
|
| 384 |
-
contents=system_prompt + "\n\n" + user_prompt,
|
| 385 |
-
config=types.GenerateContentConfig(
|
| 386 |
-
temperature=0.4
|
| 387 |
-
)
|
| 388 |
-
)
|
| 389 |
-
return response.text or ""
|
| 390 |
-
except Exception as e:
|
| 391 |
-
logger.error(f"Gemini Text Error: {e}")
|
| 392 |
-
return ""
|
| 393 |
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
#
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
Update the Current Memory to include new details. Focus on:
|
| 425 |
-
- Names (User, Family, Friends)
|
| 426 |
-
- Dietary preferences or allergies
|
| 427 |
-
- Budget habits (e.g., "likes cheap meat", "buys bulk")
|
| 428 |
-
- Life events (e.g., "hosting a braai on Friday", "wife's birthday")
|
| 429 |
-
- Feedback (e.g., "loved the T-bone suggestion")
|
| 430 |
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
|
| 435 |
-
def
|
| 436 |
-
"""
|
| 437 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 438 |
|
| 439 |
-
|
| 440 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
|
| 442 |
-
|
| 443 |
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 451 |
|
| 452 |
# =========================
|
| 453 |
-
#
|
| 454 |
# =========================
|
| 455 |
|
| 456 |
-
|
| 457 |
-
You are a Shopping Assistant Data Extractor.
|
| 458 |
-
Analyze the transcript and extract a list of shopping items the user implicitly or explicitly wants.
|
| 459 |
-
Return JSON: { "items": [ { "name": "searchable term", "qty": "quantity string" } ] }
|
| 460 |
-
If no items found, return { "items": [] }.
|
| 461 |
-
"""
|
| 462 |
-
|
| 463 |
-
SYNTHESIS_SYSTEM_PROMPT = """
|
| 464 |
-
You are Jessica, Pricelyst's Shopping Advisor.
|
| 465 |
-
Generate a shopping plan based on the USER TRANSCRIPT and the DATA CONTEXT provided.
|
| 466 |
-
|
| 467 |
-
RULES:
|
| 468 |
-
1. USE REAL DATA: Use the prices and retailers found in DATA CONTEXT.
|
| 469 |
-
2. ESTIMATES: If context says "FOUND: FALSE", use your best guess for Zimbabwe prices and mark as "(Est)".
|
| 470 |
-
3. FORMAT: Return strict JSON with a 'markdown_content' field containing a professional report.
|
| 471 |
-
|
| 472 |
-
JSON SCHEMA:
|
| 473 |
-
{
|
| 474 |
-
"is_actionable": true,
|
| 475 |
-
"title": "Short Title",
|
| 476 |
-
"markdown_content": "# Title\n\n..."
|
| 477 |
-
}
|
| 478 |
-
"""
|
| 479 |
-
|
| 480 |
-
def build_shopping_plan(transcript: str) -> Dict[str, Any]:
|
| 481 |
"""
|
| 482 |
-
|
|
|
|
|
|
|
| 483 |
"""
|
| 484 |
-
if
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
|
| 491 |
-
|
| 492 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
|
| 494 |
-
|
|
|
|
| 495 |
|
| 496 |
-
#
|
| 497 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
qty_str = item.get("qty", "1")
|
| 502 |
-
|
| 503 |
-
# Check Essentials Fallback
|
| 504 |
-
ess_key = next((k for k in ZIM_ESSENTIALS if k in term.lower()), None)
|
| 505 |
-
if ess_key:
|
| 506 |
-
data = ZIM_ESSENTIALS[ess_key]
|
| 507 |
-
context_lines.append(f"- ITEM: {term} | SOURCE: Market Rate | PRICE: ${data['price']} | RETAILER: {data['retailer']}")
|
| 508 |
-
continue
|
| 509 |
-
|
| 510 |
-
# Search DB
|
| 511 |
-
hits = search_index(df, term, limit=1)
|
| 512 |
-
if hits:
|
| 513 |
-
best = hits[0]
|
| 514 |
-
context_lines.append(f"- ITEM: {term} | FOUND: TRUE | PRODUCT: {best['name']} | PRICE: ${best['price']} | RETAILER: {best['retailer']}")
|
| 515 |
-
else:
|
| 516 |
-
context_lines.append(f"- ITEM: {term} | FOUND: FALSE | NOTE: Needs estimation.")
|
| 517 |
-
|
| 518 |
-
data_context = "\n".join(context_lines)
|
| 519 |
-
logger.info(f"Plan Context:\n{data_context}")
|
| 520 |
-
|
| 521 |
-
# 3. Synthesis
|
| 522 |
-
final_prompt = f"TRANSCRIPT:\n{transcript}\n\nDATA CONTEXT (Real Prices):\n{data_context}"
|
| 523 |
-
plan = gemini_generate_json(SYNTHESIS_SYSTEM_PROMPT, final_prompt)
|
| 524 |
|
| 525 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 526 |
|
| 527 |
# =========================
|
| 528 |
-
#
|
| 529 |
# =========================
|
| 530 |
|
| 531 |
@app.get("/health")
|
| 532 |
def health():
|
| 533 |
-
df =
|
| 534 |
return jsonify({
|
| 535 |
"ok": True,
|
| 536 |
-
"
|
| 537 |
-
"
|
| 538 |
})
|
| 539 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
@app.post("/api/call-briefing")
|
| 541 |
def call_briefing():
|
| 542 |
"""
|
| 543 |
-
|
|
|
|
| 544 |
"""
|
| 545 |
body = request.get_json(silent=True) or {}
|
| 546 |
-
|
| 547 |
username = body.get("username")
|
| 548 |
|
| 549 |
-
if not
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
|
|
|
|
|
|
|
|
|
| 562 |
}
|
| 563 |
-
|
| 564 |
return jsonify({
|
| 565 |
"ok": True,
|
| 566 |
"memory_summary": prof.get("memory_summary", ""),
|
| 567 |
-
"kpi_snapshot": json.dumps(
|
| 568 |
})
|
| 569 |
|
| 570 |
@app.post("/api/log-call-usage")
|
| 571 |
def log_call_usage():
|
| 572 |
"""
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
"""
|
| 577 |
body = request.get_json(silent=True) or {}
|
| 578 |
-
|
| 579 |
transcript = body.get("transcript", "")
|
| 580 |
|
| 581 |
-
if not
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
logger.info(f"Processing Call for {profile_id}. Transcript Len: {len(transcript)}")
|
| 585 |
-
|
| 586 |
-
# 1. Update Long Term Memory
|
| 587 |
-
update_long_term_memory(profile_id, transcript)
|
| 588 |
-
|
| 589 |
-
# 2. Generate Plan
|
| 590 |
-
plan_data = {}
|
| 591 |
-
plan_id = None
|
| 592 |
|
| 593 |
-
|
|
|
|
| 594 |
try:
|
| 595 |
-
|
|
|
|
|
|
|
|
|
|
| 596 |
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
plan_id = plan_ref.id
|
| 603 |
-
logger.info(f"Plan Created: {plan_id}")
|
| 604 |
except Exception as e:
|
| 605 |
-
logger.error(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 606 |
|
| 607 |
# 3. Log Call
|
| 608 |
-
|
| 609 |
-
"
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
|
|
|
| 613 |
|
| 614 |
return jsonify({
|
| 615 |
"ok": True,
|
| 616 |
"shopping_plan": plan_data if plan_data.get("is_actionable") else None
|
| 617 |
})
|
| 618 |
|
| 619 |
-
# ––––– CRUD
|
| 620 |
|
| 621 |
@app.get("/api/shopping-plans")
|
| 622 |
def list_plans():
|
| 623 |
pid = request.args.get("profile_id")
|
| 624 |
-
if not pid: return jsonify({"ok": False}), 400
|
| 625 |
try:
|
| 626 |
docs = db.collection("pricelyst_profiles").document(pid).collection("shopping_plans") \
|
| 627 |
.order_by("created_at", direction=firestore.Query.DESCENDING).limit(10).stream()
|
|
@@ -633,12 +675,12 @@ def list_plans():
|
|
| 633 |
@app.delete("/api/shopping-plans/<plan_id>")
|
| 634 |
def delete_plan(plan_id):
|
| 635 |
pid = request.args.get("profile_id")
|
| 636 |
-
if not pid: return jsonify({"ok": False}), 400
|
| 637 |
try:
|
| 638 |
db.collection("pricelyst_profiles").document(pid).collection("shopping_plans").document(plan_id).delete()
|
| 639 |
return jsonify({"ok": True})
|
| 640 |
-
except
|
| 641 |
-
return jsonify({"ok": False
|
| 642 |
|
| 643 |
# =========================
|
| 644 |
# Main
|
|
@@ -646,9 +688,9 @@ def delete_plan(plan_id):
|
|
| 646 |
|
| 647 |
if __name__ == "__main__":
|
| 648 |
port = int(os.environ.get("PORT", 7860))
|
| 649 |
-
# Pre-warm
|
| 650 |
try:
|
| 651 |
-
|
| 652 |
except:
|
| 653 |
pass
|
| 654 |
app.run(host="0.0.0.0", port=port)
|
|
|
|
| 1 |
"""
|
| 2 |
+
main.py — Pricelyst Shopping Advisor (Analyst Edition)
|
| 3 |
|
| 4 |
✅ Flask API
|
| 5 |
+
✅ Firebase Admin Persistence
|
| 6 |
+
✅ Gemini via google-genai SDK (Robust)
|
| 7 |
+
✅ "Analyst Engine": Python Math for Baskets, ZESA, & Fuel
|
| 8 |
+
✅ Ground Truth Data: Uses /api/v1/product-listing
|
| 9 |
+
✅ Real-Time Basket Optimization
|
| 10 |
|
| 11 |
ENV VARS:
|
| 12 |
- GOOGLE_API_KEY=...
|
|
|
|
| 20 |
import re
|
| 21 |
import json
|
| 22 |
import time
|
| 23 |
+
import math
|
| 24 |
import logging
|
| 25 |
from datetime import datetime, timezone
|
| 26 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 27 |
|
| 28 |
import requests
|
| 29 |
import pandas as pd
|
|
|
|
| 38 |
)
|
| 39 |
logger = logging.getLogger("pricelyst-advisor")
|
| 40 |
|
| 41 |
+
# ––––– Gemini SDK –––––
|
| 42 |
|
| 43 |
try:
|
| 44 |
from google import genai
|
|
|
|
| 65 |
|
| 66 |
FIREBASE_ENV = os.environ.get("FIREBASE", "")
|
| 67 |
|
| 68 |
+
def init_firestore_from_env() -> Optional[firestore.Client]:
|
| 69 |
if firebase_admin._apps:
|
| 70 |
return firestore.client()
|
|
|
|
| 71 |
if not FIREBASE_ENV:
|
| 72 |
logger.warning("FIREBASE env var missing. Persistence disabled.")
|
| 73 |
return None
|
|
|
|
| 74 |
try:
|
| 75 |
sa_info = json.loads(FIREBASE_ENV)
|
| 76 |
cred = credentials.Certificate(sa_info)
|
| 77 |
firebase_admin.initialize_app(cred)
|
| 78 |
+
logger.info("Firebase initialized.")
|
| 79 |
return firestore.client()
|
| 80 |
except Exception as e:
|
| 81 |
logger.critical("Failed to initialize Firebase: %s", e)
|
|
|
|
| 83 |
|
| 84 |
db = init_firestore_from_env()
|
| 85 |
|
| 86 |
+
# ––––– External API –––––
|
| 87 |
|
| 88 |
PRICE_API_BASE = os.environ.get("PRICE_API_BASE", "https://api.pricelyst.co.zw").rstrip("/")
|
| 89 |
+
HTTP_TIMEOUT = 30
|
| 90 |
+
|
| 91 |
+
# ––––– Static Data (Zim Context) –––––
|
| 92 |
+
|
| 93 |
+
ZIM_UTILITIES = {
|
| 94 |
+
"fuel_petrol": 1.58, # USD per Litre
|
| 95 |
+
"fuel_diesel": 1.65, # USD per Litre
|
| 96 |
+
"gas_lpg": 2.00, # USD per kg
|
| 97 |
+
"bread": 1.00, # USD fixed
|
| 98 |
+
# ZESA Estimates (Simplified Stepped Tariff)
|
| 99 |
+
"zesa_step_1": {"limit": 50, "rate": 0.04}, # First 50 units (Life line)
|
| 100 |
+
"zesa_step_2": {"limit": 150, "rate": 0.09}, # Next 150
|
| 101 |
+
"zesa_step_3": {"limit": 9999, "rate": 0.14}, # Balance
|
| 102 |
+
"zesa_levy": 0.06 # 6% REA levy approx
|
| 103 |
+
}
|
| 104 |
|
| 105 |
+
# ––––– Cache –––––
|
| 106 |
|
| 107 |
+
PRODUCT_CACHE_TTL = 60 * 20 # 20 mins
|
| 108 |
+
_data_cache: Dict[str, Any] = {
|
| 109 |
"ts": 0,
|
| 110 |
+
"df": pd.DataFrame(), # Columns: [id, name, clean_name, brand, category, retailer, price, views, image]
|
| 111 |
+
"raw_count": 0
|
| 112 |
}
|
| 113 |
|
| 114 |
+
app = Flask(__name__)
|
| 115 |
+
CORS(app)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
# =========================
|
| 118 |
+
# 1. ETL Layer (Ingestion)
|
| 119 |
# =========================
|
| 120 |
|
| 121 |
+
def _norm(s: Any) -> str:
|
| 122 |
+
"""Normalize string for fuzzy search."""
|
| 123 |
+
if not s: return ""
|
| 124 |
+
return str(s).strip().lower()
|
| 125 |
|
| 126 |
+
def _coerce_price(v: Any) -> float:
|
| 127 |
try:
|
| 128 |
+
return float(v) if v is not None else 0.0
|
| 129 |
+
except:
|
|
|
|
| 130 |
return 0.0
|
| 131 |
|
| 132 |
+
def fetch_and_flatten_data() -> pd.DataFrame:
|
| 133 |
+
"""
|
| 134 |
+
Fetches from /api/v1/product-listing and flattens into an analytical DF.
|
| 135 |
+
Each row represents a single 'Offer' (Product X at Retailer Y).
|
| 136 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
all_products = []
|
| 138 |
+
page = 1
|
| 139 |
+
|
| 140 |
+
while True:
|
| 141 |
try:
|
| 142 |
+
# New Endpoint Structure
|
| 143 |
+
url = f"{PRICE_API_BASE}/api/v1/product-listing"
|
| 144 |
+
r = requests.get(url, params={"page": page, "perPage": 50}, timeout=HTTP_TIMEOUT)
|
| 145 |
r.raise_for_status()
|
| 146 |
+
payload = r.json()
|
| 147 |
+
data = payload.get("data") or []
|
| 148 |
if not data: break
|
| 149 |
+
|
| 150 |
all_products.extend(data)
|
| 151 |
|
| 152 |
+
meta = payload
|
| 153 |
+
if page >= (meta.get("totalPages") or 99):
|
|
|
|
| 154 |
break
|
| 155 |
+
page += 1
|
| 156 |
except Exception as e:
|
| 157 |
+
logger.error(f"ETL Error on page {page}: {e}")
|
| 158 |
break
|
|
|
|
| 159 |
|
| 160 |
+
# Flattening Logic
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
rows = []
|
| 162 |
+
for p in all_products:
|
| 163 |
try:
|
| 164 |
p_id = p.get("id")
|
| 165 |
p_name = p.get("name") or "Unknown"
|
| 166 |
+
clean_name = _norm(p_name)
|
| 167 |
|
| 168 |
+
# Category & Brand extraction
|
| 169 |
+
# Based on user JSON: 'category' is an object inside product
|
| 170 |
+
cat_obj = p.get("category") or {}
|
| 171 |
+
cat_name = cat_obj.get("name") or "General"
|
| 172 |
+
|
| 173 |
+
brand_obj = p.get("brand") or {}
|
| 174 |
+
brand_name = brand_obj.get("brand_name") or ""
|
| 175 |
+
|
| 176 |
+
views = int(p.get("view_count") or 0)
|
| 177 |
+
image = p.get("thumbnail") or p.get("image")
|
| 178 |
|
| 179 |
+
# Prices array
|
| 180 |
prices = p.get("prices") or []
|
| 181 |
|
| 182 |
+
# If no prices, we still index product for "Knowledge" but with price=0
|
| 183 |
if not prices:
|
| 184 |
+
rows.append({
|
| 185 |
+
"product_id": p_id,
|
| 186 |
+
"product_name": p_name,
|
| 187 |
+
"clean_name": clean_name,
|
| 188 |
+
"brand": brand_name,
|
| 189 |
+
"category": cat_name,
|
| 190 |
+
"retailer": "Listing",
|
| 191 |
+
"price": 0.0,
|
| 192 |
+
"views": views,
|
| 193 |
+
"image": image,
|
| 194 |
+
"is_offer": False
|
| 195 |
+
})
|
|
|
|
| 196 |
continue
|
| 197 |
|
| 198 |
for offer in prices:
|
| 199 |
+
retailer = offer.get("retailer") or {}
|
| 200 |
+
r_name = retailer.get("name") or "Unknown Store"
|
| 201 |
+
price_val = _coerce_price(offer.get("price"))
|
| 202 |
|
| 203 |
if price_val > 0:
|
| 204 |
rows.append({
|
| 205 |
"product_id": p_id,
|
| 206 |
"product_name": p_name,
|
| 207 |
+
"clean_name": clean_name,
|
|
|
|
|
|
|
| 208 |
"brand": brand_name,
|
| 209 |
+
"category": cat_name,
|
| 210 |
+
"retailer": r_name,
|
| 211 |
"price": price_val,
|
| 212 |
+
"views": views,
|
| 213 |
+
"image": image,
|
| 214 |
+
"is_offer": True
|
| 215 |
})
|
| 216 |
+
except:
|
|
|
|
| 217 |
continue
|
| 218 |
|
| 219 |
df = pd.DataFrame(rows)
|
| 220 |
return df
|
| 221 |
|
| 222 |
+
def get_market_index(force_refresh: bool = False) -> pd.DataFrame:
|
| 223 |
+
"""Singleton access to the Dataframe."""
|
| 224 |
+
global _data_cache
|
| 225 |
+
if force_refresh or _data_cache["df"].empty or (time.time() - _data_cache["ts"] > PRODUCT_CACHE_TTL):
|
| 226 |
+
logger.info("ETL: Refreshing Market Index...")
|
| 227 |
+
df = fetch_and_flatten_data()
|
| 228 |
+
_data_cache["df"] = df
|
| 229 |
+
_data_cache["ts"] = time.time()
|
| 230 |
+
_data_cache["raw_count"] = len(df)
|
| 231 |
+
logger.info(f"ETL: Loaded {len(df)} market offers.")
|
| 232 |
+
return _data_cache["df"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
|
| 234 |
# =========================
|
| 235 |
+
# 2. Analyst Engine (Math Logic)
|
| 236 |
# =========================
|
| 237 |
|
| 238 |
+
def search_products_fuzzy(df: pd.DataFrame, query: str, limit: int = 10) -> pd.DataFrame:
|
| 239 |
+
"""Finds products matching query (Name, Brand, or Category)."""
|
| 240 |
+
if df.empty or not query: return df
|
|
|
|
|
|
|
| 241 |
|
| 242 |
+
q_norm = _norm(query)
|
| 243 |
q_tokens = set(q_norm.split())
|
| 244 |
|
| 245 |
+
# Quick filter: String contains
|
| 246 |
+
mask_name = df['clean_name'].str.contains(q_norm, regex=False)
|
| 247 |
+
mask_brand = df['brand'].str.lower().str.contains(q_norm, regex=False)
|
| 248 |
+
mask_cat = df['category'].str.lower().str.contains(q_norm, regex=False)
|
| 249 |
+
|
| 250 |
+
matches = df[mask_name | mask_brand | mask_cat].copy()
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
+
# Simple Scoring
|
| 253 |
+
def scorer(row):
|
| 254 |
+
score = 0
|
| 255 |
+
if q_norm in row['clean_name']: score += 10
|
| 256 |
+
if q_norm == row['clean_name']: score += 20
|
| 257 |
+
# Popularity boost
|
| 258 |
+
score += math.log(row['views'] + 1) * 0.5
|
| 259 |
+
return score
|
| 260 |
+
|
| 261 |
+
if not matches.empty:
|
| 262 |
+
matches['score'] = matches.apply(scorer, axis=1)
|
| 263 |
+
return matches.sort_values('score', ascending=False).head(limit)
|
| 264 |
|
| 265 |
+
return matches
|
|
|
|
|
|
|
| 266 |
|
| 267 |
+
def calculate_basket_optimization(item_names: List[str]) -> Dict[str, Any]:
|
| 268 |
+
"""
|
| 269 |
+
Killer Question: 'Where should I buy this list?'
|
| 270 |
+
Returns: Best Store, Missing Items, Total Cost.
|
| 271 |
+
"""
|
| 272 |
+
df = get_market_index()
|
| 273 |
+
if df.empty: return {"error": "No data"}
|
| 274 |
|
| 275 |
+
basket_results = []
|
| 276 |
+
missing_global = []
|
| 277 |
+
|
| 278 |
+
# 1. Resolve Items to Real Products
|
| 279 |
+
found_items = [] # list of (item_query, product_id, product_name)
|
|
|
|
| 280 |
|
| 281 |
+
for item in item_names:
|
| 282 |
+
# Find best matching product (using popularity tie-breaker)
|
| 283 |
+
hits = search_products_fuzzy(df[df['is_offer']==True], item, limit=5)
|
| 284 |
+
if hits.empty:
|
| 285 |
+
missing_global.append(item)
|
| 286 |
+
continue
|
| 287 |
|
| 288 |
+
# Pick the most popular product that matches this query
|
| 289 |
+
best_prod = hits.sort_values('views', ascending=False).iloc[0]
|
| 290 |
+
found_items.append({
|
| 291 |
+
"query": item,
|
| 292 |
+
"product_id": best_prod['product_id'],
|
| 293 |
+
"name": best_prod['product_name']
|
|
|
|
| 294 |
})
|
|
|
|
|
|
|
|
|
|
| 295 |
|
| 296 |
+
if not found_items:
|
| 297 |
+
return {"actionable": False, "reason": "No items found in database."}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
|
| 299 |
+
# 2. Calculate Totals Per Retailer
|
| 300 |
+
# We only care about retailers that stock these products
|
| 301 |
+
target_pids = [x['product_id'] for x in found_items]
|
| 302 |
+
|
| 303 |
+
# Filter DF to only relevant products
|
| 304 |
+
relevant_offers = df[df['product_id'].isin(target_pids) & df['is_offer']]
|
| 305 |
+
|
| 306 |
+
# Group by Retailer
|
| 307 |
+
retailer_stats = []
|
| 308 |
+
all_retailers = relevant_offers['retailer'].unique()
|
| 309 |
+
|
| 310 |
+
for retailer in all_retailers:
|
| 311 |
+
r_df = relevant_offers[relevant_offers['retailer'] == retailer]
|
| 312 |
+
|
| 313 |
+
found_count = len(r_df)
|
| 314 |
+
total_price = r_df['price'].sum()
|
| 315 |
+
|
| 316 |
+
# Identify what this retailer has vs misses
|
| 317 |
+
retailer_pids = r_df['product_id'].tolist()
|
| 318 |
+
missing_in_store = [x['name'] for x in found_items if x['product_id'] not in retailer_pids]
|
| 319 |
+
found_names = [x['name'] for x in found_items if x['product_id'] in retailer_pids]
|
| 320 |
+
|
| 321 |
+
retailer_stats.append({
|
| 322 |
+
"retailer": retailer,
|
| 323 |
+
"total_price": float(total_price),
|
| 324 |
+
"item_count": found_count,
|
| 325 |
+
"coverage_percent": (found_count / len(found_items)) * 100,
|
| 326 |
+
"missing": missing_in_store,
|
| 327 |
+
"found_items": found_names
|
| 328 |
+
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
|
| 330 |
+
# 3. Sort by: Coverage (Desc), then Price (Asc)
|
| 331 |
+
retailer_stats.sort(key=lambda x: (-x['coverage_percent'], x['total_price']))
|
| 332 |
+
|
| 333 |
+
best_option = retailer_stats[0] if retailer_stats else None
|
| 334 |
+
|
| 335 |
+
return {
|
| 336 |
+
"actionable": True,
|
| 337 |
+
"basket_items": [x['name'] for x in found_items],
|
| 338 |
+
"global_missing": missing_global,
|
| 339 |
+
"best_store": best_option,
|
| 340 |
+
"all_stores": retailer_stats[:3] # Return top 3 for comparison
|
| 341 |
+
}
|
| 342 |
|
| 343 |
+
def calculate_zesa_units(amount_usd: float) -> Dict[str, Any]:
|
| 344 |
+
"""
|
| 345 |
+
Killer Question: 'How much ZESA do I get for $20?'
|
| 346 |
+
Uses a simplified tiered logic (Approximation of ZESA tariff).
|
| 347 |
+
"""
|
| 348 |
+
remaining = amount_usd / 1.06 # Remove 6% levy approx
|
| 349 |
+
units = 0.0
|
| 350 |
+
breakdown = []
|
| 351 |
+
|
| 352 |
+
# Tier 1: First 50 units (Cheap)
|
| 353 |
+
t1 = ZIM_UTILITIES["zesa_step_1"]
|
| 354 |
+
cost_t1 = t1["limit"] * t1["rate"]
|
| 355 |
+
|
| 356 |
+
if remaining > cost_t1:
|
| 357 |
+
units += t1["limit"]
|
| 358 |
+
remaining -= cost_t1
|
| 359 |
+
breakdown.append(f"First {t1['limit']} units @ ${t1['rate']}")
|
| 360 |
+
|
| 361 |
+
# Tier 2: Next 150
|
| 362 |
+
t2 = ZIM_UTILITIES["zesa_step_2"]
|
| 363 |
+
cost_t2 = t2["limit"] * t2["rate"]
|
| 364 |
+
|
| 365 |
+
if remaining > cost_t2:
|
| 366 |
+
units += t2["limit"]
|
| 367 |
+
remaining -= cost_t2
|
| 368 |
+
breakdown.append(f"Next {t2['limit']} units @ ${t2['rate']}")
|
| 369 |
+
|
| 370 |
+
# Tier 3: Balance (Expensive)
|
| 371 |
+
t3 = ZIM_UTILITIES["zesa_step_3"]
|
| 372 |
+
bought = remaining / t3["rate"]
|
| 373 |
+
units += bought
|
| 374 |
+
breakdown.append(f"Remaining ${(remaining + cost_t1 + cost_t2):.2f} bought {bought:.1f} units @ ${t3['rate']}")
|
| 375 |
+
else:
|
| 376 |
+
bought = remaining / t2["rate"]
|
| 377 |
+
units += bought
|
| 378 |
+
breakdown.append(f"Balance bought {bought:.1f} units @ ${t2['rate']}")
|
| 379 |
+
else:
|
| 380 |
+
bought = remaining / t1["rate"]
|
| 381 |
+
units += bought
|
| 382 |
+
breakdown.append(f"All {bought:.1f} units @ ${t1['rate']}")
|
| 383 |
+
|
| 384 |
+
return {
|
| 385 |
+
"amount_usd": amount_usd,
|
| 386 |
+
"est_units_kwh": round(units, 1),
|
| 387 |
+
"breakdown": breakdown,
|
| 388 |
+
"note": "Estimates include ~6% REA levy. Actual units depend on your last purchase date."
|
| 389 |
+
}
|
| 390 |
|
| 391 |
+
def get_product_intelligence(query: str) -> Dict[str, Any]:
    """
    Killer Question: 'Is this price reasonable?' / 'Most Popular?'

    Fuzzy-matches the query against live offers, then aggregates price
    statistics across every retailer carrying the best-matching product.
    """
    market = get_market_index()
    matches = search_products_fuzzy(market[market['is_offer']], query, limit=10)

    if matches.empty:
        return {"found": False}

    # Pin down the single best-matching product, then pull every live
    # offer for it so retailers can be compared side by side.
    top_pid = matches.iloc[0]['product_id']
    offers = market[(market['product_id'] == top_pid) & (market['is_offer'])]

    if offers.empty:
        return {"found": False}

    lowest = offers['price'].min()
    highest = offers['price'].max()
    average = offers['price'].mean()
    winner = offers.loc[offers['price'].idxmin()]

    return {
        "found": True,
        "name": winner['product_name'],
        "brand": winner['brand'],
        "category": winner['category'],
        "view_count": int(winner['views']),
        "price_stats": {
            "min": float(lowest),
            "max": float(highest),
            "avg": float(average),
            "spread": float(highest - lowest)
        },
        "best_deal": {
            "retailer": winner['retailer'],
            "price": float(lowest)
        },
        "all_offers": offers[['retailer', 'price']].to_dict('records')
    }
|
| 429 |
|
| 430 |
# =========================
|
| 431 |
+
# 3. Gemini Context Layer
|
| 432 |
# =========================
|
| 433 |
|
| 434 |
+
def generate_analyst_response(transcript: str) -> Dict[str, Any]:
    """
    End-to-end analyst pipeline:
    1. Detect Intent (Basket? Utility? Single Item?)
    2. Run Python Analyst Function.
    3. Generate Text Response.

    Args:
        transcript: Raw user input (chat message or call transcript).

    Returns:
        Dict with "intent", "analyst_data" and the synthesized "message".
        (When the Gemini client is unavailable, only "message" is present,
        matching the original early-exit contract.)
    """
    if not _gemini_client:
        return {"message": "AI Brain offline."}

    # Step A: Intent Classification
    INTENT_PROMPT = """
    Analyze the user input. Return JSON.
    Intents:
    - "BASKET": User has a list of items (e.g. "Oil, bread and rice").
    - "UTILITY": User asks about ZESA, Fuel, Gas prices or units.
    - "PRODUCT_INTEL": User asks for "Cheapest X", "Price of X", "Popular X".
    - "CHAT": General conversation.

    Output: { "intent": "...", "items": ["..."], "utility_type": "zesa/fuel/gas", "amount": number }
    """

    try:
        resp = _gemini_client.models.generate_content(
            model=GEMINI_MODEL,
            contents=INTENT_PROMPT + "\nInput: " + transcript,
            config=types.GenerateContentConfig(response_mime_type="application/json")
        )
        parsed = json.loads(resp.text)
    except Exception as e:  # narrowed from bare except; keep the CHAT fallback
        logger.warning("Intent classification failed: %s", e)
        parsed = {"intent": "CHAT"}

    intent = parsed.get("intent")
    data_context: Dict[str, Any] = {}

    # Step B: Execute Analyst Logic
    if intent == "BASKET":
        items = parsed.get("items", [])
        if items:
            data_context = calculate_basket_optimization(items)

    elif intent == "UTILITY":
        # The model may emit JSON null for these fields; `get(..., "")` does
        # not cover that, so coerce explicitly to safe defaults.
        u_type = parsed.get("utility_type") or ""
        amt = parsed.get("amount") or 0
        if "zesa" in u_type and amt > 0:
            data_context = calculate_zesa_units(float(amt))
        elif "fuel" in u_type or "petrol" in u_type:
            rate = ZIM_UTILITIES["fuel_petrol"]
            data_context = {"type": "Petrol", "rate": rate, "units": amt / rate}

    elif intent == "PRODUCT_INTEL":
        items = parsed.get("items", [])
        if items:
            data_context = get_product_intelligence(items[0])

    # Step C: Synthesis (Speak based on Data)
    SYNTHESIS_PROMPT = f"""
    You are Jessica, the Pricelyst Analyst.
    User Input: "{transcript}"

    ANALYST DATA (Strictly use this):
    {json.dumps(data_context, indent=2)}

    If 'actionable' is false or data is empty, suggest what data you need.
    If basket data exists, summarize: "The best store for your basket is [Retailer] at $[Total]."
    If ZESA data exists, be precise about units.
    Keep it helpful and Zimbabwean.
    """

    try:
        final_resp = _gemini_client.models.generate_content(
            model=GEMINI_MODEL,
            contents=SYNTHESIS_PROMPT
        )
        message = final_resp.text
    except Exception as e:
        # A synthesis hiccup should not 500 the endpoint — the analyst data
        # is still useful to the caller on its own.
        logger.error("Synthesis failed: %s", e)
        message = "Sorry, I could not generate a summary right now."

    return {
        "intent": intent,
        "analyst_data": data_context,
        "message": message
    }
|
| 511 |
|
| 512 |
# =========================
|
| 513 |
+
# 4. Endpoints
|
| 514 |
# =========================
|
| 515 |
|
| 516 |
@app.get("/health")
def health():
    """Liveness probe: reports how many offers are indexed and the API base."""
    index = get_market_index()
    payload = {
        "ok": True,
        "offers_indexed": len(index),
        "api_source": PRICE_API_BASE
    }
    return jsonify(payload)
|
| 524 |
|
| 525 |
+
@app.post("/chat")
def chat():
    """Text Chat Interface.

    Expects JSON body: {"message": str, "profile_id": str}.
    Runs the analyst pipeline and best-effort logs the exchange to Firestore.
    Returns 400 when profile_id is missing.
    """
    body = request.get_json(silent=True) or {}
    msg = body.get("message", "")
    pid = body.get("profile_id")

    if not pid:
        return jsonify({"ok": False}), 400

    response_data = generate_analyst_response(msg)

    # Log interaction (best-effort: a persistence failure must not break
    # the reply the user is waiting on — previously this was unguarded).
    if db:
        try:
            db.collection("pricelyst_profiles").document(pid).collection("chat_logs").add({
                "message": msg,
                "response": response_data,
                "ts": datetime.now(timezone.utc).isoformat()
            })
        except Exception as e:
            logger.error("Failed to log chat for %s: %s", pid, e)

    return jsonify({"ok": True, "data": response_data})
|
| 545 |
+
|
| 546 |
@app.post("/api/call-briefing")
def call_briefing():
    """
    Context for ElevenLabs.
    Crucially: We DO NOT send the whole database. We send Memory + Utilities.
    """
    body = request.get_json(silent=True) or {}
    pid = body.get("profile_id")
    username = body.get("username")

    if not pid:
        return jsonify({"ok": False}), 400

    prof = {}
    if db:
        profile_ref = db.collection("pricelyst_profiles").document(pid)
        snapshot = profile_ref.get()
        if snapshot.exists:
            prof = snapshot.to_dict()
        else:
            # First contact: create a bare profile so later calls can enrich it.
            profile_ref.set({"created_at": datetime.now(timezone.utc).isoformat()})

    # Simple snapshot
    kpi_snapshot = {
        "username": username or prof.get("username", "Friend"),
        "utilities": ZIM_UTILITIES,
        "instructions": "You are Jessica. If asked for prices, say you can check the live system. For ZESA/Fuel, use the 'utilities' variable."
    }

    briefing = {
        "ok": True,
        "memory_summary": prof.get("memory_summary", ""),
        "kpi_snapshot": json.dumps(kpi_snapshot)
    }
    return jsonify(briefing)
|
| 579 |
|
| 580 |
def _update_call_memory(pid: str, transcript: str) -> None:
    """Best-effort: fold new transcript details into the profile's memory summary."""
    try:
        prof_ref = db.collection("pricelyst_profiles").document(pid)
        # BUGFIX: to_dict() returns None for a missing document, so the old
        # chained .get() raised AttributeError for first-time callers.
        curr_mem = (prof_ref.get().to_dict() or {}).get("memory_summary", "")

        mem_prompt = f"Update this memory summary with new details from the transcript (names, preferences, budget):\nOLD: {curr_mem}\nTRANSCRIPT: {transcript}"

        resp = _gemini_client.models.generate_content(
            model=GEMINI_MODEL,
            contents=mem_prompt
        )
        prof_ref.set({"memory_summary": resp.text}, merge=True)
    except Exception as e:
        logger.error(f"Memory Update Failed: {e}")


def _build_plan_payload(data: Dict[str, Any]) -> Dict[str, Any]:
    """Render an actionable basket-analysis result into a shopping-plan payload."""
    best = data["best_store"]

    # Markdown Generation
    md = f"# Your Shopping Plan\n\n"
    md += f"**Best Store:** {best['retailer']}\n"
    md += f"**Total Cost:** ${best['total_price']:.2f} (for {best['item_count']} items)\n\n"

    md += "| Item | Found? |\n|---|---|\n"
    for item in data['basket_items']:
        found = "✅" if item in best['found_items'] else "❌"
        md += f"| {item} | {found} |\n"

    if data['global_missing']:
        md += f"\n**Missing from Market:** {', '.join(data['global_missing'])}"

    return {
        "is_actionable": True,
        "title": f"Plan: {best['retailer']} (${best['total_price']:.2f})",
        "markdown_content": md,
        "items": data['basket_items']
    }


@app.post("/api/log-call-usage")
def log_call_usage():
    """
    Post-Call Processor.
    1. Update Memory.
    2. Generate Grounded Shopping Plan.
    3. Log the call.

    Expects JSON body: {"profile_id": str, "transcript": str}.
    Returns the generated plan (or None when the transcript was not an
    actionable basket request).
    """
    body = request.get_json(silent=True) or {}
    pid = body.get("profile_id")
    transcript = body.get("transcript", "")

    if not pid:
        return jsonify({"ok": False}), 400

    logger.info(f"Processing Call {pid}. Len: {len(transcript)}")

    # 1. Update Memory (Gemini) — skip trivially short transcripts.
    if len(transcript) > 20 and db:
        _update_call_memory(pid, transcript)

    # 2. Generate Plan (Analyst Engine Integration)
    # We re-run the Analyst logic specifically for the plan.
    analyst_result = generate_analyst_response(transcript)
    plan_data = {}

    if analyst_result.get("intent") == "BASKET" and analyst_result.get("analyst_data", {}).get("actionable"):
        # We have a valid basket!
        plan_data = _build_plan_payload(analyst_result["analyst_data"])

        # Save Plan — only when one was actually built, so we never persist
        # empty placeholder documents.
        if db:
            db.collection("pricelyst_profiles").document(pid).collection("shopping_plans").add({
                **plan_data,
                "created_at": datetime.now(timezone.utc).isoformat()
            })

    # 3. Log Call
    if db:
        db.collection("pricelyst_profiles").document(pid).collection("call_logs").add({
            "transcript": transcript,
            "analyst_result": analyst_result,
            "ts": datetime.now(timezone.utc).isoformat()
        })

    return jsonify({
        "ok": True,
        "shopping_plan": plan_data if plan_data.get("is_actionable") else None
    })
|
| 660 |
|
| 661 |
+
# ––––– Shopping Plan CRUD (Standard) –––––
|
| 662 |
|
| 663 |
@app.get("/api/shopping-plans")
|
| 664 |
def list_plans():
|
| 665 |
pid = request.args.get("profile_id")
|
| 666 |
+
if not pid or not db: return jsonify({"ok": False}), 400
|
| 667 |
try:
|
| 668 |
docs = db.collection("pricelyst_profiles").document(pid).collection("shopping_plans") \
|
| 669 |
.order_by("created_at", direction=firestore.Query.DESCENDING).limit(10).stream()
|
|
|
|
| 675 |
@app.delete("/api/shopping-plans/<plan_id>")
def delete_plan(plan_id):
    """Delete one saved shopping plan belonging to the given profile.

    Query params: profile_id (required). Returns 400 when it is missing
    or persistence is unavailable, 500 when the delete itself fails.
    """
    pid = request.args.get("profile_id")
    if not pid or not db:
        return jsonify({"ok": False}), 400
    try:
        db.collection("pricelyst_profiles").document(pid).collection("shopping_plans").document(plan_id).delete()
        return jsonify({"ok": True})
    except Exception as e:  # narrowed from bare except; surface the cause in logs
        logger.error("Failed to delete plan %s for %s: %s", plan_id, pid, e)
        return jsonify({"ok": False}), 500
|
| 684 |
|
| 685 |
# =========================
|
| 686 |
# Main
|
|
|
|
| 688 |
|
| 689 |
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    # Pre-warm Cache so the first request doesn't pay the index-build cost.
    try:
        get_market_index(force_refresh=True)
    except Exception as e:  # warm-up is best-effort; log instead of silently swallowing
        logger.warning("Cache pre-warm failed: %s", e)
    app.run(host="0.0.0.0", port=port)
|