Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
"""
|
| 2 |
-
main.py — Pricelyst Shopping Advisor (Jessica Edition - Grounded Data
|
| 3 |
|
| 4 |
✅ Flask API
|
| 5 |
✅ Firebase Admin persistence
|
| 6 |
-
✅ Gemini via google-genai SDK
|
| 7 |
✅ RAG (Retrieval Augmented Generation) for Shopping Plans
|
|
|
|
| 8 |
✅ Real Pricing Logic (No Hallucinations)
|
| 9 |
-
✅ Backwards Compatible with React Client
|
| 10 |
|
| 11 |
ENV VARS:
|
| 12 |
- GOOGLE_API_KEY=...
|
|
@@ -70,7 +70,6 @@ def init_firestore_from_env() -> firestore.Client:
|
|
| 70 |
return firestore.client()
|
| 71 |
|
| 72 |
if not FIREBASE_ENV:
|
| 73 |
-
# Fallback for local dev if needed, or raise error
|
| 74 |
logger.warning("FIREBASE env var missing. Persistence disabled.")
|
| 75 |
return None
|
| 76 |
|
|
@@ -139,7 +138,7 @@ def _norm_str(s: Any) -> str:
|
|
| 139 |
|
| 140 |
def _safe_json_loads(s: str, fallback: Any):
|
| 141 |
try:
|
| 142 |
-
#
|
| 143 |
if "```json" in s:
|
| 144 |
s = s.split("```json")[1].split("```")[0]
|
| 145 |
elif "```" in s:
|
|
@@ -188,16 +187,6 @@ def update_profile(profile_id: str, patch: Dict[str, Any]) -> None:
|
|
| 188 |
except Exception as e:
|
| 189 |
logger.error("DB Error update_profile: %s", e)
|
| 190 |
|
| 191 |
-
def log_chat(profile_id: str, payload: Dict[str, Any]) -> None:
    """Store one chat record under the profile's ``chat_logs`` subcollection.

    The stored document is ``payload`` plus a ``ts`` field taken from
    ``now_utc_iso()``. Nothing is written when ``db`` is falsy, and any
    exception raised while writing is logged instead of propagated.
    """
    if not db:
        return

    try:
        profile_doc = db.collection("pricelyst_profiles").document(profile_id)
        chat_logs = profile_doc.collection("chat_logs")
        record = {**payload, "ts": now_utc_iso()}
        chat_logs.add(record)
    except Exception as e:
        logger.error("DB Error log_chat: %s", e)
|
| 200 |
-
|
| 201 |
def log_call(profile_id: str, payload: Dict[str, Any]) -> str:
|
| 202 |
if not db: return str(int(time.time()))
|
| 203 |
try:
|
|
@@ -247,7 +236,6 @@ def flatten_products_to_df(products: List[Dict[str, Any]]) -> pd.DataFrame:
|
|
| 247 |
p_id = p.get("id")
|
| 248 |
p_name = p.get("name") or "Unknown"
|
| 249 |
p_desc = p.get("description") or ""
|
| 250 |
-
p_slug = p.get("slug") or ""
|
| 251 |
|
| 252 |
# Get Primary Category
|
| 253 |
cat_name = "General"
|
|
@@ -261,9 +249,8 @@ def flatten_products_to_df(products: List[Dict[str, Any]]) -> pd.DataFrame:
|
|
| 261 |
# Iterate Prices (Real Offers)
|
| 262 |
prices = p.get("prices") or []
|
| 263 |
|
| 264 |
-
#
|
| 265 |
if not prices:
|
| 266 |
-
# Check for base price on product object as fallback
|
| 267 |
base_price = _coerce_float(p.get("price"))
|
| 268 |
if base_price > 0:
|
| 269 |
rows.append({
|
|
@@ -273,7 +260,7 @@ def flatten_products_to_df(products: List[Dict[str, Any]]) -> pd.DataFrame:
|
|
| 273 |
"description": p_desc,
|
| 274 |
"category": cat_name,
|
| 275 |
"brand": brand_name,
|
| 276 |
-
"retailer": "Pricelyst Base",
|
| 277 |
"price": base_price,
|
| 278 |
"image": p.get("thumbnail") or p.get("image"),
|
| 279 |
})
|
|
@@ -307,12 +294,11 @@ def get_data_index(force_refresh: bool = False) -> pd.DataFrame:
|
|
| 307 |
"""Singleton accessor for the product Dataframe."""
|
| 308 |
global _product_cache
|
| 309 |
|
| 310 |
-
# Refresh if empty or stale
|
| 311 |
is_stale = (time.time() - _product_cache["ts"]) > PRODUCT_CACHE_TTL_SEC
|
| 312 |
if force_refresh or is_stale or _product_cache["df_offers"].empty:
|
| 313 |
logger.info("Refreshing Product Index...")
|
| 314 |
try:
|
| 315 |
-
raw_products = fetch_products(max_pages=15)
|
| 316 |
df = flatten_products_to_df(raw_products)
|
| 317 |
|
| 318 |
_product_cache["ts"] = time.time()
|
|
@@ -321,6 +307,9 @@ def get_data_index(force_refresh: bool = False) -> pd.DataFrame:
|
|
| 321 |
logger.info(f"Index Refreshed: {len(df)} offers from {len(raw_products)} products.")
|
| 322 |
except Exception as e:
|
| 323 |
logger.error(f"Failed to refresh index: {e}")
|
|
|
|
|
|
|
|
|
|
| 324 |
|
| 325 |
return _product_cache["df_offers"]
|
| 326 |
|
|
@@ -331,32 +320,23 @@ def get_data_index(force_refresh: bool = False) -> pd.DataFrame:
|
|
| 331 |
def search_index(df: pd.DataFrame, query: str, limit: int = 5) -> List[Dict[str, Any]]:
|
| 332 |
"""
|
| 333 |
Search the DF using token overlap + substring matching.
|
| 334 |
-
Returns best distinct product matches with their best price.
|
| 335 |
"""
|
| 336 |
if df.empty: return []
|
| 337 |
|
| 338 |
q_norm = _norm_str(query)
|
| 339 |
q_tokens = set(q_norm.split())
|
| 340 |
|
| 341 |
-
# 1. Exact Substring Filter (Fast)
|
| 342 |
-
# matching_rows = df[df['clean_name'].str.contains(q_norm, regex=False)]
|
| 343 |
-
|
| 344 |
-
# 2. Token Overlap Scoring (Better for "Cooking Oil" -> "Olivine Cooking Oil")
|
| 345 |
-
# We calculate a score 0-1 based on how many query tokens exist in product name
|
| 346 |
-
|
| 347 |
def score_text(text):
|
| 348 |
if not isinstance(text, str): return 0
|
| 349 |
text_tokens = set(text.split())
|
| 350 |
if not text_tokens: return 0
|
| 351 |
intersection = q_tokens.intersection(text_tokens)
|
| 352 |
-
return len(intersection) / len(q_tokens)
|
| 353 |
|
| 354 |
-
# Copy to avoid warnings (this is in-memory, acceptable for <10k rows)
|
| 355 |
-
# For performance at scale, use vector DB or proper Search engine
|
| 356 |
temp_df = df.copy()
|
| 357 |
temp_df['score'] = temp_df['clean_name'].apply(score_text)
|
| 358 |
|
| 359 |
-
# Filter for relevant matches
|
| 360 |
matches = temp_df[ (temp_df['score'] > 0.4) | (temp_df['clean_name'].str.contains(q_norm, regex=False)) ]
|
| 361 |
|
| 362 |
if matches.empty:
|
|
@@ -369,7 +349,7 @@ def search_index(df: pd.DataFrame, query: str, limit: int = 5) -> List[Dict[str,
|
|
| 369 |
# Sort by Score desc, then Price asc
|
| 370 |
matches = matches.sort_values(by=['score', 'price'], ascending=[False, True])
|
| 371 |
|
| 372 |
-
#
|
| 373 |
unique_products = []
|
| 374 |
seen_ids = set()
|
| 375 |
|
|
@@ -391,29 +371,84 @@ def search_index(df: pd.DataFrame, query: str, limit: int = 5) -> List[Dict[str,
|
|
| 391 |
return unique_products
|
| 392 |
|
| 393 |
# =========================
|
| 394 |
-
# Gemini Functions
|
| 395 |
# =========================
|
| 396 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
def gemini_generate_json(system_prompt: str, user_prompt: str) -> Dict[str, Any]:
|
|
|
|
| 398 |
if not _gemini_client: return {}
|
| 399 |
try:
|
| 400 |
response = _gemini_client.models.generate_content(
|
| 401 |
model=GEMINI_MODEL,
|
| 402 |
-
contents=
|
| 403 |
-
types.Content(role="user", parts=[
|
| 404 |
-
types.Part.from_text(system_prompt + "\n\n" + user_prompt)
|
| 405 |
-
])
|
| 406 |
-
],
|
| 407 |
config=types.GenerateContentConfig(
|
| 408 |
response_mime_type="application/json",
|
| 409 |
temperature=0.2
|
| 410 |
)
|
| 411 |
)
|
| 412 |
-
return
|
| 413 |
except Exception as e:
|
| 414 |
logger.error(f"Gemini JSON Error: {e}")
|
| 415 |
return {}
|
| 416 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
# =========================
|
| 418 |
# Shopping Plan Engine (RAG)
|
| 419 |
# =========================
|
|
@@ -421,7 +456,7 @@ def gemini_generate_json(system_prompt: str, user_prompt: str) -> Dict[str, Any]
|
|
| 421 |
EXTRACT_SYSTEM_PROMPT = """
|
| 422 |
You are a Shopping Assistant Data Extractor.
|
| 423 |
Analyze the transcript and extract a list of shopping items the user implicitly or explicitly wants.
|
| 424 |
-
Return JSON: { "items": [ { "name": "searchable term", "qty": "quantity string"
|
| 425 |
If no items found, return { "items": [] }.
|
| 426 |
"""
|
| 427 |
|
|
@@ -430,29 +465,26 @@ You are Jessica, Pricelyst's Shopping Advisor.
|
|
| 430 |
Generate a shopping plan based on the USER TRANSCRIPT and the DATA CONTEXT provided.
|
| 431 |
|
| 432 |
RULES:
|
| 433 |
-
1. USE REAL DATA: Use the prices and retailers found in DATA CONTEXT.
|
| 434 |
-
2.
|
| 435 |
-
3. FORMAT: Return strict JSON with a 'markdown_content' field containing a professional
|
| 436 |
|
| 437 |
JSON SCHEMA:
|
| 438 |
{
|
| 439 |
"is_actionable": true,
|
| 440 |
-
"title": "
|
| 441 |
"markdown_content": "# Title\n\n..."
|
| 442 |
}
|
| 443 |
"""
|
| 444 |
|
| 445 |
def build_shopping_plan(transcript: str) -> Dict[str, Any]:
|
| 446 |
"""
|
| 447 |
-
RAG Pipeline:
|
| 448 |
-
1. Extract items from text.
|
| 449 |
-
2. Search DB for items.
|
| 450 |
-
3. Generate report using DB results.
|
| 451 |
"""
|
| 452 |
if len(transcript) < 10:
|
| 453 |
return {"is_actionable": False}
|
| 454 |
|
| 455 |
-
#
|
| 456 |
extraction = gemini_generate_json(EXTRACT_SYSTEM_PROMPT, f"TRANSCRIPT:\n{transcript}")
|
| 457 |
items_requested = extraction.get("items", [])
|
| 458 |
|
|
@@ -461,44 +493,35 @@ def build_shopping_plan(transcript: str) -> Dict[str, Any]:
|
|
| 461 |
|
| 462 |
df = get_data_index()
|
| 463 |
|
| 464 |
-
#
|
| 465 |
context_lines = []
|
| 466 |
-
total_est = 0.0
|
| 467 |
|
| 468 |
for item in items_requested:
|
| 469 |
term = item.get("name", "")
|
| 470 |
qty_str = item.get("qty", "1")
|
| 471 |
|
| 472 |
-
# Check
|
| 473 |
ess_key = next((k for k in ZIM_ESSENTIALS if k in term.lower()), None)
|
| 474 |
-
|
| 475 |
if ess_key:
|
| 476 |
data = ZIM_ESSENTIALS[ess_key]
|
| 477 |
-
|
| 478 |
-
context_lines.append(f"- ITEM: {term} (Qty: {qty_str}) | FOUND: TRUE | SOURCE: Market Rate | PRICE: ${price} | RETAILER: {data['retailer']}")
|
| 479 |
-
total_est += price
|
| 480 |
continue
|
| 481 |
|
| 482 |
-
# Search
|
| 483 |
hits = search_index(df, term, limit=1)
|
| 484 |
-
|
| 485 |
if hits:
|
| 486 |
best = hits[0]
|
| 487 |
-
context_lines.append(f"- ITEM: {term}
|
| 488 |
-
total_est += best['price']
|
| 489 |
else:
|
| 490 |
-
context_lines.append(f"- ITEM: {term}
|
| 491 |
|
| 492 |
data_context = "\n".join(context_lines)
|
| 493 |
-
logger.info(f"Plan
|
| 494 |
|
| 495 |
-
#
|
| 496 |
-
final_prompt = f"TRANSCRIPT:\n{transcript}\n\nDATA CONTEXT (Real Prices
|
| 497 |
plan = gemini_generate_json(SYNTHESIS_SYSTEM_PROMPT, final_prompt)
|
| 498 |
|
| 499 |
-
# Add metadata for frontend
|
| 500 |
-
plan["items_found"] = len([l for l in context_lines if "FOUND: TRUE" in l])
|
| 501 |
-
|
| 502 |
return plan
|
| 503 |
|
| 504 |
# =========================
|
|
@@ -511,52 +534,13 @@ def health():
|
|
| 511 |
return jsonify({
|
| 512 |
"ok": True,
|
| 513 |
"ts": now_utc_iso(),
|
| 514 |
-
"db_connected": bool(db),
|
| 515 |
"products_indexed": len(df)
|
| 516 |
})
|
| 517 |
|
| 518 |
-
@app.post("/chat")
def chat_endpoint():
    """Text chat endpoint - kept mostly for legacy/debug, similar logic to voice.

    Reads ``profile_id`` (required) and ``message`` from the JSON body.
    Messages that mention "price" or "find", or that are shorter than five
    words, are looked up in the product index; everything else gets a canned
    reply. Every handled message is recorded through ``log_chat``.

    Returns:
        A JSON response with ``ok``, ``intent.actionable`` (true only when
        products were found) and ``data``; or a 400 response when
        ``profile_id`` is missing.
    """
    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        # Missing, unparsable, or non-object JSON (e.g. a top-level list):
        # treat it as an empty request rather than failing on ``.get``.
        body = {}

    profile_id = body.get("profile_id")
    if not profile_id:
        return jsonify({"ok": False, "error": "No profile_id"}), 400

    message = body.get("message", "")
    if not isinstance(message, str):
        # The string methods below would raise on numbers, lists, etc.
        message = ""

    # Simple intent check for search
    intent = "chat"
    lowered = message.lower()
    # A blank message must not be searched: search_index scores rows by
    # dividing by the number of query tokens, which is zero for an empty query.
    wants_search = bool(message.strip()) and (
        "price" in lowered or "find" in lowered or len(message.split()) < 5
    )

    if wants_search:
        df = get_data_index()
        hits = search_index(df, message, limit=3)
        if hits:
            intent = "product_found"
            reply_data = {
                "type": "product_card",
                "products": hits
            }
        else:
            reply_data = {"message": "I couldn't find that product in our database."}
    else:
        # Gemini Chat fallback
        reply_data = {"message": "I can help you plan your shopping. Tell me what you need!"}

    log_chat(profile_id, {"message": message, "intent": intent, "reply": reply_data})

    return jsonify({
        "ok": True,
        "intent": {"actionable": intent == "product_found"},
        "data": reply_data
    })
|
| 554 |
-
|
| 555 |
@app.post("/api/call-briefing")
|
| 556 |
def call_briefing():
|
| 557 |
"""
|
| 558 |
-
|
| 559 |
-
Provides context (memory, user name, tone) to the AI Agent.
|
| 560 |
"""
|
| 561 |
body = request.get_json(silent=True) or {}
|
| 562 |
profile_id = body.get("profile_id")
|
|
@@ -569,12 +553,12 @@ def call_briefing():
|
|
| 569 |
if username:
|
| 570 |
update_profile(profile_id, {"username": username})
|
| 571 |
|
| 572 |
-
#
|
| 573 |
kpi_data = {
|
| 574 |
-
"username": username or prof.get("username") or "
|
| 575 |
"market_rates": ZIM_ESSENTIALS,
|
| 576 |
-
"tone": "
|
| 577 |
-
"
|
| 578 |
}
|
| 579 |
|
| 580 |
return jsonify({
|
|
@@ -586,10 +570,9 @@ def call_briefing():
|
|
| 586 |
@app.post("/api/log-call-usage")
|
| 587 |
def log_call_usage():
|
| 588 |
"""
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
3. Saves to Firestore.
|
| 593 |
"""
|
| 594 |
body = request.get_json(silent=True) or {}
|
| 595 |
profile_id = body.get("profile_id")
|
|
@@ -600,7 +583,10 @@ def log_call_usage():
|
|
| 600 |
|
| 601 |
logger.info(f"Processing Call for {profile_id}. Transcript Len: {len(transcript)}")
|
| 602 |
|
| 603 |
-
#
|
|
|
|
|
|
|
|
|
|
| 604 |
plan_data = {}
|
| 605 |
plan_id = None
|
| 606 |
|
|
@@ -609,7 +595,6 @@ def log_call_usage():
|
|
| 609 |
plan_data = build_shopping_plan(transcript)
|
| 610 |
|
| 611 |
if plan_data.get("is_actionable"):
|
| 612 |
-
# Persist Plan
|
| 613 |
plan_ref = db.collection("pricelyst_profiles").document(profile_id).collection("shopping_plans").document()
|
| 614 |
plan_data["id"] = plan_ref.id
|
| 615 |
plan_data["created_at"] = now_utc_iso()
|
|
@@ -619,18 +604,13 @@ def log_call_usage():
|
|
| 619 |
except Exception as e:
|
| 620 |
logger.error(f"Plan Gen Error: {e}")
|
| 621 |
|
| 622 |
-
# Log Call
|
| 623 |
log_call(profile_id, {
|
| 624 |
"transcript": transcript,
|
| 625 |
"duration": body.get("duration_seconds"),
|
| 626 |
"plan_id": plan_id
|
| 627 |
})
|
| 628 |
|
| 629 |
-
# Update Counters
|
| 630 |
-
prof = get_profile(profile_id)
|
| 631 |
-
cnt = prof.get("counters", {})
|
| 632 |
-
update_profile(profile_id, {"counters": {"calls": int(cnt.get("calls", 0)) + 1}})
|
| 633 |
-
|
| 634 |
return jsonify({
|
| 635 |
"ok": True,
|
| 636 |
"shopping_plan": plan_data if plan_data.get("is_actionable") else None
|
|
@@ -666,7 +646,7 @@ def delete_plan(plan_id):
|
|
| 666 |
|
| 667 |
if __name__ == "__main__":
|
| 668 |
port = int(os.environ.get("PORT", 7860))
|
| 669 |
-
# Pre-warm cache
|
| 670 |
try:
|
| 671 |
get_data_index(force_refresh=True)
|
| 672 |
except:
|
|
|
|
| 1 |
"""
|
| 2 |
+
main.py — Pricelyst Shopping Advisor (Jessica Edition - Grounded Data & Memory)
|
| 3 |
|
| 4 |
✅ Flask API
|
| 5 |
✅ Firebase Admin persistence
|
| 6 |
+
✅ Gemini via google-genai SDK (Fixed & Robust)
|
| 7 |
✅ RAG (Retrieval Augmented Generation) for Shopping Plans
|
| 8 |
+
✅ Long-Term Memory (Personal Details Extraction)
|
| 9 |
✅ Real Pricing Logic (No Hallucinations)
|
|
|
|
| 10 |
|
| 11 |
ENV VARS:
|
| 12 |
- GOOGLE_API_KEY=...
|
|
|
|
| 70 |
return firestore.client()
|
| 71 |
|
| 72 |
if not FIREBASE_ENV:
|
|
|
|
| 73 |
logger.warning("FIREBASE env var missing. Persistence disabled.")
|
| 74 |
return None
|
| 75 |
|
|
|
|
| 138 |
|
| 139 |
def _safe_json_loads(s: str, fallback: Any):
|
| 140 |
try:
|
| 141 |
+
# Clean potential markdown wrapping
|
| 142 |
if "```json" in s:
|
| 143 |
s = s.split("```json")[1].split("```")[0]
|
| 144 |
elif "```" in s:
|
|
|
|
| 187 |
except Exception as e:
|
| 188 |
logger.error("DB Error update_profile: %s", e)
|
| 189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
def log_call(profile_id: str, payload: Dict[str, Any]) -> str:
|
| 191 |
if not db: return str(int(time.time()))
|
| 192 |
try:
|
|
|
|
| 236 |
p_id = p.get("id")
|
| 237 |
p_name = p.get("name") or "Unknown"
|
| 238 |
p_desc = p.get("description") or ""
|
|
|
|
| 239 |
|
| 240 |
# Get Primary Category
|
| 241 |
cat_name = "General"
|
|
|
|
| 249 |
# Iterate Prices (Real Offers)
|
| 250 |
prices = p.get("prices") or []
|
| 251 |
|
| 252 |
+
# Fallback if no prices found
|
| 253 |
if not prices:
|
|
|
|
| 254 |
base_price = _coerce_float(p.get("price"))
|
| 255 |
if base_price > 0:
|
| 256 |
rows.append({
|
|
|
|
| 260 |
"description": p_desc,
|
| 261 |
"category": cat_name,
|
| 262 |
"brand": brand_name,
|
| 263 |
+
"retailer": "Pricelyst Base",
|
| 264 |
"price": base_price,
|
| 265 |
"image": p.get("thumbnail") or p.get("image"),
|
| 266 |
})
|
|
|
|
| 294 |
"""Singleton accessor for the product Dataframe."""
|
| 295 |
global _product_cache
|
| 296 |
|
|
|
|
| 297 |
is_stale = (time.time() - _product_cache["ts"]) > PRODUCT_CACHE_TTL_SEC
|
| 298 |
if force_refresh or is_stale or _product_cache["df_offers"].empty:
|
| 299 |
logger.info("Refreshing Product Index...")
|
| 300 |
try:
|
| 301 |
+
raw_products = fetch_products(max_pages=15)
|
| 302 |
df = flatten_products_to_df(raw_products)
|
| 303 |
|
| 304 |
_product_cache["ts"] = time.time()
|
|
|
|
| 307 |
logger.info(f"Index Refreshed: {len(df)} offers from {len(raw_products)} products.")
|
| 308 |
except Exception as e:
|
| 309 |
logger.error(f"Failed to refresh index: {e}")
|
| 310 |
+
if isinstance(_product_cache["df_offers"], pd.DataFrame):
|
| 311 |
+
return _product_cache["df_offers"]
|
| 312 |
+
return pd.DataFrame()
|
| 313 |
|
| 314 |
return _product_cache["df_offers"]
|
| 315 |
|
|
|
|
| 320 |
def search_index(df: pd.DataFrame, query: str, limit: int = 5) -> List[Dict[str, Any]]:
|
| 321 |
"""
|
| 322 |
Search the DF using token overlap + substring matching.
|
|
|
|
| 323 |
"""
|
| 324 |
if df.empty: return []
|
| 325 |
|
| 326 |
q_norm = _norm_str(query)
|
| 327 |
q_tokens = set(q_norm.split())
|
| 328 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
def score_text(text):
|
| 330 |
if not isinstance(text, str): return 0
|
| 331 |
text_tokens = set(text.split())
|
| 332 |
if not text_tokens: return 0
|
| 333 |
intersection = q_tokens.intersection(text_tokens)
|
| 334 |
+
return len(intersection) / len(q_tokens)
|
| 335 |
|
|
|
|
|
|
|
| 336 |
temp_df = df.copy()
|
| 337 |
temp_df['score'] = temp_df['clean_name'].apply(score_text)
|
| 338 |
|
| 339 |
+
# Filter for relevant matches
|
| 340 |
matches = temp_df[ (temp_df['score'] > 0.4) | (temp_df['clean_name'].str.contains(q_norm, regex=False)) ]
|
| 341 |
|
| 342 |
if matches.empty:
|
|
|
|
| 349 |
# Sort by Score desc, then Price asc
|
| 350 |
matches = matches.sort_values(by=['score', 'price'], ascending=[False, True])
|
| 351 |
|
| 352 |
+
# Unique product logic
|
| 353 |
unique_products = []
|
| 354 |
seen_ids = set()
|
| 355 |
|
|
|
|
| 371 |
return unique_products
|
| 372 |
|
| 373 |
# =========================
|
| 374 |
+
# Gemini Functions (FIXED & ROBUST)
|
| 375 |
# =========================
|
| 376 |
|
| 377 |
+
def gemini_generate_text(system_prompt: str, user_prompt: str) -> str:
    """Generate free-form text from Gemini for a system/user prompt pair.

    The two prompts are joined with a blank line and sent as one ``contents``
    string at temperature 0.4.

    Returns:
        The response text, or ``""`` when no Gemini client is configured,
        the response carries no text, or the call raises (the error is
        logged).
    """
    if not _gemini_client:
        return ""

    try:
        # Simplified call: the whole prompt goes in as a plain string.
        result = _gemini_client.models.generate_content(
            model=GEMINI_MODEL,
            contents=system_prompt + "\n\n" + user_prompt,
            config=types.GenerateContentConfig(temperature=0.4),
        )
        text = result.text
        if text:
            return text
        return ""
    except Exception as e:
        logger.error(f"Gemini Text Error: {e}")
        return ""
|
| 393 |
+
|
| 394 |
def gemini_generate_json(system_prompt: str, user_prompt: str) -> Dict[str, Any]:
    """Ask Gemini for a JSON response and return it as a dict.

    The two prompts are joined with a blank line and sent as one ``contents``
    string with ``response_mime_type="application/json"`` at temperature 0.2.
    The reply text is decoded with ``_safe_json_loads``.

    Returns:
        The decoded JSON object, or ``{}`` when no Gemini client is
        configured, the call or decoding raises, or the decoded payload is
        not a JSON object.
    """
    if not _gemini_client:
        return {}

    try:
        response = _gemini_client.models.generate_content(
            model=GEMINI_MODEL,
            contents=system_prompt + "\n\n" + user_prompt,
            config=types.GenerateContentConfig(
                response_mime_type="application/json",
                temperature=0.2
            )
        )
        # Treat a missing ``.text`` as empty, as gemini_generate_text does,
        # so the decoder always receives a string.
        parsed = _safe_json_loads(response.text or "", {})
    except Exception as e:
        logger.error(f"Gemini JSON Error: {e}")
        return {}

    # Callers use dict methods (``.get``) on the result, so a top-level
    # array or scalar must not leak out despite being valid JSON.
    if not isinstance(parsed, dict):
        logger.error(
            "Gemini JSON Error: expected a JSON object, got %s",
            type(parsed).__name__,
        )
        return {}
    return parsed
|
| 410 |
|
| 411 |
+
# =========================
|
| 412 |
+
# Long Term Memory Engine
|
| 413 |
+
# =========================
|
| 414 |
+
|
| 415 |
+
MEMORY_SYSTEM_PROMPT = """
|
| 416 |
+
You are the Memory Manager for Jessica, an AI Shopping Assistant.
|
| 417 |
+
Your job is to update the User's "Memory Summary" based on their latest conversation.
|
| 418 |
+
|
| 419 |
+
INPUTS:
|
| 420 |
+
1. Current Memory: The existing summary of what we know about the user.
|
| 421 |
+
2. New Transcript: The latest conversation.
|
| 422 |
+
|
| 423 |
+
GOAL:
|
| 424 |
+
Update the Current Memory to include new details. Focus on:
|
| 425 |
+
- Names (User, Family, Friends)
|
| 426 |
+
- Dietary preferences or allergies
|
| 427 |
+
- Budget habits (e.g., "likes cheap meat", "buys bulk")
|
| 428 |
+
- Life events (e.g., "hosting a braai on Friday", "wife's birthday")
|
| 429 |
+
- Feedback (e.g., "loved the T-bone suggestion")
|
| 430 |
+
|
| 431 |
+
OUTPUT:
|
| 432 |
+
Return ONLY the updated text summary. Keep it concise (max 150 words).
|
| 433 |
+
"""
|
| 434 |
+
|
| 435 |
+
def update_long_term_memory(profile_id: str, transcript: str) -> None:
    """Refresh the profile's ``memory_summary`` from a new call transcript.

    Reads the current summary from the profile, asks Gemini (via
    ``gemini_generate_text`` with ``MEMORY_SYSTEM_PROMPT``) for an updated
    one, and stores it with ``update_profile``. This is best-effort: any
    failure, including a failed profile read, is logged and swallowed so the
    caller's request can continue.

    Args:
        profile_id: Profile whose memory is updated.
        transcript: Conversation text; anything empty or shorter than 20
            characters is ignored.
    """
    # Too little conversation to learn from; also covers None/empty input.
    if not transcript or len(transcript) < 20:
        return

    try:
        # get_profile's failure modes are not visible here, so tolerate a
        # falsy result and keep the read inside the best-effort guard.
        prof = get_profile(profile_id) or {}
        # ``or ""`` keeps a stored None from appearing as the text "None".
        current_memory = prof.get("memory_summary") or ""

        user_prompt = f"CURRENT MEMORY:\n{current_memory}\n\nNEW TRANSCRIPT:\n{transcript}"

        new_memory = (gemini_generate_text(MEMORY_SYSTEM_PROMPT, user_prompt) or "").strip()
        # Skip empty or trivially short replies so they cannot wipe out an
        # existing summary.
        if len(new_memory) > 10:
            update_profile(profile_id, {"memory_summary": new_memory})
            logger.info(f"Memory updated for {profile_id}")
    except Exception as e:
        logger.error(f"Memory update failed: {e}")
|
| 451 |
+
|
| 452 |
# =========================
|
| 453 |
# Shopping Plan Engine (RAG)
|
| 454 |
# =========================
|
|
|
|
| 456 |
EXTRACT_SYSTEM_PROMPT = """
|
| 457 |
You are a Shopping Assistant Data Extractor.
|
| 458 |
Analyze the transcript and extract a list of shopping items the user implicitly or explicitly wants.
|
| 459 |
+
Return JSON: { "items": [ { "name": "searchable term", "qty": "quantity string" } ] }
|
| 460 |
If no items found, return { "items": [] }.
|
| 461 |
"""
|
| 462 |
|
|
|
|
| 465 |
Generate a shopping plan based on the USER TRANSCRIPT and the DATA CONTEXT provided.
|
| 466 |
|
| 467 |
RULES:
|
| 468 |
+
1. USE REAL DATA: Use the prices and retailers found in DATA CONTEXT.
|
| 469 |
+
2. ESTIMATES: If context says "FOUND: FALSE", use your best guess for Zimbabwe prices and mark as "(Est)".
|
| 470 |
+
3. FORMAT: Return strict JSON with a 'markdown_content' field containing a professional report.
|
| 471 |
|
| 472 |
JSON SCHEMA:
|
| 473 |
{
|
| 474 |
"is_actionable": true,
|
| 475 |
+
"title": "Short Title",
|
| 476 |
"markdown_content": "# Title\n\n..."
|
| 477 |
}
|
| 478 |
"""
|
| 479 |
|
| 480 |
def build_shopping_plan(transcript: str) -> Dict[str, Any]:
|
| 481 |
"""
|
| 482 |
+
RAG Pipeline: Extract -> Search -> Synthesize
|
|
|
|
|
|
|
|
|
|
| 483 |
"""
|
| 484 |
if len(transcript) < 10:
|
| 485 |
return {"is_actionable": False}
|
| 486 |
|
| 487 |
+
# 1. Extract
|
| 488 |
extraction = gemini_generate_json(EXTRACT_SYSTEM_PROMPT, f"TRANSCRIPT:\n{transcript}")
|
| 489 |
items_requested = extraction.get("items", [])
|
| 490 |
|
|
|
|
| 493 |
|
| 494 |
df = get_data_index()
|
| 495 |
|
| 496 |
+
# 2. Retrieval (Grounding)
|
| 497 |
context_lines = []
|
|
|
|
| 498 |
|
| 499 |
for item in items_requested:
|
| 500 |
term = item.get("name", "")
|
| 501 |
qty_str = item.get("qty", "1")
|
| 502 |
|
| 503 |
+
# Check Essentials Fallback
|
| 504 |
ess_key = next((k for k in ZIM_ESSENTIALS if k in term.lower()), None)
|
|
|
|
| 505 |
if ess_key:
|
| 506 |
data = ZIM_ESSENTIALS[ess_key]
|
| 507 |
+
context_lines.append(f"- ITEM: {term} | SOURCE: Market Rate | PRICE: ${data['price']} | RETAILER: {data['retailer']}")
|
|
|
|
|
|
|
| 508 |
continue
|
| 509 |
|
| 510 |
+
# Search DB
|
| 511 |
hits = search_index(df, term, limit=1)
|
|
|
|
| 512 |
if hits:
|
| 513 |
best = hits[0]
|
| 514 |
+
context_lines.append(f"- ITEM: {term} | FOUND: TRUE | PRODUCT: {best['name']} | PRICE: ${best['price']} | RETAILER: {best['retailer']}")
|
|
|
|
| 515 |
else:
|
| 516 |
+
context_lines.append(f"- ITEM: {term} | FOUND: FALSE | NOTE: Needs estimation.")
|
| 517 |
|
| 518 |
data_context = "\n".join(context_lines)
|
| 519 |
+
logger.info(f"Plan Context:\n{data_context}")
|
| 520 |
|
| 521 |
+
# 3. Synthesis
|
| 522 |
+
final_prompt = f"TRANSCRIPT:\n{transcript}\n\nDATA CONTEXT (Real Prices):\n{data_context}"
|
| 523 |
plan = gemini_generate_json(SYNTHESIS_SYSTEM_PROMPT, final_prompt)
|
| 524 |
|
|
|
|
|
|
|
|
|
|
| 525 |
return plan
|
| 526 |
|
| 527 |
# =========================
|
|
|
|
| 534 |
return jsonify({
|
| 535 |
"ok": True,
|
| 536 |
"ts": now_utc_iso(),
|
|
|
|
| 537 |
"products_indexed": len(df)
|
| 538 |
})
|
| 539 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
@app.post("/api/call-briefing")
|
| 541 |
def call_briefing():
|
| 542 |
"""
|
| 543 |
+
Returns memory context to the frontend to pass to ElevenLabs.
|
|
|
|
| 544 |
"""
|
| 545 |
body = request.get_json(silent=True) or {}
|
| 546 |
profile_id = body.get("profile_id")
|
|
|
|
| 553 |
if username:
|
| 554 |
update_profile(profile_id, {"username": username})
|
| 555 |
|
| 556 |
+
# Prepare intelligence payload
|
| 557 |
kpi_data = {
|
| 558 |
+
"username": username or prof.get("username") or "Friend",
|
| 559 |
"market_rates": ZIM_ESSENTIALS,
|
| 560 |
+
"tone": "practical_zimbabwe",
|
| 561 |
+
"system_instruction": "You are Jessica. If user asks about 'how was the party?', check 'memory_summary' variable."
|
| 562 |
}
|
| 563 |
|
| 564 |
return jsonify({
|
|
|
|
| 570 |
@app.post("/api/log-call-usage")
|
| 571 |
def log_call_usage():
|
| 572 |
"""
|
| 573 |
+
1. Update Memory (Async logic, effectively)
|
| 574 |
+
2. Generate Shopping Plan (Ground Truth)
|
| 575 |
+
3. Persist Log
|
|
|
|
| 576 |
"""
|
| 577 |
body = request.get_json(silent=True) or {}
|
| 578 |
profile_id = body.get("profile_id")
|
|
|
|
| 583 |
|
| 584 |
logger.info(f"Processing Call for {profile_id}. Transcript Len: {len(transcript)}")
|
| 585 |
|
| 586 |
+
# 1. Update Long Term Memory
|
| 587 |
+
update_long_term_memory(profile_id, transcript)
|
| 588 |
+
|
| 589 |
+
# 2. Generate Plan
|
| 590 |
plan_data = {}
|
| 591 |
plan_id = None
|
| 592 |
|
|
|
|
| 595 |
plan_data = build_shopping_plan(transcript)
|
| 596 |
|
| 597 |
if plan_data.get("is_actionable"):
|
|
|
|
| 598 |
plan_ref = db.collection("pricelyst_profiles").document(profile_id).collection("shopping_plans").document()
|
| 599 |
plan_data["id"] = plan_ref.id
|
| 600 |
plan_data["created_at"] = now_utc_iso()
|
|
|
|
| 604 |
except Exception as e:
|
| 605 |
logger.error(f"Plan Gen Error: {e}")
|
| 606 |
|
| 607 |
+
# 3. Log Call
|
| 608 |
log_call(profile_id, {
|
| 609 |
"transcript": transcript,
|
| 610 |
"duration": body.get("duration_seconds"),
|
| 611 |
"plan_id": plan_id
|
| 612 |
})
|
| 613 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 614 |
return jsonify({
|
| 615 |
"ok": True,
|
| 616 |
"shopping_plan": plan_data if plan_data.get("is_actionable") else None
|
|
|
|
| 646 |
|
| 647 |
if __name__ == "__main__":
|
| 648 |
port = int(os.environ.get("PORT", 7860))
|
| 649 |
+
# Pre-warm cache
|
| 650 |
try:
|
| 651 |
get_data_index(force_refresh=True)
|
| 652 |
except:
|