Youmnaaaa committed on
Commit
a104191
·
verified ·
1 Parent(s): 9d217dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +160 -10
app.py CHANGED
@@ -58,6 +58,7 @@ intent_tokenizer = intent_model = label_encoder = id2intent = None
58
  ner_pipeline = label2id = id2label = None
59
  semantic_model = corpus_df = corpus_embeddings = places_df = None
60
  SESSIONS: dict = {}
 
61
  STARTUP_COMPLETE: bool = False
62
  _SESSION_TTL_SEC = 3600 # a session is wiped after one hour without use
63
 
@@ -1582,7 +1583,7 @@ def chat(text: str, session, user_lat=None, user_lon=None):
1582
  _LOC_REQ = ["ู…ูˆู‚ุนูŠ","ู„ูˆูƒูŠุดู† ุจุชุงุนูŠ","ุงุณุชุฎุฏู… ู…ูˆู‚ุนูŠ","ุชุนุฑู ุชุฌูŠุจ ู„ูˆูƒูŠุดู†",
1583
  "ู…ูˆู‚ุนูƒ","location ุจุชุงุนูŠ","gps"]
1584
  if any(w in t_norm for w in _LOC_REQ):
1585
- if user_lat and user_lon:
1586
  reply = "ุชู…ุงู…ุŒ ู‡ุณุชุฎุฏู… ู…ูˆู‚ุนูƒ ุนุดุงู† ุฃุฌูŠุจ ุงู„ุฃู‚ุฑุจ ู„ูŠูƒ. ๐Ÿ“"
1587
  else:
1588
  reply = "ู…ุญุชุงุฌ ุชุณู…ุญู„ูŠ ุจุงู„ู„ูˆูƒูŠุดู† ู…ู† ุงู„ุชุทุจูŠู‚ ุงู„ุฃูˆู„. ๐Ÿ“"
@@ -1759,7 +1760,7 @@ def chat(text: str, session, user_lat=None, user_lon=None):
1759
 
1760
  # โ”€โ”€ proximity query: ู„ูˆ ู…ููŠุด lat/lon โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1761
  t_low = norm(text)
1762
- if _is_proximity_query(text) and not (user_lat and user_lon):
1763
  reply = ("๐Ÿ“ ุนุดุงู† ุฃุญุฏุฏ ุงู„ุฃู‚ุฑุจ ู…ุญุชุงุฌ ุชุณู…ุญู„ูŠ ุจุงู„ู„ูˆูƒูŠุดู†.\n"
1764
  "ุฃูˆ ู‚ูˆู„ูŠ ุงู„ู…ู†ุทู‚ุฉ ุงู„ู„ูŠ ุฃู†ุช ููŠู‡ุง.")
1765
  result.update(reply=reply, intent="missing_info",
@@ -1956,17 +1957,69 @@ _ITEM_SEARCH_PATTERNS = [
1956
  ]
1957
 
1958
def _looks_like_item_search(text: str) -> bool:
    """Detect item/menu-item search even if the item word is not in our keywords.

    Returns True when the normalized text contains a known item keyword, or
    when it matches a request pattern and an item candidate can still be
    extracted from it. Returns False for plain "show me the menu" requests.
    """
    t = norm(text)

    # Menu display is different from item search, unless the user is asking
    # what a place has / makes / serves.
    item_context_words = ["عنده", "عندها", "بيعمل", "بتعمل", "بيقدم", "بتقدم", "فيه", "فيها"]
    if any(kw in t for kw in _MENU_TRIGGERS) and not any(p in t for p in item_context_words):
        return False

    # A known item keyword is enough on its own.
    if any(norm(kw) in t for kw in _ITEM_TRIGGERS):
        return True

    # A request phrasing plus leftover text after stripping is most likely an
    # item, even when that item is not in the keyword list. The inferred
    # category adds nothing here: "category and pattern and candidate" is
    # already covered by "pattern and candidate".
    has_item_pattern = any(norm(p) in t for p in _ITEM_SEARCH_PATTERNS)
    return bool(has_item_pattern and extract_item_query(text))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1970
 
1971
  def _detect_new_intent(text: str) -> str | None:
1972
  """
@@ -2329,6 +2382,97 @@ def get_place_menu(place_id: str) -> list[dict]:
2329
  log.error(f"โŒ get_menu error: {e}")
2330
  return []
2331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2332
  # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
2333
  # โ”€โ”€โ”€ RESPONSE FORMATTERS โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
2334
  # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•๏ฟฝ๏ฟฝโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
@@ -2470,6 +2614,11 @@ def handle_search_by_item(text: str, session, user_lat=None, user_lon=None) -> d
2470
  places = search_places_by_item(item_query)
2471
  places = filter_item_results_strict(places, item_query)
2472
 
 
 
 
 
 
2473
  if not places:
2474
  result = {
2475
  "reply": f"ู…ุด ู„ุงู‚ูŠ ุฃู…ุงูƒู† ุจุชู‚ุฏู… {item_query} ุญุงู„ูŠู‹ุง ๐Ÿ˜”",
@@ -2845,3 +2994,4 @@ def reset_session(session_id: str):
2845
  SESSIONS.pop(session_id, None)
2846
  log.info(f"๐Ÿ—‘๏ธ Session {session_id} reset")
2847
  return {"status": "reset", "session_id": session_id}
 
 
58
  ner_pipeline = label2id = id2label = None
59
  semantic_model = corpus_df = corpus_embeddings = places_df = None
60
  SESSIONS: dict = {}
61
+ MENU_CACHE: dict = {}
62
  STARTUP_COMPLETE: bool = False
63
  _SESSION_TTL_SEC = 3600 # a session is wiped after one hour without use
64
 
 
1583
  _LOC_REQ = ["ู…ูˆู‚ุนูŠ","ู„ูˆูƒูŠุดู† ุจุชุงุนูŠ","ุงุณุชุฎุฏู… ู…ูˆู‚ุนูŠ","ุชุนุฑู ุชุฌูŠุจ ู„ูˆูƒูŠุดู†",
1584
  "ู…ูˆู‚ุนูƒ","location ุจุชุงุนูŠ","gps"]
1585
  if any(w in t_norm for w in _LOC_REQ):
1586
+ if user_lat is not None and user_lon is not None:
1587
  reply = "ุชู…ุงู…ุŒ ู‡ุณุชุฎุฏู… ู…ูˆู‚ุนูƒ ุนุดุงู† ุฃุฌูŠุจ ุงู„ุฃู‚ุฑุจ ู„ูŠูƒ. ๐Ÿ“"
1588
  else:
1589
  reply = "ู…ุญุชุงุฌ ุชุณู…ุญู„ูŠ ุจุงู„ู„ูˆูƒูŠุดู† ู…ู† ุงู„ุชุทุจูŠู‚ ุงู„ุฃูˆู„. ๐Ÿ“"
 
1760
 
1761
  # โ”€โ”€ proximity query: ู„ูˆ ู…ููŠุด lat/lon โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1762
  t_low = norm(text)
1763
+ if _is_proximity_query(text) and not (user_lat is not None and user_lon is not None):
1764
  reply = ("๐Ÿ“ ุนุดุงู† ุฃุญุฏุฏ ุงู„ุฃู‚ุฑุจ ู…ุญุชุงุฌ ุชุณู…ุญู„ูŠ ุจุงู„ู„ูˆูƒูŠุดู†.\n"
1765
  "ุฃูˆ ู‚ูˆู„ูŠ ุงู„ู…ู†ุทู‚ุฉ ุงู„ู„ูŠ ุฃู†ุช ููŠู‡ุง.")
1766
  result.update(reply=reply, intent="missing_info",
 
1957
  ]
1958
 
1959
def _looks_like_item_search(text: str) -> bool:
    """
    Detect item/menu-item search without stealing normal category requests.

    Routing examples this heuristic is meant to get right:
    - "عايزة سوبرماركت" is nearest_supermarket, not search_by_item.
    - "عايزة احسن مطعم" is recommendation/search, not item_query=احسن.
    - "عايزة برجر" / "مكان عنده اسموزي خوخ" stays search_by_item.

    Returns True only when a known item keyword is present, or when an
    item-context word (e.g. "عنده", "serves") appears together with an
    extracted item candidate of at least 3 cleaned characters. A bare request
    pattern such as "عايزة" is deliberately NOT enough on its own.
    """
    t = norm(text)
    tokens = t.split()

    item_context_words = [
        "عنده", "عندها", "بيعمل", "بتعمل", "بيقدم", "بتقدم",
        "فيه", "فيها", "مكان عنده", "مكان عندها", "مطعم عنده", "كافيه عنده",
        "has", "serves", "serve",
    ]
    context_phrases = [p for p in (norm(w) for w in item_context_words) if p]

    def _mentions_item_context() -> bool:
        # Single words must START a token (optionally after the conjunction
        # "و"). A raw substring test is wrong here: "فيه" is contained in
        # "كافيه", so every cafe request used to look like item context.
        # Suffixed forms such as "عندهم" / "serves" still match.
        for phrase in context_phrases:
            if " " in phrase:
                if phrase in t:
                    return True
            elif any(tok.startswith((phrase, "و" + phrase)) for tok in tokens):
                return True
        return False

    has_strong_item_context = _mentions_item_context()

    # Menu display is different from item search, unless the user asks for a
    # place that has an item.
    if any(kw in t for kw in _MENU_TRIGGERS) and not has_strong_item_context:
        return False

    inferred_cat = infer_category(text)
    candidate = extract_item_query(text)
    cand_clean = clean_text(candidate or "")
    cand_tokens = cand_clean.split()

    # If the remaining candidate is only category names and/or ranking
    # adjectives, it is not a food item. The vocabulary is only built when
    # there is a candidate to test.
    if cand_tokens:
        non_item_words = {
            clean_text(w) for words in CATEGORY_KEYWORDS.values() for w in words
        }
        non_item_words.update(clean_text(k) for k in _CAT_MAP)
        non_item_words.update(
            clean_text(w)
            for w in (
                "احسن", "افضل", "الاحسن", "الافضل", "أحسن", "أفضل",
                "كويس", "كويسة", "حلو", "حلوة", "ترشيح", "رشحلي",
                "best", "good", "recommended", "recommend",
            )
        )
        if all(w in non_item_words for w in cand_tokens):
            return False

    # Pure category requests must go to nearest_* intents, not item search.
    if inferred_cat in ("supermarket", "pharmacy", "housing"):
        return False

    # Known item names are enough: "عايزة برجر".
    if any(norm(kw) in t for kw in _ITEM_TRIGGERS):
        return True

    # Unknown item names need strong item context:
    # "مكان عنده فتة" / "كافيه بيعمل موهيتو".
    return bool(has_strong_item_context and candidate and len(cand_clean) >= 3)
2023
 
2024
  def _detect_new_intent(text: str) -> str | None:
2025
  """
 
2382
  log.error(f"โŒ get_menu error: {e}")
2383
  return []
2384
 
2385
+
2386
+
2387
+ def _place_id_for_menu(row: dict) -> str | None:
2388
+ """Pick a usable id for /places/{place_id}/menu from mixed DB/API rows."""
2389
+ for k in ("place_id", "id", "placeId"):
2390
+ v = row.get(k) if isinstance(row, dict) else None
2391
+ if v is not None and str(v).strip() and str(v).strip().lower() != "nan":
2392
+ return str(v).strip()
2393
+ return None
2394
+
2395
+
2396
# Seconds to wait before asking again for a menu that came back empty.
_MENU_EMPTY_RETRY_SEC = 300
# place_id -> time.monotonic() reading of the last empty answer.
_MENU_EMPTY_SEEN: dict = {}


def _get_menu_cached(place_id: str) -> list[dict]:
    """Small in-memory cache so item search does not re-download menus every turn.

    Non-empty menus are stored in MENU_CACHE and reused. An empty answer is
    NOT stored there: get_place_menu also returns an empty list when the fetch
    fails, so pinning it would hide the menu until the process restarts.
    Instead the empty answer is remembered for _MENU_EMPTY_RETRY_SEC seconds
    and the menu is requested again after that.

    Args:
        place_id: Id of the place; a falsy id returns [] without any lookup.

    Returns:
        The menu items for the place, or [] when none are known.
    """
    import time

    if not place_id:
        return []

    cached = MENU_CACHE.get(place_id)
    if cached:
        return cached

    now = time.monotonic()
    last_empty = _MENU_EMPTY_SEEN.get(place_id)
    if last_empty is not None and now - last_empty < _MENU_EMPTY_RETRY_SEC:
        return []

    items = get_place_menu(place_id)
    if items:
        MENU_CACHE[place_id] = items
        _MENU_EMPTY_SEEN.pop(place_id, None)
    else:
        _MENU_EMPTY_SEEN[place_id] = now
    return items
2405
+
2406
+
2407
def _infer_item_kind(item_query: str) -> str:
    """Return "drink", "food" or "unknown" for an item query.

    The cleaned query is tested against the drink hints first and the food
    hints second, so a query that matches both lists is reported as "drink".
    A hint matches when its cleaned form is contained anywhere in the cleaned
    query; queries matching neither list are "unknown".
    """
    q = clean_text(item_query or "")

    drink_hints = (
        "قهوه", "قهوة", "كوفي", "لاتيه", "اسبريسو", "نسكافيه", "كابتشينو",
        "عصير", "مشروب", "مشروبات", "موهيتو", "ميلك", "شيك", "ايس", "آيس",
        "سموزي", "اسموزي", "سموذي", "smoothie", "juice", "coffee", "latte", "tea", "شاي",
    )
    food_hints = (
        "برجر", "بيتزا", "شاورما", "كريب", "فراخ", "فرخه", "دجاج", "مشويات", "كباب",
        "كفته", "كفتة", "سمك", "سندوتش", "ساندوتش", "حواوشي", "كشري", "مكرونه", "وجبه",
        "burger", "pizza", "shawarma", "crepe", "chicken", "grill", "sandwich",
    )

    # NOTE(review): containment is substring based, so short hints can hit
    # inside longer words (e.g. "tea" inside "steak") - confirm this is wanted.
    for kind, hints in (("drink", drink_hints), ("food", food_hints)):
        if any(clean_text(hint) in q for hint in hints):
            return kind
    return "unknown"
2425
+
2426
+
2427
def scan_menus_for_item(item_query: str, max_places_to_scan: int = 120) -> list[dict]:
    """
    Last-resort fallback: search inside the actual menus.

    Used when /search-by-item returns empty or is too strict. It covers items
    that exist on a menu but are not in the item keywords, and spellings the
    backend search endpoint does not match.

    Args:
        item_query: Item text to look for. A blank query returns [].
        max_places_to_scan: Upper bound on how many place rows have their menu
            read. Values <= 0 return [].

    Returns:
        Matching place rows as dicts, each extended with "matched_items"
        (at most 5 menu items), "matched_item" (name of the first match,
        falling back to the query) and "matched_item_count". Scanning stops
        once CFG.MAX_CARDS places have matched.
    """
    if places_df is None or places_df.empty:
        return []
    # head(-n) would return "all but the last n rows", so reject n <= 0 here.
    if not item_query or not item_query.strip() or max_places_to_scan <= 0:
        return []

    # Filtering creates new frames and nothing below mutates places_df, so no
    # defensive copy of the whole table is needed.
    candidates = places_df
    if "category_clean" in candidates.columns:
        # Food is looked up in restaurants only; drinks and unknown items in
        # cafes and restaurants. Other categories are never scanned.
        if _infer_item_kind(item_query) == "food":
            wanted_cats, wanted_words = ("restaurant",), ("مطعم",)
        else:
            wanted_cats, wanted_words = ("cafe", "restaurant"), ("كافيه", "مطعم")

        def _owns_menu(category: str) -> bool:
            return normalize_category(category) in wanted_cats or any(
                word in category for word in wanted_words
            )

        candidates = candidates[candidates["category_clean"].astype(str).apply(_owns_menu)]

    text_fields = ("item_name", "name", "subcategory_name", "sub_category", "description")
    matches: list[dict] = []
    # NOTE(review): every place whose menu is not cached yet costs one
    # get_place_menu call, so a cold scan can issue up to max_places_to_scan
    # requests one after another.
    for _, row in candidates.head(max_places_to_scan).iterrows():
        place = row.to_dict()
        pid = _place_id_for_menu(place)
        if not pid:
            continue

        matched_items = []
        for item in _get_menu_cached(pid):
            if not isinstance(item, dict):
                continue
            # `or ""` keeps explicit null fields from putting the word "None"
            # into the searchable text.
            blob = clean_text(" ".join(str(item.get(field) or "") for field in text_fields))
            if _text_matches_item_query(blob, item_query):
                matched_items.append(item)

        if matched_items:
            first = matched_items[0]
            place["matched_items"] = matched_items[:5]
            place["matched_item"] = first.get("item_name") or first.get("name") or item_query
            place["matched_item_count"] = len(matched_items)
            matches.append(place)
        if len(matches) >= CFG.MAX_CARDS:
            break

    log.info(f"🔎 scan_menus_for_item '{item_query}' → {len(matches)} places")
    return matches
2475
+
2476
  # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
2477
  # โ”€โ”€โ”€ RESPONSE FORMATTERS โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
2478
  # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•๏ฟฝ๏ฟฝโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
 
2614
  places = search_places_by_item(item_query)
2615
  places = filter_item_results_strict(places, item_query)
2616
 
2617
+ # If the dedicated endpoint/fuzzy search missed an item that actually exists
2618
+ # in menus, scan real menus before saying no_result.
2619
+ if not places:
2620
+ places = scan_menus_for_item(item_query)
2621
+
2622
  if not places:
2623
  result = {
2624
  "reply": f"ู…ุด ู„ุงู‚ูŠ ุฃู…ุงูƒู† ุจุชู‚ุฏู… {item_query} ุญุงู„ูŠู‹ุง ๐Ÿ˜”",
 
2994
  SESSIONS.pop(session_id, None)
2995
  log.info(f"๐Ÿ—‘๏ธ Session {session_id} reset")
2996
  return {"status": "reset", "session_id": session_id}
2997
+