Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -58,6 +58,7 @@ intent_tokenizer = intent_model = label_encoder = id2intent = None
|
|
| 58 |
ner_pipeline = label2id = id2label = None
|
| 59 |
semantic_model = corpus_df = corpus_embeddings = places_df = None
|
| 60 |
SESSIONS: dict = {}
|
|
|
|
| 61 |
STARTUP_COMPLETE: bool = False
|
| 62 |
_SESSION_TTL_SEC = 3600  # a session is wiped after one hour without use
|
| 63 |
|
|
@@ -1582,7 +1583,7 @@ def chat(text: str, session, user_lat=None, user_lon=None):
|
|
| 1582 |
_LOC_REQ = ["ู
ููุนู","ููููุดู ุจุชุงุนู","ุงุณุชุฎุฏู
ู
ููุนู","ุชุนุฑู ุชุฌูุจ ููููุดู",
|
| 1583 |
"ู
ููุนู","location ุจุชุงุนู","gps"]
|
| 1584 |
if any(w in t_norm for w in _LOC_REQ):
|
| 1585 |
-
if user_lat and user_lon:
|
| 1586 |
reply = "ุชู
ุงู
ุ ูุณุชุฎุฏู
ู
ููุนู ุนุดุงู ุฃุฌูุจ ุงูุฃูุฑุจ ููู. ๐"
|
| 1587 |
else:
|
| 1588 |
reply = "ู
ุญุชุงุฌ ุชุณู
ุญูู ุจุงูููููุดู ู
ู ุงูุชุทุจูู ุงูุฃูู. ๐"
|
|
@@ -1759,7 +1760,7 @@ def chat(text: str, session, user_lat=None, user_lon=None):
|
|
| 1759 |
|
| 1760 |
# โโ proximity query: ูู ู
ููุด lat/lon โโโโโโโโโโโโโโโโโโโโโโโโ
|
| 1761 |
t_low = norm(text)
|
| 1762 |
-
if _is_proximity_query(text) and not (user_lat and user_lon):
|
| 1763 |
reply = ("๐ ุนุดุงู ุฃุญุฏุฏ ุงูุฃูุฑุจ ู
ุญุชุงุฌ ุชุณู
ุญูู ุจุงูููููุดู.\n"
|
| 1764 |
"ุฃู ูููู ุงูู
ูุทูุฉ ุงููู ุฃูุช ูููุง.")
|
| 1765 |
result.update(reply=reply, intent="missing_info",
|
|
@@ -1956,17 +1957,69 @@ _ITEM_SEARCH_PATTERNS = [
|
|
| 1956 |
]
|
| 1957 |
|
| 1958 |
def _looks_like_item_search(text: str) -> bool:
|
| 1959 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1960 |
t = norm(text)
|
| 1961 |
-
|
| 1962 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1963 |
return False
|
| 1964 |
-
|
| 1965 |
-
|
| 1966 |
-
has_item_keyword = any(norm(kw) in t for kw in _ITEM_TRIGGERS)
|
| 1967 |
candidate = extract_item_query(text)
|
| 1968 |
-
|
| 1969 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1970 |
|
| 1971 |
def _detect_new_intent(text: str) -> str | None:
|
| 1972 |
"""
|
|
@@ -2329,6 +2382,97 @@ def get_place_menu(place_id: str) -> list[dict]:
|
|
| 2329 |
log.error(f"โ get_menu error: {e}")
|
| 2330 |
return []
|
| 2331 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2332 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 2333 |
# โโโ RESPONSE FORMATTERS โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 2334 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ๏ฟฝ๏ฟฝโโโโโโโโโโโโโโโโโโโ
|
|
@@ -2470,6 +2614,11 @@ def handle_search_by_item(text: str, session, user_lat=None, user_lon=None) -> d
|
|
| 2470 |
places = search_places_by_item(item_query)
|
| 2471 |
places = filter_item_results_strict(places, item_query)
|
| 2472 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2473 |
if not places:
|
| 2474 |
result = {
|
| 2475 |
"reply": f"ู
ุด ูุงูู ุฃู
ุงูู ุจุชูุฏู
{item_query} ุญุงูููุง ๐",
|
|
@@ -2845,3 +2994,4 @@ def reset_session(session_id: str):
|
|
| 2845 |
SESSIONS.pop(session_id, None)
|
| 2846 |
log.info(f"๐๏ธ Session {session_id} reset")
|
| 2847 |
return {"status": "reset", "session_id": session_id}
|
|
|
|
|
|
| 58 |
ner_pipeline = label2id = id2label = None
|
| 59 |
semantic_model = corpus_df = corpus_embeddings = places_df = None
|
| 60 |
SESSIONS: dict = {}
|
| 61 |
+
MENU_CACHE: dict = {}
|
| 62 |
STARTUP_COMPLETE: bool = False
|
| 63 |
_SESSION_TTL_SEC = 3600  # a session is wiped after one hour without use
|
| 64 |
|
|
|
|
| 1583 |
_LOC_REQ = ["ู
ููุนู","ููููุดู ุจุชุงุนู","ุงุณุชุฎุฏู
ู
ููุนู","ุชุนุฑู ุชุฌูุจ ููููุดู",
|
| 1584 |
"ู
ููุนู","location ุจุชุงุนู","gps"]
|
| 1585 |
if any(w in t_norm for w in _LOC_REQ):
|
| 1586 |
+
if user_lat is not None and user_lon is not None:
|
| 1587 |
reply = "ุชู
ุงู
ุ ูุณุชุฎุฏู
ู
ููุนู ุนุดุงู ุฃุฌูุจ ุงูุฃูุฑุจ ููู. ๐"
|
| 1588 |
else:
|
| 1589 |
reply = "ู
ุญุชุงุฌ ุชุณู
ุญูู ุจุงูููููุดู ู
ู ุงูุชุทุจูู ุงูุฃูู. ๐"
|
|
|
|
| 1760 |
|
| 1761 |
# โโ proximity query: ูู ู
ููุด lat/lon โโโโโโโโโโโโโโโโโโโโโโโโ
|
| 1762 |
t_low = norm(text)
|
| 1763 |
+
if _is_proximity_query(text) and not (user_lat is not None and user_lon is not None):
|
| 1764 |
reply = ("๐ ุนุดุงู ุฃุญุฏุฏ ุงูุฃูุฑุจ ู
ุญุชุงุฌ ุชุณู
ุญูู ุจุงูููููุดู.\n"
|
| 1765 |
"ุฃู ูููู ุงูู
ูุทูุฉ ุงููู ุฃูุช ูููุง.")
|
| 1766 |
result.update(reply=reply, intent="missing_info",
|
|
|
|
| 1957 |
]
|
| 1958 |
|
| 1959 |
def _looks_like_item_search(text: str) -> bool:
    """Decide whether *text* asks for a specific menu item rather than a category.

    Intended outcomes (Egyptian-Arabic examples given in English):
      - "I want a supermarket" is a nearest-supermarket request, not an item search.
      - "I want the best restaurant" is a recommendation/search; the ranking
        adjective must not become the item query.
      - "I want a burger" / "a place that has peach smoothie" stay item searches.

    Args:
        text: Raw user message.

    Returns:
        True when the message should be routed to item search, else False.
    """
    # NOTE(review): the Arabic literals below were recovered from a mis-decoded
    # (UTF-8 shown as TIS-620) rendering of this file; confirm them against the
    # repository copy, in particular the final letter of "رشحلي".
    item_context_words = (
        "عنده", "عندها", "بيعمل", "بتعمل", "بيقدم", "بتقدم",
        "فيه", "فيها", "مكان عنده", "مكان عندها", "مطعم عنده", "كافيه عنده",
        "has", "serves", "serve",
    )
    ranking_words = (
        "احسن", "افضل", "الاحسن", "الافضل", "أحسن", "أفضل",
        "كويس", "كويسة", "حلو", "حلوة", "ترشيح", "رشحلي",
        "best", "good", "recommended", "recommend",
    )

    t = norm(text)
    # Computed once; it is needed both for the menu check and the final verdict.
    has_strong_item_context = any(norm(p) in t for p in item_context_words)

    # Showing a menu is a different request from searching for an item, unless
    # the user asks for a place that *has* something.
    # NOTE(review): _MENU_TRIGGERS entries are matched without norm(), unlike
    # _ITEM_TRIGGERS below — confirm they are stored already normalised.
    if any(kw in t for kw in _MENU_TRIGGERS) and not has_strong_item_context:
        return False

    inferred_cat = infer_category(text)
    candidate = extract_item_query(text)
    cand_clean = clean_text(candidate or "")
    cand_tokens = cand_clean.split()

    # A candidate made only of category names or ranking adjectives is not a
    # food item ("best restaurant" -> no item).
    if cand_tokens:
        non_item_words = {clean_text(w) for w in ranking_words}
        for words in CATEGORY_KEYWORDS.values():
            non_item_words.update(clean_text(w) for w in words)
        non_item_words.update(clean_text(k) for k in _CAT_MAP)
        if all(tok in non_item_words for tok in cand_tokens):
            return False

    # Pure category requests must go to the nearest_* intents.
    if inferred_cat in ("supermarket", "pharmacy", "housing"):
        return False

    # A known item name is enough on its own ("I want a burger").
    if any(norm(kw) in t for kw in _ITEM_TRIGGERS):
        return True

    # Unknown item names need strong item context ("a place that has ...").
    # Broad patterns such as a bare "I want" are deliberately NOT sufficient,
    # so they cannot steal ordinary searches.
    return bool(has_strong_item_context and candidate and len(cand_clean) >= 3)
|
| 2023 |
|
| 2024 |
def _detect_new_intent(text: str) -> str | None:
|
| 2025 |
"""
|
|
|
|
| 2382 |
log.error(f"โ get_menu error: {e}")
|
| 2383 |
return []
|
| 2384 |
|
| 2385 |
+
|
| 2386 |
+
|
| 2387 |
+
def _place_id_for_menu(row: dict) -> str | None:
|
| 2388 |
+
"""Pick a usable id for /places/{place_id}/menu from mixed DB/API rows."""
|
| 2389 |
+
for k in ("place_id", "id", "placeId"):
|
| 2390 |
+
v = row.get(k) if isinstance(row, dict) else None
|
| 2391 |
+
if v is not None and str(v).strip() and str(v).strip().lower() != "nan":
|
| 2392 |
+
return str(v).strip()
|
| 2393 |
+
return None
|
| 2394 |
+
|
| 2395 |
+
|
| 2396 |
+
# Seconds an empty menu result is trusted before the backend is asked again.
_EMPTY_MENU_RETRY_SEC = 300
# place_id -> time.monotonic() timestamp of the last empty result.
_EMPTY_MENU_SEEN_AT: dict = {}


def _get_menu_cached(place_id: str) -> list[dict]:
    """Return the menu for *place_id*, using a small in-memory cache.

    Non-empty menus are stored in ``MENU_CACHE`` and reused, so item search
    does not re-download menus every turn. An empty result is only remembered
    for ``_EMPTY_MENU_RETRY_SEC`` seconds: ``get_place_menu`` returns ``[]``
    when the request fails, so caching it permanently would hide the place
    from item search until the process restarts.

    Args:
        place_id: Id understood by ``get_place_menu``; falsy values yield ``[]``.

    Returns:
        The list of menu items (possibly empty).
    """
    import time  # local import keeps this block self-contained

    if not place_id:
        return []

    cached = MENU_CACHE.get(place_id)
    if cached:
        return cached

    now = time.monotonic()
    seen_at = _EMPTY_MENU_SEEN_AT.get(place_id)
    if seen_at is not None and now - seen_at < _EMPTY_MENU_RETRY_SEC:
        # Recently came back empty; skip the network call for now.
        return []

    items = get_place_menu(place_id)
    if items:
        MENU_CACHE[place_id] = items
        _EMPTY_MENU_SEEN_AT.pop(place_id, None)
    else:
        _EMPTY_MENU_SEEN_AT[place_id] = now
    return items
|
| 2405 |
+
|
| 2406 |
+
|
| 2407 |
+
# Hint tables are built once at import time instead of on every call.
# NOTE(review): the Arabic literals were recovered from a mis-decoded (UTF-8
# shown as TIS-620) rendering of this file. Letters that share a lead byte
# (e.g. final ي vs ى, ه vs ي) could not be told apart and were chosen from
# context — confirm each entry against the repository copy.
_DRINK_HINTS = (
    "قهوه", "قهوة", "كوفي", "لاتيه", "اسبريسو", "نسكافيه", "كابتشينو",
    "عصير", "مشروب", "مشروبات", "موهيتو", "ميلك", "شيك", "ايس", "آيس",
    "سموزي", "اسموزي", "سموذي", "smoothie", "juice", "coffee", "latte", "tea", "شاي",
)
_FOOD_HINTS = (
    "برجر", "بيتزا", "شاورما", "كريب", "فراخ", "فرخه", "دجاج", "مشويات", "كباب",
    "كفته", "كفتة", "سمك", "سندوتش", "ساندوتش", "حواوشي", "كشري", "مكرونه", "وجبه",
    "burger", "pizza", "shawarma", "crepe", "chicken", "grill", "sandwich",
)


def _infer_item_kind(item_query: str) -> str:
    """Classify *item_query* as ``"drink"``, ``"food"`` or ``"unknown"``.

    The cleaned query is checked for any drink hint first, then any food
    hint; hints are matched as substrings after ``clean_text``.

    Args:
        item_query: Item text extracted from the user message.

    Returns:
        ``"drink"``, ``"food"``, or ``"unknown"`` when no hint occurs.
    """
    q = clean_text(item_query)
    # NOTE(review): substring matching lets short hints fire inside longer
    # words (e.g. "tea" inside "steak"); drink hints win because they are
    # checked first. Left as-is to preserve current routing.
    if any(clean_text(hint) in q for hint in _DRINK_HINTS):
        return "drink"
    if any(clean_text(hint) in q for hint in _FOOD_HINTS):
        return "food"
    return "unknown"
|
| 2425 |
+
|
| 2426 |
+
|
| 2427 |
+
def scan_menus_for_item(item_query: str, max_places_to_scan: int = 120) -> list[dict]:
    """Search inside real menus for *item_query* (last-resort fallback).

    Used when the dedicated item-search path returns nothing: the item may
    exist in a menu without being present in the item keywords, or the
    backend search may not match the spelling.

    Args:
        item_query: Item text to look for.
        max_places_to_scan: Upper bound on the number of candidate place rows
            whose menus are inspected.

    Returns:
        Place dicts (rows of ``places_df``) extended with ``matched_items``
        (at most five), ``matched_item`` and ``matched_item_count``. At most
        ``CFG.MAX_CARDS`` places are returned; the list is empty when no
        place data is loaded or nothing matches.
    """
    if places_df is None or places_df.empty:
        return []

    # NOTE(review): the two Arabic markers ("restaurant", "cafe") and the log
    # emoji were recovered from a mis-decoded rendering of this file; confirm
    # them against the repository copy.
    # Food is only looked for in restaurants. Drinks and unknown items are
    # looked for in cafes and restaurants; pharmacies, supermarkets and
    # housing are never scanned.
    if _infer_item_kind(item_query) == "food":
        wanted_categories = ("restaurant",)
        wanted_markers = ("مطعم",)
    else:
        wanted_categories = ("cafe", "restaurant")
        wanted_markers = ("كافيه", "مطعم")

    def _owns_menu(category: str) -> bool:
        # True when the category normalises to, or literally mentions, a wanted kind.
        return normalize_category(category) in wanted_categories or any(
            marker in category for marker in wanted_markers
        )

    # No defensive copy: filtering builds a new frame and each row is turned
    # into a fresh dict before anything is written to it.
    df = places_df
    if "category_clean" in df.columns:
        df = df[df["category_clean"].astype(str).apply(_owns_menu)]

    blob_fields = ("item_name", "name", "subcategory_name", "sub_category", "description")
    matches = []
    for _, row in df.head(max_places_to_scan).iterrows():
        place = row.to_dict()
        pid = _place_id_for_menu(place)
        if not pid:
            continue

        matched_items = []
        for item in _get_menu_cached(pid):
            # `or ""` keeps a None field from showing up as the text "None".
            blob = clean_text(" ".join(str(item.get(field) or "") for field in blob_fields))
            if _text_matches_item_query(blob, item_query):
                matched_items.append(item)

        if not matched_items:
            continue

        first = matched_items[0]
        place["matched_items"] = matched_items[:5]
        place["matched_item"] = first.get("item_name") or first.get("name") or item_query
        place["matched_item_count"] = len(matched_items)
        matches.append(place)
        if len(matches) >= CFG.MAX_CARDS:
            break

    log.info("🔍 scan_menus_for_item '%s' → %d places", item_query, len(matches))
    return matches
|
| 2475 |
+
|
| 2476 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 2477 |
# โโโ RESPONSE FORMATTERS โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 2478 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ๏ฟฝ๏ฟฝโโโโโโโโโโโโโโโโโโโ
|
|
|
|
| 2614 |
places = search_places_by_item(item_query)
|
| 2615 |
places = filter_item_results_strict(places, item_query)
|
| 2616 |
|
| 2617 |
+
# If the dedicated endpoint/fuzzy search missed an item that actually exists
|
| 2618 |
+
# in menus, scan real menus before saying no_result.
|
| 2619 |
+
if not places:
|
| 2620 |
+
places = scan_menus_for_item(item_query)
|
| 2621 |
+
|
| 2622 |
if not places:
|
| 2623 |
result = {
|
| 2624 |
"reply": f"ู
ุด ูุงูู ุฃู
ุงูู ุจุชูุฏู
{item_query} ุญุงูููุง ๐",
|
|
|
|
| 2994 |
SESSIONS.pop(session_id, None)
|
| 2995 |
log.info(f"๐๏ธ Session {session_id} reset")
|
| 2996 |
return {"status": "reset", "session_id": session_id}
|
| 2997 |
+
|