Davidtran99
Deploy backend to Hugging Face Space
faebf07
raw
history blame
5.04 kB
"""
Routing utilities that decide whether a query should hit RAG or stay in small-talk.
"""
from __future__ import annotations
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, Optional
class IntentRoute(str, Enum):
    """High-level route for the chatbot pipeline.

    The enum mixes in ``str``, so every member compares equal to its plain
    string value (for example ``IntentRoute.SEARCH == "search"``).
    """

    # Chosen by ``decide_route`` for short greeting-only queries.
    GREETING = "greeting"
    # Chosen for small-talk phrases and for generic / low-confidence queries.
    SMALL_TALK = "small_talk"
    # Default route in ``decide_route``; presumably the branch that goes
    # through RAG (per the module docstring) -- confirm against the caller.
    SEARCH = "search"
# Regular expressions for the legal-document codes this module recognises.
# They are written in upper-case, unaccented ASCII because
# ``_has_document_code`` upper-cases the query before searching.
# ``[-\s]?`` lets each part of a code be separated by a hyphen, a single
# whitespace character, or nothing at all (e.g. "264-QD-TW", "264 QD TW",
# "264QDTW").
DOCUMENT_CODE_PATTERNS = [
    r"264[-\s]?QD[-\s]?TW",
    r"QD[-\s]?69[-\s]?TW",
    r"TT[-\s]?02[-\s]?CAND",
    r"TT[-\s]?02[-\s]?BIEN[-\s]?SOAN",
    r"QUYET[-\s]?DINH[-\s]?69",
    r"QUYET[-\s]?DINH[-\s]?264",
    r"THONG[-\s]?TU[-\s]?02",
]
# Vietnamese phrases that mark a query as casual conversation.
# ``_flag_keywords`` looks for each one as a plain substring of the
# lower-cased query. English glosses below are approximate.
SMALL_TALK_PHRASES = [
    "mệt quá",  # "so tired"
    "nhàm chán",  # "boring"
    "tâm sự",  # "confide / have a heart-to-heart"
    "chém gió",  # "shoot the breeze"
    "đang làm gì",  # "what are you doing"
    "chuyện trò",  # "chat"
    "trò chuyện",  # "chat"
    "hỏi chơi thôi",  # "just asking for fun"
]
def _has_document_code(query: str) -> bool:
    """Return True when the query mentions a known legal-document code.

    The query is upper-cased and stripped of Vietnamese diacritics before the
    patterns in ``DOCUMENT_CODE_PATTERNS`` are searched, so both
    "quyet dinh 69" and "quyết định 69" are recognised. The patterns
    themselves are unaccented upper-case ASCII.

    Args:
        query: Raw or lower-cased user query; casing does not matter.

    Returns:
        True if any pattern matches anywhere in the folded query.
    """
    # Function-scope import so the module's import block stays untouched.
    import unicodedata

    # NFD splits accented letters into base letter + combining marks, which
    # are then dropped. "Đ" has no canonical decomposition, so it is mapped
    # to "D" explicitly. ASCII text passes through unchanged, so every query
    # that matched before folding still matches.
    decomposed = unicodedata.normalize("NFD", query.upper())
    normalized = "".join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    ).replace("Đ", "D")
    return any(re.search(pattern, normalized) for pattern in DOCUMENT_CODE_PATTERNS)
def _flag_keywords(query_lower: str) -> Dict[str, bool]:
    """Report which keyword families occur in an already lower-cased query.

    Every family except the short greeting "hi" is detected by plain
    substring search, so accented and unaccented spellings are listed
    separately where both are supported.

    Args:
        query_lower: The user query, lower-cased by the caller.

    Returns:
        A dict with the keys ``greeting``, ``fine``, ``procedure``,
        ``advisory``, ``office``, ``legal`` and ``small_talk``; each value is
        True when at least one keyword of that family is present.
    """

    def mentions(keywords) -> bool:
        return any(kw in query_lower for kw in keywords)

    # "hi" is too short for substring search: it occurs inside unrelated
    # words such as "thi", "chi", "nhieu" or "this". Require it to stand
    # alone as a token; "hi+" still accepts elongated forms like "hiii".
    says_hi = re.search(r"\bhi+\b", query_lower) is not None

    return {
        "greeting": says_hi
        or mentions(("xin chào", "xin chao", "chào", "chao", "hello")),
        "fine": mentions(
            ("mức phạt", "phạt", "vi phạm", "đèn đỏ", "nồng độ cồn", "mũ bảo hiểm", "tốc độ")
        ),
        "procedure": mentions(
            ("thủ tục", "thu tuc", "hồ sơ", "ho so", "điều kiện", "dieu kien", "cư trú", "cu tru")
        ),
        "advisory": mentions(
            ("cảnh báo", "lua dao", "lừa đảo", "scam", "mạo danh", "thủ đoạn")
        ),
        "office": mentions(
            ("địa chỉ", "dia chi", "công an", "cong an", "điểm tiếp dân", "số điện thoại")
        ),
        "legal": mentions(
            (
                "quyết định",
                "thông tư",
                "nghị quyết",
                "kỷ luật",
                "qd 69",
                "qd 264",
                "thông tư 02",
                "điều lệnh",
                "văn bản pháp luật",
            )
        ),
        "small_talk": mentions(SMALL_TALK_PHRASES),
    }
@dataclass
class RouteDecision:
    """Result produced by ``decide_route`` for a single query."""

    # Pipeline branch selected for the query.
    route: IntentRoute
    # Intent supplied by the caller of ``decide_route``; stored unchanged.
    intent: str
    # Confidence supplied by the caller of ``decide_route``; stored unchanged.
    confidence: float
    # Short tag naming the rule that produced the route, e.g. "default-search".
    rationale: str
    # Intent the routing rules want used instead of ``intent``; None when no
    # rule overrode it.
    forced_intent: Optional[str] = None
    # Keyword-family hits for the query, as returned by ``_flag_keywords``.
    keyword_flags: Dict[str, bool] = field(default_factory=dict)
def decide_route(query: str, intent: str, confidence: float) -> RouteDecision:
    """
    Decide how the chatbot should handle the query before invoking RAG.

    Rules, highest precedence first:

    1. A known document code always routes to SEARCH with rationale
       ``"doc-code-detected"``; the intent is forced to ``"search_legal"``
       unless the classifier already chose it.
    2. A query of at most three words that contains a greeting keyword, has
       no domain keyword and was classified as ``"greeting"`` routes to
       GREETING.
    3. A small-talk phrase without any domain keyword routes to SMALL_TALK
       and forces ``"general_query"``.
    4. A ``"general_query"`` intent or a confidence below 0.55 routes to
       SMALL_TALK, but only when the query has no domain keyword.
    5. Otherwise the route is SEARCH. If a domain keyword is present, the
       first family (legal, fine, procedure, advisory, office) whose search
       intent differs from the classifier's intent is forced.

    Args:
        query: Raw user query.
        intent: Intent label predicted upstream.
        confidence: Confidence of that prediction.

    Returns:
        A ``RouteDecision`` carrying the route, the untouched ``intent`` and
        ``confidence``, the rule tag, any forced intent and the keyword flags.
    """
    low_confidence_threshold = 0.55
    # Priority order matters: the first matching family wins.
    keyword_force_map = (
        ("legal", "search_legal"),
        ("fine", "search_fine"),
        ("procedure", "search_procedure"),
        ("advisory", "search_advisory"),
        ("office", "search_office"),
    )

    query_lower = query.lower().strip()
    words = query_lower.split()
    keyword_flags = _flag_keywords(query_lower)
    has_domain_keyword = any(keyword_flags[flag] for flag, _ in keyword_force_map)

    route = IntentRoute.SEARCH
    rationale = "default-search"
    forced_intent: Optional[str] = None

    if _has_document_code(query_lower):
        # A document code is decisive on its own. It must also win when the
        # classifier already said "search_legal"; otherwise a low-confidence
        # but correct prediction would fall through to small talk.
        rationale = "doc-code-detected"
        if intent != "search_legal":
            forced_intent = "search_legal"
    elif (
        len(words) <= 3
        and keyword_flags["greeting"]
        and not has_domain_keyword
        and intent == "greeting"
    ):
        route = IntentRoute.GREETING
        rationale = "simple-greeting"
        forced_intent = "greeting"
    elif keyword_flags["small_talk"] and not has_domain_keyword:
        route = IntentRoute.SMALL_TALK
        rationale = "small-talk-keywords"
        forced_intent = "general_query"
    elif not has_domain_keyword and (
        intent == "general_query" or confidence < low_confidence_threshold
    ):
        # Generic small talk / low confidence. Queries with a domain keyword
        # are excluded so they stay on SEARCH even when the keyword merely
        # confirms the classifier's intent and nothing needs forcing.
        route = IntentRoute.SMALL_TALK
        rationale = "general-or-low-confidence"
    elif has_domain_keyword:
        # Keyword override: force the first flagged family whose search
        # intent differs from what the classifier predicted.
        for flag, target_intent in keyword_force_map:
            if keyword_flags.get(flag) and intent != target_intent:
                forced_intent = target_intent
                rationale = f"keyword-override-{flag}"
                break

    return RouteDecision(
        route=route,
        intent=intent,
        confidence=confidence,
        rationale=rationale,
        forced_intent=forced_intent,
        keyword_flags=keyword_flags,
    )