Upload backend/hue_portal/chatbot/router.py with huggingface_hub
Browse files
backend/hue_portal/chatbot/router.py
CHANGED
|
@@ -8,6 +8,8 @@ from dataclasses import dataclass, field
|
|
| 8 |
from enum import Enum
|
| 9 |
from typing import Dict, Optional
|
| 10 |
|
|
|
|
|
|
|
| 11 |
|
| 12 |
class IntentRoute(str, Enum):
|
| 13 |
"""High-level route for the chatbot pipeline."""
|
|
@@ -62,8 +64,17 @@ def _flag_keywords(query_lower: str) -> Dict[str, bool]:
|
|
| 62 |
kw in query_lower
|
| 63 |
for kw in [
|
| 64 |
"quyết định",
|
|
|
|
| 65 |
"thông tư",
|
|
|
|
| 66 |
"nghị quyết",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
"kỷ luật",
|
| 68 |
"qd 69",
|
| 69 |
"qd 264",
|
|
@@ -84,6 +95,8 @@ class RouteDecision:
|
|
| 84 |
rationale: str
|
| 85 |
forced_intent: Optional[str] = None
|
| 86 |
keyword_flags: Dict[str, bool] = field(default_factory=dict)
|
|
|
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
def decide_route(query: str, intent: str, confidence: float) -> RouteDecision:
|
|
@@ -94,6 +107,7 @@ def decide_route(query: str, intent: str, confidence: float) -> RouteDecision:
|
|
| 94 |
words = query_lower.split()
|
| 95 |
keyword_flags = _flag_keywords(query_lower)
|
| 96 |
has_doc_code = _has_document_code(query_lower)
|
|
|
|
| 97 |
|
| 98 |
route = IntentRoute.SEARCH
|
| 99 |
rationale = "default-search"
|
|
@@ -145,6 +159,19 @@ def decide_route(query: str, intent: str, confidence: float) -> RouteDecision:
|
|
| 145 |
rationale = f"keyword-override-{flag}"
|
| 146 |
break
|
| 147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
return RouteDecision(
|
| 149 |
route=route,
|
| 150 |
intent=intent,
|
|
@@ -152,5 +179,23 @@ def decide_route(query: str, intent: str, confidence: float) -> RouteDecision:
|
|
| 152 |
rationale=rationale,
|
| 153 |
forced_intent=forced_intent,
|
| 154 |
keyword_flags=keyword_flags,
|
|
|
|
|
|
|
| 155 |
)
|
| 156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from enum import Enum
|
| 9 |
from typing import Dict, Optional
|
| 10 |
|
| 11 |
+
from hue_portal.chatbot.document_topics import DOCUMENT_TOPICS
|
| 12 |
+
|
| 13 |
|
| 14 |
class IntentRoute(str, Enum):
|
| 15 |
"""High-level route for the chatbot pipeline."""
|
|
|
|
| 64 |
kw in query_lower
|
| 65 |
for kw in [
|
| 66 |
"quyết định",
|
| 67 |
+
"quyet dinh",
|
| 68 |
"thông tư",
|
| 69 |
+
"thong tu",
|
| 70 |
"nghị quyết",
|
| 71 |
+
"nghi quyet",
|
| 72 |
+
"nghị định",
|
| 73 |
+
"nghi dinh",
|
| 74 |
+
"luật",
|
| 75 |
+
"luat",
|
| 76 |
+
"điều ",
|
| 77 |
+
"dieu ",
|
| 78 |
"kỷ luật",
|
| 79 |
"qd 69",
|
| 80 |
"qd 264",
|
|
|
|
| 95 |
rationale: str
|
| 96 |
forced_intent: Optional[str] = None
|
| 97 |
keyword_flags: Dict[str, bool] = field(default_factory=dict)
|
| 98 |
+
clarification_required: bool = False
|
| 99 |
+
topic_scores: Dict[str, float] = field(default_factory=dict)
|
| 100 |
|
| 101 |
|
| 102 |
def decide_route(query: str, intent: str, confidence: float) -> RouteDecision:
|
|
|
|
| 107 |
words = query_lower.split()
|
| 108 |
keyword_flags = _flag_keywords(query_lower)
|
| 109 |
has_doc_code = _has_document_code(query_lower)
|
| 110 |
+
topic_scores = _compute_topic_scores(query_lower)
|
| 111 |
|
| 112 |
route = IntentRoute.SEARCH
|
| 113 |
rationale = "default-search"
|
|
|
|
| 159 |
rationale = f"keyword-override-{flag}"
|
| 160 |
break
|
| 161 |
|
| 162 |
+
clarification_required = False
|
| 163 |
+
should_consider_legal = (
|
| 164 |
+
(forced_intent == "search_legal") or (intent == "search_legal") or keyword_flags["legal"]
|
| 165 |
+
)
|
| 166 |
+
if (
|
| 167 |
+
route == IntentRoute.SEARCH
|
| 168 |
+
and should_consider_legal
|
| 169 |
+
and not has_doc_code
|
| 170 |
+
and not forced_intent == "greeting"
|
| 171 |
+
):
|
| 172 |
+
if topic_scores or confidence < 0.5:
|
| 173 |
+
clarification_required = True
|
| 174 |
+
|
| 175 |
return RouteDecision(
|
| 176 |
route=route,
|
| 177 |
intent=intent,
|
|
|
|
| 179 |
rationale=rationale,
|
| 180 |
forced_intent=forced_intent,
|
| 181 |
keyword_flags=keyword_flags,
|
| 182 |
+
clarification_required=clarification_required,
|
| 183 |
+
topic_scores=topic_scores,
|
| 184 |
)
|
| 185 |
|
| 186 |
+
|
| 187 |
+
def _compute_topic_scores(
    query_lower: str,
    topics: Optional[list] = None,
) -> Dict[str, float]:
    """Score each document topic by how many of its keywords occur in the query.

    Matching is a plain substring test of the lower-cased keyword against
    ``query_lower``; the caller is expected to pass an already lower-cased
    query, since the query itself is not normalized here.

    Args:
        query_lower: The user query, already lower-cased.
        topics: Topic definitions to score against. Each entry is read as a
            mapping with a ``"code"`` string and an optional ``"keywords"``
            list of strings. Defaults to the module-level ``DOCUMENT_TOPICS``.

    Returns:
        A dict mapping the upper-cased topic code to its score. Topics with
        no matching keyword are omitted, so an empty dict means no topic
        matched.
    """
    if topics is None:
        topics = DOCUMENT_TOPICS

    scores: Dict[str, float] = {}
    for topic in topics:
        # `or []` also covers an explicit `"keywords": None`, which the
        # `.get(..., [])` default alone would let through to the loop.
        keywords = topic.get("keywords") or []
        score = 0.0
        for keyword in keywords:
            normalized_kw = keyword.lower()
            if not normalized_kw:
                # An empty string is a substring of every query; skip it so
                # it cannot inflate the score.
                continue
            if normalized_kw in query_lower:
                # Keywords longer than five characters weigh more than
                # short ones.
                score += 1.5 if len(normalized_kw) > 5 else 1.0
        if score > 0:
            scores[topic["code"].upper()] = score
    return scores
|
| 201 |
+
|