Upload backend/hue_portal/chatbot/slow_path_handler.py with huggingface_hub
Browse files
backend/hue_portal/chatbot/slow_path_handler.py
CHANGED
|
@@ -62,7 +62,7 @@ class SlowPathHandler:
|
|
| 62 |
selected_document_code_normalized = (
|
| 63 |
selected_document_code.strip().upper() if selected_document_code else None
|
| 64 |
)
|
| 65 |
-
|
| 66 |
# Handle greetings
|
| 67 |
if intent == "greeting":
|
| 68 |
query_lower = query.lower().strip()
|
|
@@ -80,7 +80,81 @@ class SlowPathHandler:
|
|
| 80 |
"count": 0,
|
| 81 |
"_source": "slow_path"
|
| 82 |
}
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
# Search based on intent - retrieve top-15 for reranking (balance speed and RAM)
|
| 85 |
search_result = self._search_by_intent(
|
| 86 |
intent,
|
|
@@ -89,19 +163,6 @@ class SlowPathHandler:
|
|
| 89 |
preferred_document_code=selected_document_code_normalized,
|
| 90 |
) # Balance: 15 for good recall, not too slow
|
| 91 |
|
| 92 |
-
if intent == "search_legal":
|
| 93 |
-
clarification = self._maybe_request_clarification(
|
| 94 |
-
query=query,
|
| 95 |
-
search_result=search_result,
|
| 96 |
-
selected_document_code=selected_document_code_normalized,
|
| 97 |
-
)
|
| 98 |
-
if clarification:
|
| 99 |
-
clarification.setdefault("intent", intent)
|
| 100 |
-
clarification.setdefault("_source", "clarification")
|
| 101 |
-
clarification.setdefault("routing", "clarification")
|
| 102 |
-
clarification.setdefault("confidence", 0.3)
|
| 103 |
-
return clarification
|
| 104 |
-
|
| 105 |
# Fast path for high-confidence legal queries (skip for complex queries)
|
| 106 |
fast_path_response = None
|
| 107 |
if intent == "search_legal" and not self._is_complex_query(query):
|
|
|
|
| 62 |
selected_document_code_normalized = (
|
| 63 |
selected_document_code.strip().upper() if selected_document_code else None
|
| 64 |
)
|
| 65 |
+
|
| 66 |
# Handle greetings
|
| 67 |
if intent == "greeting":
|
| 68 |
query_lower = query.lower().strip()
|
|
|
|
| 80 |
"count": 0,
|
| 81 |
"_source": "slow_path"
|
| 82 |
}
|
| 83 |
+
|
| 84 |
+
# Wizard / option-first cho mọi câu hỏi pháp lý chung:
|
| 85 |
+
# Nếu:
|
| 86 |
+
# - intent là search_legal
|
| 87 |
+
# - chưa có selected_document_code trong session
|
| 88 |
+
# - trong câu hỏi không ghi rõ mã văn bản
|
| 89 |
+
# Thì: luôn trả về payload options để người dùng chọn văn bản trước,
|
| 90 |
+
# chưa generate câu trả lời chi tiết.
|
| 91 |
+
if (
|
| 92 |
+
intent == "search_legal"
|
| 93 |
+
and not selected_document_code_normalized
|
| 94 |
+
and not self._has_explicit_document_code_in_query(query)
|
| 95 |
+
):
|
| 96 |
+
canonical_candidates: List[Dict[str, Any]] = []
|
| 97 |
+
try:
|
| 98 |
+
canonical_docs = list(
|
| 99 |
+
LegalDocument.objects.filter(
|
| 100 |
+
code__in=["264-QD-TW", "QD-69-TW", "TT-02-CAND"]
|
| 101 |
+
)
|
| 102 |
+
)
|
| 103 |
+
for doc in canonical_docs:
|
| 104 |
+
summary = getattr(doc, "summary", "") or ""
|
| 105 |
+
metadata = getattr(doc, "metadata", {}) or {}
|
| 106 |
+
if not summary and isinstance(metadata, dict):
|
| 107 |
+
summary = metadata.get("summary", "")
|
| 108 |
+
canonical_candidates.append(
|
| 109 |
+
{
|
| 110 |
+
"code": doc.code,
|
| 111 |
+
"title": getattr(doc, "title", "") or doc.code,
|
| 112 |
+
"summary": summary,
|
| 113 |
+
"doc_type": getattr(doc, "doc_type", "") or "",
|
| 114 |
+
"section_title": "",
|
| 115 |
+
}
|
| 116 |
+
)
|
| 117 |
+
except Exception as exc:
|
| 118 |
+
logger.warning(
|
| 119 |
+
"[CLARIFICATION] Canonical documents lookup failed, using static list: %s",
|
| 120 |
+
exc,
|
| 121 |
+
)
|
| 122 |
+
|
| 123 |
+
if not canonical_candidates:
|
| 124 |
+
canonical_candidates = [
|
| 125 |
+
{
|
| 126 |
+
"code": "264-QD-TW",
|
| 127 |
+
"title": "Quyết định 264-QĐ/TW về kỷ luật đảng viên",
|
| 128 |
+
"summary": "",
|
| 129 |
+
"doc_type": "",
|
| 130 |
+
"section_title": "",
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"code": "QD-69-TW",
|
| 134 |
+
"title": "Quy định 69-QĐ/TW về kỷ luật tổ chức đảng, đảng viên",
|
| 135 |
+
"summary": "",
|
| 136 |
+
"doc_type": "",
|
| 137 |
+
"section_title": "",
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"code": "TT-02-CAND",
|
| 141 |
+
"title": "Thông tư 02/2021/TT-BCA về điều lệnh CAND",
|
| 142 |
+
"summary": "",
|
| 143 |
+
"doc_type": "",
|
| 144 |
+
"section_title": "",
|
| 145 |
+
},
|
| 146 |
+
]
|
| 147 |
+
|
| 148 |
+
clarification_payload = self._build_clarification_payload(
|
| 149 |
+
query, canonical_candidates
|
| 150 |
+
)
|
| 151 |
+
if clarification_payload:
|
| 152 |
+
clarification_payload.setdefault("intent", intent)
|
| 153 |
+
clarification_payload.setdefault("_source", "clarification")
|
| 154 |
+
clarification_payload.setdefault("routing", "clarification")
|
| 155 |
+
clarification_payload.setdefault("confidence", 0.3)
|
| 156 |
+
return clarification_payload
|
| 157 |
+
|
| 158 |
# Search based on intent - retrieve top-15 for reranking (balance speed and RAM)
|
| 159 |
search_result = self._search_by_intent(
|
| 160 |
intent,
|
|
|
|
| 163 |
preferred_document_code=selected_document_code_normalized,
|
| 164 |
) # Balance: 15 for good recall, not too slow
|
| 165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
# Fast path for high-confidence legal queries (skip for complex queries)
|
| 167 |
fast_path_response = None
|
| 168 |
if intent == "search_legal" and not self._is_complex_query(query):
|