Upload backend/hue_portal/chatbot/slow_path_handler.py with huggingface_hub
Browse files
backend/hue_portal/chatbot/slow_path_handler.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
"""
|
| 2 |
Slow Path Handler - Full RAG pipeline for complex queries.
|
| 3 |
"""
|
|
|
|
| 4 |
import time
|
| 5 |
import logging
|
| 6 |
from typing import Dict, Any, Optional, List
|
|
@@ -33,7 +34,13 @@ class SlowPathHandler:
|
|
| 33 |
self.chatbot = get_chatbot()
|
| 34 |
self.llm_generator = get_llm_generator()
|
| 35 |
|
| 36 |
-
def handle(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
"""
|
| 38 |
Full RAG pipeline:
|
| 39 |
1. Search (hybrid: BM25 + vector)
|
|
@@ -51,6 +58,9 @@ class SlowPathHandler:
|
|
| 51 |
Response dict with message, intent, results, etc.
|
| 52 |
"""
|
| 53 |
query = query.strip()
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
# Handle greetings
|
| 56 |
if intent == "greeting":
|
|
@@ -70,8 +80,26 @@ class SlowPathHandler:
|
|
| 70 |
"_source": "slow_path"
|
| 71 |
}
|
| 72 |
|
| 73 |
-
# Search based on intent - retrieve top-
|
| 74 |
-
search_result = self._search_by_intent(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
# Fast path for high-confidence legal queries (skip for complex queries)
|
| 77 |
fast_path_response = None
|
|
@@ -82,17 +110,18 @@ class SlowPathHandler:
|
|
| 82 |
fast_path_response["_source"] = "fast_path"
|
| 83 |
return fast_path_response
|
| 84 |
|
| 85 |
-
# Rerank results
|
| 86 |
-
#
|
| 87 |
-
|
|
|
|
| 88 |
try:
|
| 89 |
# Lazy import to avoid blocking startup (FlagEmbedding may download model)
|
| 90 |
from hue_portal.core.reranker import rerank_documents
|
| 91 |
|
| 92 |
legal_results = [r for r in search_result["results"] if r.get("type") == "legal"]
|
| 93 |
if len(legal_results) > 0:
|
| 94 |
-
# Rerank to top-
|
| 95 |
-
top_k = min(
|
| 96 |
reranked = rerank_documents(query, legal_results, top_k=top_k)
|
| 97 |
# Update search_result with reranked results (keep non-legal results)
|
| 98 |
non_legal = [r for r in search_result["results"] if r.get("type") != "legal"]
|
|
@@ -106,6 +135,9 @@ class SlowPathHandler:
|
|
| 106 |
)
|
| 107 |
except Exception as e:
|
| 108 |
logger.warning("[RERANKER] Reranking failed: %s, using original results", e)
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
# BƯỚC 1: Bypass LLM khi có results tốt (tránh context overflow + tăng tốc 30-40%)
|
| 111 |
# Chỉ áp dụng cho legal queries có results với score cao
|
|
@@ -202,9 +234,9 @@ class SlowPathHandler:
|
|
| 202 |
# Generate response message using LLM if available and we have documents
|
| 203 |
message = None
|
| 204 |
if self.llm_generator and search_result["count"] > 0:
|
| 205 |
-
# For legal queries, use structured output (
|
| 206 |
if intent == "search_legal" and search_result["results"]:
|
| 207 |
-
legal_docs = [r["data"] for r in search_result["results"] if r.get("type") == "legal"][:
|
| 208 |
if legal_docs:
|
| 209 |
structured_answer = self.llm_generator.generate_structured_legal_answer(
|
| 210 |
query,
|
|
@@ -216,7 +248,7 @@ class SlowPathHandler:
|
|
| 216 |
|
| 217 |
# For other intents or if structured failed, use regular LLM generation
|
| 218 |
if not message:
|
| 219 |
-
documents = [r["data"] for r in search_result["results"][:
|
| 220 |
message = self.llm_generator.generate_answer(
|
| 221 |
query,
|
| 222 |
context=context,
|
|
@@ -272,8 +304,163 @@ class SlowPathHandler:
|
|
| 272 |
}
|
| 273 |
|
| 274 |
return response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
|
| 276 |
-
def _search_by_intent(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
"""Search based on classified intent. Reduced limit from 20 to 5 for faster inference on free tier."""
|
| 278 |
# Use original query for better matching
|
| 279 |
keywords = query.strip()
|
|
@@ -335,30 +522,31 @@ class SlowPathHandler:
|
|
| 335 |
qs = LegalSection.objects.all()
|
| 336 |
text_fields = ["section_title", "section_code", "content"]
|
| 337 |
detected_code = self._detect_document_code(query)
|
|
|
|
| 338 |
filtered = False
|
| 339 |
-
if
|
| 340 |
-
filtered_qs = qs.filter(document__code__iexact=
|
| 341 |
if filtered_qs.exists():
|
| 342 |
qs = filtered_qs
|
| 343 |
filtered = True
|
| 344 |
logger.info(
|
| 345 |
"[SEARCH] Prefiltering legal sections for document code %s (query='%s')",
|
| 346 |
-
|
| 347 |
query,
|
| 348 |
)
|
| 349 |
else:
|
| 350 |
logger.info(
|
| 351 |
"[SEARCH] Document code %s detected but no sections found locally, falling back to full corpus",
|
| 352 |
-
|
| 353 |
)
|
| 354 |
else:
|
| 355 |
logger.debug("[SEARCH] No document code detected for query: %s", query)
|
| 356 |
-
# Retrieve top-
|
| 357 |
search_results = search_with_ml(
|
| 358 |
qs,
|
| 359 |
keywords,
|
| 360 |
text_fields,
|
| 361 |
-
top_k=limit, # limit=
|
| 362 |
min_score=0.02, # Lower threshold for legal
|
| 363 |
)
|
| 364 |
results = self._format_legal_results(search_results, detected_code, query=query)
|
|
@@ -375,7 +563,8 @@ class SlowPathHandler:
|
|
| 375 |
"query": query,
|
| 376 |
"keywords": keywords,
|
| 377 |
"results": results,
|
| 378 |
-
"count": len(results)
|
|
|
|
| 379 |
}
|
| 380 |
|
| 381 |
def _should_save_to_golden(self, query: str, response: Dict) -> bool:
|
|
|
|
| 1 |
"""
|
| 2 |
Slow Path Handler - Full RAG pipeline for complex queries.
|
| 3 |
"""
|
| 4 |
+
import os
|
| 5 |
import time
|
| 6 |
import logging
|
| 7 |
from typing import Dict, Any, Optional, List
|
|
|
|
| 34 |
self.chatbot = get_chatbot()
|
| 35 |
self.llm_generator = get_llm_generator()
|
| 36 |
|
| 37 |
+
def handle(
|
| 38 |
+
self,
|
| 39 |
+
query: str,
|
| 40 |
+
intent: str,
|
| 41 |
+
session_id: Optional[str] = None,
|
| 42 |
+
selected_document_code: Optional[str] = None,
|
| 43 |
+
) -> Dict[str, Any]:
|
| 44 |
"""
|
| 45 |
Full RAG pipeline:
|
| 46 |
1. Search (hybrid: BM25 + vector)
|
|
|
|
| 58 |
Response dict with message, intent, results, etc.
|
| 59 |
"""
|
| 60 |
query = query.strip()
|
| 61 |
+
selected_document_code_normalized = (
|
| 62 |
+
selected_document_code.strip().upper() if selected_document_code else None
|
| 63 |
+
)
|
| 64 |
|
| 65 |
# Handle greetings
|
| 66 |
if intent == "greeting":
|
|
|
|
| 80 |
"_source": "slow_path"
|
| 81 |
}
|
| 82 |
|
| 83 |
+
# Search based on intent - retrieve top-15 for reranking (balance speed and RAM)
|
| 84 |
+
search_result = self._search_by_intent(
|
| 85 |
+
intent,
|
| 86 |
+
query,
|
| 87 |
+
limit=15,
|
| 88 |
+
preferred_document_code=selected_document_code_normalized,
|
| 89 |
+
) # Balance: 15 for good recall, not too slow
|
| 90 |
+
|
| 91 |
+
if intent == "search_legal":
|
| 92 |
+
clarification = self._maybe_request_clarification(
|
| 93 |
+
query=query,
|
| 94 |
+
search_result=search_result,
|
| 95 |
+
selected_document_code=selected_document_code_normalized,
|
| 96 |
+
)
|
| 97 |
+
if clarification:
|
| 98 |
+
clarification.setdefault("intent", intent)
|
| 99 |
+
clarification.setdefault("_source", "clarification")
|
| 100 |
+
clarification.setdefault("routing", "clarification")
|
| 101 |
+
clarification.setdefault("confidence", 0.3)
|
| 102 |
+
return clarification
|
| 103 |
|
| 104 |
# Fast path for high-confidence legal queries (skip for complex queries)
|
| 105 |
fast_path_response = None
|
|
|
|
| 110 |
fast_path_response["_source"] = "fast_path"
|
| 111 |
return fast_path_response
|
| 112 |
|
| 113 |
+
# Rerank results - DISABLED for speed (can enable via ENABLE_RERANKER env var)
|
| 114 |
+
# Reranker adds 1-3 seconds delay, skip for faster responses
|
| 115 |
+
enable_reranker = os.environ.get("ENABLE_RERANKER", "false").lower() == "true"
|
| 116 |
+
if intent == "search_legal" and enable_reranker:
|
| 117 |
try:
|
| 118 |
# Lazy import to avoid blocking startup (FlagEmbedding may download model)
|
| 119 |
from hue_portal.core.reranker import rerank_documents
|
| 120 |
|
| 121 |
legal_results = [r for r in search_result["results"] if r.get("type") == "legal"]
|
| 122 |
if len(legal_results) > 0:
|
| 123 |
+
# Rerank to top-4 (balance speed and context quality)
|
| 124 |
+
top_k = min(4, len(legal_results))
|
| 125 |
reranked = rerank_documents(query, legal_results, top_k=top_k)
|
| 126 |
# Update search_result with reranked results (keep non-legal results)
|
| 127 |
non_legal = [r for r in search_result["results"] if r.get("type") != "legal"]
|
|
|
|
| 135 |
)
|
| 136 |
except Exception as e:
|
| 137 |
logger.warning("[RERANKER] Reranking failed: %s, using original results", e)
|
| 138 |
+
elif intent == "search_legal":
|
| 139 |
+
# Skip reranking for speed - just use top results by score
|
| 140 |
+
logger.debug("[RERANKER] Skipped reranking for speed (ENABLE_RERANKER=false)")
|
| 141 |
|
| 142 |
# BƯỚC 1: Bypass LLM khi có results tốt (tránh context overflow + tăng tốc 30-40%)
|
| 143 |
# Chỉ áp dụng cho legal queries có results với score cao
|
|
|
|
| 234 |
# Generate response message using LLM if available and we have documents
|
| 235 |
message = None
|
| 236 |
if self.llm_generator and search_result["count"] > 0:
|
| 237 |
+
# For legal queries, use structured output (top-4 for good context and speed)
|
| 238 |
if intent == "search_legal" and search_result["results"]:
|
| 239 |
+
legal_docs = [r["data"] for r in search_result["results"] if r.get("type") == "legal"][:4] # Top-4 for balance
|
| 240 |
if legal_docs:
|
| 241 |
structured_answer = self.llm_generator.generate_structured_legal_answer(
|
| 242 |
query,
|
|
|
|
| 248 |
|
| 249 |
# For other intents or if structured failed, use regular LLM generation
|
| 250 |
if not message:
|
| 251 |
+
documents = [r["data"] for r in search_result["results"][:4]] # Top-4 for balance
|
| 252 |
message = self.llm_generator.generate_answer(
|
| 253 |
query,
|
| 254 |
context=context,
|
|
|
|
| 304 |
}
|
| 305 |
|
| 306 |
return response
|
| 307 |
+
|
| 308 |
+
def _maybe_request_clarification(
|
| 309 |
+
self,
|
| 310 |
+
query: str,
|
| 311 |
+
search_result: Dict[str, Any],
|
| 312 |
+
selected_document_code: Optional[str] = None,
|
| 313 |
+
) -> Optional[Dict[str, Any]]:
|
| 314 |
+
"""If multiple legal documents compete and no doc code specified, ask user to clarify."""
|
| 315 |
+
if selected_document_code:
|
| 316 |
+
return None
|
| 317 |
+
if not search_result or search_result.get("count", 0) == 0:
|
| 318 |
+
return None
|
| 319 |
+
detected_code = self._detect_document_code(query)
|
| 320 |
+
if detected_code:
|
| 321 |
+
return None
|
| 322 |
+
legal_results = [r for r in search_result["results"] if r.get("type") == "legal"]
|
| 323 |
+
if len(legal_results) < 2:
|
| 324 |
+
return None
|
| 325 |
+
candidates = self._collect_document_candidates(legal_results, limit=4)
|
| 326 |
+
if len(candidates) < 2:
|
| 327 |
+
return None
|
| 328 |
+
payload = self._build_clarification_payload(query, candidates)
|
| 329 |
+
if payload:
|
| 330 |
+
logger.info(
|
| 331 |
+
"[CLARIFICATION] Requesting user choice among documents: %s",
|
| 332 |
+
[c["code"] for c in candidates],
|
| 333 |
+
)
|
| 334 |
+
return payload
|
| 335 |
+
|
| 336 |
+
def _collect_document_candidates(
|
| 337 |
+
self,
|
| 338 |
+
legal_results: List[Dict[str, Any]],
|
| 339 |
+
limit: int = 4,
|
| 340 |
+
) -> List[Dict[str, Any]]:
|
| 341 |
+
"""Collect unique document candidates from legal results."""
|
| 342 |
+
ordered_codes: List[str] = []
|
| 343 |
+
seen: set[str] = set()
|
| 344 |
+
for result in legal_results:
|
| 345 |
+
data = result.get("data", {})
|
| 346 |
+
code = (data.get("document_code") or "").strip()
|
| 347 |
+
if not code:
|
| 348 |
+
continue
|
| 349 |
+
upper = code.upper()
|
| 350 |
+
if upper in seen:
|
| 351 |
+
continue
|
| 352 |
+
ordered_codes.append(code)
|
| 353 |
+
seen.add(upper)
|
| 354 |
+
if len(ordered_codes) >= limit:
|
| 355 |
+
break
|
| 356 |
+
if len(ordered_codes) < 2:
|
| 357 |
+
return []
|
| 358 |
+
try:
|
| 359 |
+
documents = {
|
| 360 |
+
doc.code.upper(): doc
|
| 361 |
+
for doc in LegalDocument.objects.filter(code__in=ordered_codes)
|
| 362 |
+
}
|
| 363 |
+
except Exception as exc:
|
| 364 |
+
logger.warning("[CLARIFICATION] Unable to load documents for candidates: %s", exc)
|
| 365 |
+
documents = {}
|
| 366 |
+
candidates: List[Dict[str, Any]] = []
|
| 367 |
+
for code in ordered_codes:
|
| 368 |
+
upper = code.upper()
|
| 369 |
+
doc_obj = documents.get(upper)
|
| 370 |
+
section = next(
|
| 371 |
+
(
|
| 372 |
+
res
|
| 373 |
+
for res in legal_results
|
| 374 |
+
if (res.get("data", {}).get("document_code") or "").strip().upper() == upper
|
| 375 |
+
),
|
| 376 |
+
None,
|
| 377 |
+
)
|
| 378 |
+
data = section.get("data", {}) if section else {}
|
| 379 |
+
summary = ""
|
| 380 |
+
if doc_obj:
|
| 381 |
+
summary = doc_obj.summary or ""
|
| 382 |
+
if not summary and isinstance(doc_obj.metadata, dict):
|
| 383 |
+
summary = doc_obj.metadata.get("summary", "")
|
| 384 |
+
if not summary:
|
| 385 |
+
summary = data.get("excerpt") or data.get("content", "")[:200]
|
| 386 |
+
candidates.append(
|
| 387 |
+
{
|
| 388 |
+
"code": code,
|
| 389 |
+
"title": data.get("document_title") or (doc_obj.title if doc_obj else code),
|
| 390 |
+
"summary": summary,
|
| 391 |
+
"doc_type": doc_obj.doc_type if doc_obj else "",
|
| 392 |
+
"section_title": data.get("section_title") or "",
|
| 393 |
+
}
|
| 394 |
+
)
|
| 395 |
+
return candidates
|
| 396 |
+
|
| 397 |
+
def _build_clarification_payload(
|
| 398 |
+
self,
|
| 399 |
+
query: str,
|
| 400 |
+
candidates: List[Dict[str, Any]],
|
| 401 |
+
) -> Optional[Dict[str, Any]]:
|
| 402 |
+
if not candidates:
|
| 403 |
+
return None
|
| 404 |
+
default_message = (
|
| 405 |
+
"Tôi tìm thấy một số văn bản có thể phù hợp. "
|
| 406 |
+
"Bạn vui lòng chọn văn bản muốn tra cứu để tôi trả lời chính xác hơn."
|
| 407 |
+
)
|
| 408 |
+
llm_payload = self._call_clarification_llm(query, candidates)
|
| 409 |
+
if llm_payload:
|
| 410 |
+
message = llm_payload.get("message") or default_message
|
| 411 |
+
options = llm_payload.get("options") or []
|
| 412 |
+
else:
|
| 413 |
+
message = default_message
|
| 414 |
+
options = [
|
| 415 |
+
{
|
| 416 |
+
"code": candidate["code"].upper(),
|
| 417 |
+
"title": candidate["title"],
|
| 418 |
+
"reason": candidate.get("summary") or candidate.get("section_title") or "",
|
| 419 |
+
}
|
| 420 |
+
for candidate in candidates[:3]
|
| 421 |
+
]
|
| 422 |
+
if not any(opt.get("code") == "__other__" for opt in options):
|
| 423 |
+
options.append(
|
| 424 |
+
{
|
| 425 |
+
"code": "__other__",
|
| 426 |
+
"title": "Khác",
|
| 427 |
+
"reason": "Tôi muốn hỏi văn bản hoặc chủ đề khác",
|
| 428 |
+
}
|
| 429 |
+
)
|
| 430 |
+
return {
|
| 431 |
+
"message": message,
|
| 432 |
+
"clarification": {
|
| 433 |
+
"message": message,
|
| 434 |
+
"options": options,
|
| 435 |
+
},
|
| 436 |
+
"results": [],
|
| 437 |
+
"count": 0,
|
| 438 |
+
}
|
| 439 |
+
|
| 440 |
+
def _call_clarification_llm(
|
| 441 |
+
self,
|
| 442 |
+
query: str,
|
| 443 |
+
candidates: List[Dict[str, Any]],
|
| 444 |
+
) -> Optional[Dict[str, Any]]:
|
| 445 |
+
if not self.llm_generator:
|
| 446 |
+
return None
|
| 447 |
+
try:
|
| 448 |
+
return self.llm_generator.suggest_clarification_topics(
|
| 449 |
+
query,
|
| 450 |
+
candidates,
|
| 451 |
+
max_options=3,
|
| 452 |
+
)
|
| 453 |
+
except Exception as exc:
|
| 454 |
+
logger.warning("[CLARIFICATION] LLM suggestion failed: %s", exc)
|
| 455 |
+
return None
|
| 456 |
|
| 457 |
+
def _search_by_intent(
|
| 458 |
+
self,
|
| 459 |
+
intent: str,
|
| 460 |
+
query: str,
|
| 461 |
+
limit: int = 5,
|
| 462 |
+
preferred_document_code: Optional[str] = None,
|
| 463 |
+
) -> Dict[str, Any]:
|
| 464 |
"""Search based on classified intent. Reduced limit from 20 to 5 for faster inference on free tier."""
|
| 465 |
# Use original query for better matching
|
| 466 |
keywords = query.strip()
|
|
|
|
| 522 |
qs = LegalSection.objects.all()
|
| 523 |
text_fields = ["section_title", "section_code", "content"]
|
| 524 |
detected_code = self._detect_document_code(query)
|
| 525 |
+
effective_code = preferred_document_code or detected_code
|
| 526 |
filtered = False
|
| 527 |
+
if effective_code:
|
| 528 |
+
filtered_qs = qs.filter(document__code__iexact=effective_code)
|
| 529 |
if filtered_qs.exists():
|
| 530 |
qs = filtered_qs
|
| 531 |
filtered = True
|
| 532 |
logger.info(
|
| 533 |
"[SEARCH] Prefiltering legal sections for document code %s (query='%s')",
|
| 534 |
+
effective_code,
|
| 535 |
query,
|
| 536 |
)
|
| 537 |
else:
|
| 538 |
logger.info(
|
| 539 |
"[SEARCH] Document code %s detected but no sections found locally, falling back to full corpus",
|
| 540 |
+
effective_code,
|
| 541 |
)
|
| 542 |
else:
|
| 543 |
logger.debug("[SEARCH] No document code detected for query: %s", query)
|
| 544 |
+
# Retrieve top-15 for reranking (will be reduced to top-4 after rerank)
|
| 545 |
search_results = search_with_ml(
|
| 546 |
qs,
|
| 547 |
keywords,
|
| 548 |
text_fields,
|
| 549 |
+
top_k=limit, # limit=15 for reranking, will be reduced to 4
|
| 550 |
min_score=0.02, # Lower threshold for legal
|
| 551 |
)
|
| 552 |
results = self._format_legal_results(search_results, detected_code, query=query)
|
|
|
|
| 563 |
"query": query,
|
| 564 |
"keywords": keywords,
|
| 565 |
"results": results,
|
| 566 |
+
"count": len(results),
|
| 567 |
+
"detected_code": detected_code,
|
| 568 |
}
|
| 569 |
|
| 570 |
def _should_save_to_golden(self, query: str, response: Dict) -> bool:
|