davidtran999 committed
Commit af1bc2d · verified · 1 Parent(s): 5181f18

Upload backend/hue_portal/chatbot/slow_path_handler.py with huggingface_hub
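The commit message indicates the file was pushed programmatically with huggingface_hub rather than through the web UI. A minimal sketch of such an upload using HfApi.upload_file; the repo_id and repo_type below are assumptions, not taken from this page:

from huggingface_hub import HfApi

api = HfApi()  # reads the token saved by `huggingface-cli login` or the HF_TOKEN env var
api.upload_file(
    path_or_fileobj="backend/hue_portal/chatbot/slow_path_handler.py",  # local file to upload
    path_in_repo="backend/hue_portal/chatbot/slow_path_handler.py",     # destination path in the repo
    repo_id="davidtran999/hue-portal",  # hypothetical repo id
    repo_type="space",                  # assumption; use "model" or "dataset" for other repo types
    commit_message="Upload backend/hue_portal/chatbot/slow_path_handler.py with huggingface_hub",
)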

backend/hue_portal/chatbot/slow_path_handler.py CHANGED
@@ -1,6 +1,7 @@
 """
 Slow Path Handler - Full RAG pipeline for complex queries.
 """
+import os
 import time
 import logging
 from typing import Dict, Any, Optional, List
@@ -33,7 +34,13 @@ class SlowPathHandler:
         self.chatbot = get_chatbot()
         self.llm_generator = get_llm_generator()
 
-    def handle(self, query: str, intent: str, session_id: Optional[str] = None) -> Dict[str, Any]:
+    def handle(
+        self,
+        query: str,
+        intent: str,
+        session_id: Optional[str] = None,
+        selected_document_code: Optional[str] = None,
+    ) -> Dict[str, Any]:
         """
         Full RAG pipeline:
         1. Search (hybrid: BM25 + vector)
@@ -51,6 +58,9 @@ class SlowPathHandler:
             Response dict with message, intent, results, etc.
         """
         query = query.strip()
+        selected_document_code_normalized = (
+            selected_document_code.strip().upper() if selected_document_code else None
+        )
 
         # Handle greetings
         if intent == "greeting":
@@ -70,8 +80,26 @@ class SlowPathHandler:
                 "_source": "slow_path"
             }
 
-        # Search based on intent - retrieve top-8 for reranking
-        search_result = self._search_by_intent(intent, query, limit=8)  # Increased to 8 for reranker
+        # Search based on intent - retrieve top-15 for reranking (balance speed and RAM)
+        search_result = self._search_by_intent(
+            intent,
+            query,
+            limit=15,
+            preferred_document_code=selected_document_code_normalized,
+        )  # Balance: 15 for good recall, not too slow
+
+        if intent == "search_legal":
+            clarification = self._maybe_request_clarification(
+                query=query,
+                search_result=search_result,
+                selected_document_code=selected_document_code_normalized,
+            )
+            if clarification:
+                clarification.setdefault("intent", intent)
+                clarification.setdefault("_source", "clarification")
+                clarification.setdefault("routing", "clarification")
+                clarification.setdefault("confidence", 0.3)
+                return clarification
 
         # Fast path for high-confidence legal queries (skip for complex queries)
         fast_path_response = None
@@ -82,17 +110,18 @@ class SlowPathHandler:
             fast_path_response["_source"] = "fast_path"
             return fast_path_response
 
-        # Rerank results from top-8 to top-3 for legal queries (reduces prompt size by ~40%)
-        # Always rerank if we have legal results (even if <= 3, reranker improves relevance)
-        if intent == "search_legal":
+        # Rerank results - DISABLED for speed (can enable via ENABLE_RERANKER env var)
+        # Reranker adds 1-3 seconds delay, skip for faster responses
+        enable_reranker = os.environ.get("ENABLE_RERANKER", "false").lower() == "true"
+        if intent == "search_legal" and enable_reranker:
             try:
                 # Lazy import to avoid blocking startup (FlagEmbedding may download model)
                 from hue_portal.core.reranker import rerank_documents
 
                 legal_results = [r for r in search_result["results"] if r.get("type") == "legal"]
                 if len(legal_results) > 0:
-                    # Rerank to top-3 (or all if we have fewer)
-                    top_k = min(3, len(legal_results))
+                    # Rerank to top-4 (balance speed and context quality)
+                    top_k = min(4, len(legal_results))
                     reranked = rerank_documents(query, legal_results, top_k=top_k)
                     # Update search_result with reranked results (keep non-legal results)
                     non_legal = [r for r in search_result["results"] if r.get("type") != "legal"]
@@ -106,6 +135,9 @@ class SlowPathHandler:
                     )
             except Exception as e:
                 logger.warning("[RERANKER] Reranking failed: %s, using original results", e)
+        elif intent == "search_legal":
+            # Skip reranking for speed - just use top results by score
+            logger.debug("[RERANKER] Skipped reranking for speed (ENABLE_RERANKER=false)")
 
         # STEP 1: Bypass the LLM when results are already good (avoids context overflow, 30-40% faster)
         # Only applies to legal queries whose results have high scores
@@ -202,9 +234,9 @@ class SlowPathHandler:
         # Generate response message using LLM if available and we have documents
         message = None
         if self.llm_generator and search_result["count"] > 0:
-            # For legal queries, use structured output (now with top-3 reranked results)
+            # For legal queries, use structured output (top-4 for good context and speed)
             if intent == "search_legal" and search_result["results"]:
-                legal_docs = [r["data"] for r in search_result["results"] if r.get("type") == "legal"][:3]  # Top-3 after reranking
+                legal_docs = [r["data"] for r in search_result["results"] if r.get("type") == "legal"][:4]  # Top-4 for balance
                 if legal_docs:
                     structured_answer = self.llm_generator.generate_structured_legal_answer(
                         query,
@@ -216,7 +248,7 @@ class SlowPathHandler:
 
             # For other intents or if structured failed, use regular LLM generation
             if not message:
-                documents = [r["data"] for r in search_result["results"][:3]]  # Top-3 after reranking
+                documents = [r["data"] for r in search_result["results"][:4]]  # Top-4 for balance
                 message = self.llm_generator.generate_answer(
                     query,
                     context=context,
@@ -272,8 +304,163 @@ class SlowPathHandler:
         }
 
         return response
+
+    def _maybe_request_clarification(
+        self,
+        query: str,
+        search_result: Dict[str, Any],
+        selected_document_code: Optional[str] = None,
+    ) -> Optional[Dict[str, Any]]:
+        """If multiple legal documents compete and no doc code specified, ask user to clarify."""
+        if selected_document_code:
+            return None
+        if not search_result or search_result.get("count", 0) == 0:
+            return None
+        detected_code = self._detect_document_code(query)
+        if detected_code:
+            return None
+        legal_results = [r for r in search_result["results"] if r.get("type") == "legal"]
+        if len(legal_results) < 2:
+            return None
+        candidates = self._collect_document_candidates(legal_results, limit=4)
+        if len(candidates) < 2:
+            return None
+        payload = self._build_clarification_payload(query, candidates)
+        if payload:
+            logger.info(
+                "[CLARIFICATION] Requesting user choice among documents: %s",
+                [c["code"] for c in candidates],
+            )
+        return payload
+
+    def _collect_document_candidates(
+        self,
+        legal_results: List[Dict[str, Any]],
+        limit: int = 4,
+    ) -> List[Dict[str, Any]]:
+        """Collect unique document candidates from legal results."""
+        ordered_codes: List[str] = []
+        seen: set[str] = set()
+        for result in legal_results:
+            data = result.get("data", {})
+            code = (data.get("document_code") or "").strip()
+            if not code:
+                continue
+            upper = code.upper()
+            if upper in seen:
+                continue
+            ordered_codes.append(code)
+            seen.add(upper)
+            if len(ordered_codes) >= limit:
+                break
+        if len(ordered_codes) < 2:
+            return []
+        try:
+            documents = {
+                doc.code.upper(): doc
+                for doc in LegalDocument.objects.filter(code__in=ordered_codes)
+            }
+        except Exception as exc:
+            logger.warning("[CLARIFICATION] Unable to load documents for candidates: %s", exc)
+            documents = {}
+        candidates: List[Dict[str, Any]] = []
+        for code in ordered_codes:
+            upper = code.upper()
+            doc_obj = documents.get(upper)
+            section = next(
+                (
+                    res
+                    for res in legal_results
+                    if (res.get("data", {}).get("document_code") or "").strip().upper() == upper
+                ),
+                None,
+            )
+            data = section.get("data", {}) if section else {}
+            summary = ""
+            if doc_obj:
+                summary = doc_obj.summary or ""
+                if not summary and isinstance(doc_obj.metadata, dict):
+                    summary = doc_obj.metadata.get("summary", "")
+            if not summary:
+                summary = data.get("excerpt") or data.get("content", "")[:200]
+            candidates.append(
+                {
+                    "code": code,
+                    "title": data.get("document_title") or (doc_obj.title if doc_obj else code),
+                    "summary": summary,
+                    "doc_type": doc_obj.doc_type if doc_obj else "",
+                    "section_title": data.get("section_title") or "",
+                }
+            )
+        return candidates
+
+    def _build_clarification_payload(
+        self,
+        query: str,
+        candidates: List[Dict[str, Any]],
+    ) -> Optional[Dict[str, Any]]:
+        if not candidates:
+            return None
+        default_message = (
+            "Tôi tìm thấy một số văn bản có thể phù hợp. "
+            "Bạn vui lòng chọn văn bản muốn tra cứu để tôi trả lời chính xác hơn."
+        )
+        llm_payload = self._call_clarification_llm(query, candidates)
+        if llm_payload:
+            message = llm_payload.get("message") or default_message
+            options = llm_payload.get("options") or []
+        else:
+            message = default_message
+            options = [
+                {
+                    "code": candidate["code"].upper(),
+                    "title": candidate["title"],
+                    "reason": candidate.get("summary") or candidate.get("section_title") or "",
+                }
+                for candidate in candidates[:3]
+            ]
+        if not any(opt.get("code") == "__other__" for opt in options):
+            options.append(
+                {
+                    "code": "__other__",
+                    "title": "Khác",
+                    "reason": "Tôi muốn hỏi văn bản hoặc chủ đề khác",
+                }
+            )
+        return {
+            "message": message,
+            "clarification": {
+                "message": message,
+                "options": options,
+            },
+            "results": [],
+            "count": 0,
+        }
+
+    def _call_clarification_llm(
+        self,
+        query: str,
+        candidates: List[Dict[str, Any]],
+    ) -> Optional[Dict[str, Any]]:
+        if not self.llm_generator:
+            return None
+        try:
+            return self.llm_generator.suggest_clarification_topics(
+                query,
+                candidates,
+                max_options=3,
+            )
+        except Exception as exc:
+            logger.warning("[CLARIFICATION] LLM suggestion failed: %s", exc)
+            return None
 
-    def _search_by_intent(self, intent: str, query: str, limit: int = 5) -> Dict[str, Any]:
+    def _search_by_intent(
+        self,
+        intent: str,
+        query: str,
+        limit: int = 5,
+        preferred_document_code: Optional[str] = None,
+    ) -> Dict[str, Any]:
         """Search based on classified intent. Reduced limit from 20 to 5 for faster inference on free tier."""
         # Use original query for better matching
         keywords = query.strip()
@@ -335,30 +522,31 @@ class SlowPathHandler:
             qs = LegalSection.objects.all()
             text_fields = ["section_title", "section_code", "content"]
            detected_code = self._detect_document_code(query)
+            effective_code = preferred_document_code or detected_code
             filtered = False
-            if detected_code:
-                filtered_qs = qs.filter(document__code__iexact=detected_code)
+            if effective_code:
+                filtered_qs = qs.filter(document__code__iexact=effective_code)
                 if filtered_qs.exists():
                     qs = filtered_qs
                     filtered = True
                     logger.info(
                         "[SEARCH] Prefiltering legal sections for document code %s (query='%s')",
-                        detected_code,
+                        effective_code,
                         query,
                     )
                 else:
                     logger.info(
                         "[SEARCH] Document code %s detected but no sections found locally, falling back to full corpus",
-                        detected_code,
+                        effective_code,
                     )
             else:
                 logger.debug("[SEARCH] No document code detected for query: %s", query)
-            # Retrieve top-8 for reranking (will be reduced to top-3 after rerank)
+            # Retrieve top-15 for reranking (will be reduced to top-4 after rerank)
            search_results = search_with_ml(
                 qs,
                 keywords,
                 text_fields,
-                top_k=limit,  # limit=8 for reranking, will be reduced to 3
+                top_k=limit,  # limit=15 for reranking, will be reduced to 4
                 min_score=0.02,  # Lower threshold for legal
             )
             results = self._format_legal_results(search_results, detected_code, query=query)
@@ -375,7 +563,8 @@ class SlowPathHandler:
             "query": query,
             "keywords": keywords,
             "results": results,
-            "count": len(results)
+            "count": len(results),
+            "detected_code": detected_code,
         }
 
     def _should_save_to_golden(self, query: str, response: Dict) -> bool:
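The new selected_document_code parameter closes the clarification loop introduced in this commit: when handle() returns a payload containing a clarification block, the caller is expected to show the options to the user and call handle() again with the chosen document code. A minimal caller sketch, assuming a no-argument SlowPathHandler() constructor and a UI layer (not shown in this diff) that collects the user's choice:

from hue_portal.chatbot.slow_path_handler import SlowPathHandler

handler = SlowPathHandler()  # assumed no-arg constructor
query = "Mức phạt khi không đội mũ bảo hiểm?"  # illustrative legal query

response = handler.handle(query=query, intent="search_legal")
if response.get("clarification"):
    # options look like [{"code": ..., "title": ..., "reason": ...}, ..., {"code": "__other__", ...}]
    options = response["clarification"]["options"]
    chosen = options[0]["code"]  # in practice, whichever option the user picks
    if chosen != "__other__":
        # Passing the chosen code skips _maybe_request_clarification and prefilters the search.
        response = handler.handle(
            query=query,
            intent="search_legal",
            selected_document_code=chosen,
        )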
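Note that reranking is now opt-in: handle() only runs the FlagEmbedding reranker when the ENABLE_RERANKER environment variable is the string "true" (case-insensitive). A quick sketch of re-enabling it in a deployment that can absorb the extra 1-3 seconds per query:

import os

# Must be set before handle() runs; the check is
# os.environ.get("ENABLE_RERANKER", "false").lower() == "true",
# so values like "1" or "yes" will NOT enable reranking.
os.environ["ENABLE_RERANKER"] = "true"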