Spaces:

davidtran999
/

hue-portal-backendDocker

Running

App Files Files Community

davidtran999 committed 16 days ago

Commit

af1bc2d

verified ·

1 Parent(s): 5181f18

Upload backend/hue_portal/chatbot/slow_path_handler.py with huggingface_hub

Browse files

Files changed (1) hide show

backend/hue_portal/chatbot/slow_path_handler.py +208 -19

backend/hue_portal/chatbot/slow_path_handler.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """
 Slow Path Handler - Full RAG pipeline for complex queries.
 """
 import time
 import logging
 from typing import Dict, Any, Optional, List
@@ -33,7 +34,13 @@ class SlowPathHandler:
         self.chatbot = get_chatbot()
         self.llm_generator = get_llm_generator()
-    def handle(self, query: str, intent: str, session_id: Optional[str] = None) -> Dict[str, Any]:
         """
         Full RAG pipeline:
         1. Search (hybrid: BM25 + vector)
@@ -51,6 +58,9 @@ class SlowPathHandler:
             Response dict with message, intent, results, etc.
         """
         query = query.strip()
         # Handle greetings
         if intent == "greeting":
@@ -70,8 +80,26 @@ class SlowPathHandler:
                     "_source": "slow_path"
                 }
-        # Search based on intent - retrieve top-8 for reranking
-        search_result = self._search_by_intent(intent, query, limit=8)  # Increased to 8 for reranker
         # Fast path for high-confidence legal queries (skip for complex queries)
         fast_path_response = None
@@ -82,17 +110,18 @@ class SlowPathHandler:
                 fast_path_response["_source"] = "fast_path"
                 return fast_path_response
-        # Rerank results from top-8 to top-3 for legal queries (reduces prompt size by ~40%)
-        # Always rerank if we have legal results (even if <= 3, reranker improves relevance)
-        if intent == "search_legal":
             try:
                 # Lazy import to avoid blocking startup (FlagEmbedding may download model)
                 from hue_portal.core.reranker import rerank_documents
                 legal_results = [r for r in search_result["results"] if r.get("type") == "legal"]
                 if len(legal_results) > 0:
-                    # Rerank to top-3 (or all if we have fewer)
-                    top_k = min(3, len(legal_results))
                     reranked = rerank_documents(query, legal_results, top_k=top_k)
                     # Update search_result with reranked results (keep non-legal results)
                     non_legal = [r for r in search_result["results"] if r.get("type") != "legal"]
@@ -106,6 +135,9 @@ class SlowPathHandler:
                     )
             except Exception as e:
                 logger.warning("[RERANKER] Reranking failed: %s, using original results", e)
         # BƯỚC 1: Bypass LLM khi có results tốt (tránh context overflow + tăng tốc 30-40%)
         # Chỉ áp dụng cho legal queries có results với score cao
@@ -202,9 +234,9 @@ class SlowPathHandler:
         # Generate response message using LLM if available and we have documents
         message = None
         if self.llm_generator and search_result["count"] > 0:
-            # For legal queries, use structured output (now with top-3 reranked results)
             if intent == "search_legal" and search_result["results"]:
-                legal_docs = [r["data"] for r in search_result["results"] if r.get("type") == "legal"][:3]  # Top-3 after reranking
                 if legal_docs:
                     structured_answer = self.llm_generator.generate_structured_legal_answer(
                         query,
@@ -216,7 +248,7 @@ class SlowPathHandler:
             # For other intents or if structured failed, use regular LLM generation
             if not message:
-                documents = [r["data"] for r in search_result["results"][:3]]  # Top-3 after reranking
                 message = self.llm_generator.generate_answer(
                     query,
                     context=context,
@@ -272,8 +304,163 @@ class SlowPathHandler:
         }
         return response
-    def _search_by_intent(self, intent: str, query: str, limit: int = 5) -> Dict[str, Any]:
         """Search based on classified intent. Reduced limit from 20 to 5 for faster inference on free tier."""
         # Use original query for better matching
         keywords = query.strip()
@@ -335,30 +522,31 @@ class SlowPathHandler:
             qs = LegalSection.objects.all()
             text_fields = ["section_title", "section_code", "content"]
             detected_code = self._detect_document_code(query)
             filtered = False
-            if detected_code:
-                filtered_qs = qs.filter(document__code__iexact=detected_code)
                 if filtered_qs.exists():
                     qs = filtered_qs
                     filtered = True
                     logger.info(
                         "[SEARCH] Prefiltering legal sections for document code %s (query='%s')",
-                        detected_code,
                         query,
                     )
                 else:
                     logger.info(
                         "[SEARCH] Document code %s detected but no sections found locally, falling back to full corpus",
-                        detected_code,
                     )
             else:
                 logger.debug("[SEARCH] No document code detected for query: %s", query)
-            # Retrieve top-8 for reranking (will be reduced to top-3 after rerank)
             search_results = search_with_ml(
                 qs,
                 keywords,
                 text_fields,
-                top_k=limit,  # limit=8 for reranking, will be reduced to 3
                 min_score=0.02,  # Lower threshold for legal
             )
             results = self._format_legal_results(search_results, detected_code, query=query)
@@ -375,7 +563,8 @@ class SlowPathHandler:
             "query": query,
             "keywords": keywords,
             "results": results,
-            "count": len(results)
         }
     def _should_save_to_golden(self, query: str, response: Dict) -> bool:

 """
 Slow Path Handler - Full RAG pipeline for complex queries.
 """
+import os
 import time
 import logging
 from typing import Dict, Any, Optional, List
         self.chatbot = get_chatbot()
         self.llm_generator = get_llm_generator()
+    def handle(
+        self,
+        query: str,
+        intent: str,
+        session_id: Optional[str] = None,
+        selected_document_code: Optional[str] = None,
+    ) -> Dict[str, Any]:
         """
         Full RAG pipeline:
         1. Search (hybrid: BM25 + vector)
             Response dict with message, intent, results, etc.
         """
         query = query.strip()
+        selected_document_code_normalized = (
+            selected_document_code.strip().upper() if selected_document_code else None
+        )
         # Handle greetings
         if intent == "greeting":
                     "_source": "slow_path"
                 }
+        # Search based on intent - retrieve top-15 for reranking (balance speed and RAM)
+        search_result = self._search_by_intent(
+            intent,
+            query,
+            limit=15,
+            preferred_document_code=selected_document_code_normalized,
+        )  # Balance: 15 for good recall, not too slow
+        if intent == "search_legal":
+            clarification = self._maybe_request_clarification(
+                query=query,
+                search_result=search_result,
+                selected_document_code=selected_document_code_normalized,
+            )
+            if clarification:
+                clarification.setdefault("intent", intent)
+                clarification.setdefault("_source", "clarification")
+                clarification.setdefault("routing", "clarification")
+                clarification.setdefault("confidence", 0.3)
+                return clarification
         # Fast path for high-confidence legal queries (skip for complex queries)
         fast_path_response = None
                 fast_path_response["_source"] = "fast_path"
                 return fast_path_response
+        # Rerank results - DISABLED for speed (can enable via ENABLE_RERANKER env var)
+        # Reranker adds 1-3 seconds delay, skip for faster responses
+        enable_reranker = os.environ.get("ENABLE_RERANKER", "false").lower() == "true"
+        if intent == "search_legal" and enable_reranker:
             try:
                 # Lazy import to avoid blocking startup (FlagEmbedding may download model)
                 from hue_portal.core.reranker import rerank_documents
                 legal_results = [r for r in search_result["results"] if r.get("type") == "legal"]
                 if len(legal_results) > 0:
+                    # Rerank to top-4 (balance speed and context quality)
+                    top_k = min(4, len(legal_results))
                     reranked = rerank_documents(query, legal_results, top_k=top_k)
                     # Update search_result with reranked results (keep non-legal results)
                     non_legal = [r for r in search_result["results"] if r.get("type") != "legal"]
                     )
             except Exception as e:
                 logger.warning("[RERANKER] Reranking failed: %s, using original results", e)
+        elif intent == "search_legal":
+            # Skip reranking for speed - just use top results by score
+            logger.debug("[RERANKER] Skipped reranking for speed (ENABLE_RERANKER=false)")
         # BƯỚC 1: Bypass LLM khi có results tốt (tránh context overflow + tăng tốc 30-40%)
         # Chỉ áp dụng cho legal queries có results với score cao
         # Generate response message using LLM if available and we have documents
         message = None
         if self.llm_generator and search_result["count"] > 0:
+            # For legal queries, use structured output (top-4 for good context and speed)
             if intent == "search_legal" and search_result["results"]:
+                legal_docs = [r["data"] for r in search_result["results"] if r.get("type") == "legal"][:4]  # Top-4 for balance
                 if legal_docs:
                     structured_answer = self.llm_generator.generate_structured_legal_answer(
                         query,
             # For other intents or if structured failed, use regular LLM generation
             if not message:
+                documents = [r["data"] for r in search_result["results"][:4]]  # Top-4 for balance
                 message = self.llm_generator.generate_answer(
                     query,
                     context=context,
         }
         return response
+    def _maybe_request_clarification(
+        self,
+        query: str,
+        search_result: Dict[str, Any],
+        selected_document_code: Optional[str] = None,
+    ) -> Optional[Dict[str, Any]]:
+        """Return a clarification payload when several legal documents compete.
+
+        No clarification is produced (``None`` is returned) when any of the
+        following holds:
+
+        * the caller already supplied ``selected_document_code``;
+        * ``search_result`` is empty or reports a ``count`` of 0;
+        * a document code can be detected in ``query`` itself;
+        * fewer than two results of type ``"legal"`` are present;
+        * fewer than two document candidates can be collected from them.
+
+        Otherwise the payload built by ``_build_clarification_payload`` is
+        returned as-is (which may itself be ``None``).
+        """
+        # A document was chosen explicitly, or there is nothing to choose from.
+        if selected_document_code or not search_result:
+            return None
+        if search_result.get("count", 0) == 0:
+            return None
+        # The query text already names a document code.
+        if self._detect_document_code(query):
+            return None
+        legal_hits = []
+        for hit in search_result["results"]:
+            if hit.get("type") == "legal":
+                legal_hits.append(hit)
+        if len(legal_hits) < 2:
+            return None
+        doc_candidates = self._collect_document_candidates(legal_hits, limit=4)
+        if len(doc_candidates) < 2:
+            return None
+        response = self._build_clarification_payload(query, doc_candidates)
+        if not response:
+            return response
+        logger.info(
+            "[CLARIFICATION] Requesting user choice among documents: %s",
+            [entry["code"] for entry in doc_candidates],
+        )
+        return response
+    def _collect_document_candidates(
+        self,
+        legal_results: List[Dict[str, Any]],
+        limit: int = 4,
+    ) -> List[Dict[str, Any]]:
+        """Collect distinct document candidates from legal search results.
+
+        Results are scanned in order; the first section seen for each
+        document code (compared case-insensitively) represents that document.
+        Scanning stops once ``limit`` distinct codes have been found.
+
+        Args:
+            legal_results: Search results; each may carry a ``"data"`` dict
+                with ``document_code``, ``document_title``, ``section_title``,
+                ``excerpt`` and ``content`` entries.
+            limit: Maximum number of distinct documents to return.
+
+        Returns:
+            A list of dicts with the keys ``code``, ``title``, ``summary``,
+            ``doc_type`` and ``section_title``, or an empty list when fewer
+            than two distinct document codes are present.
+        """
+        # First pass: remember each distinct code in result order together
+        # with the data of its first section, so no second scan is needed.
+        first_section: Dict[str, Dict[str, Any]] = {}
+        ordered_codes: List[str] = []
+        for result in legal_results:
+            # "data" may be missing or explicitly None; treat both as empty.
+            data = result.get("data") or {}
+            code = (data.get("document_code") or "").strip()
+            if not code:
+                continue
+            upper = code.upper()
+            if upper in first_section:
+                continue
+            first_section[upper] = data
+            ordered_codes.append(code)
+            if len(ordered_codes) >= limit:
+                break
+        if len(ordered_codes) < 2:
+            return []
+        # Best effort: document metadata only enriches the candidates, so a
+        # lookup failure degrades to section-level data instead of raising.
+        # NOTE(review): the lookup passes codes exactly as they appear in the
+        # results; confirm stored codes use the same casing.
+        try:
+            documents = {
+                doc.code.upper(): doc
+                for doc in LegalDocument.objects.filter(code__in=ordered_codes)
+            }
+        except Exception as exc:
+            logger.warning("[CLARIFICATION] Unable to load documents for candidates: %s", exc)
+            documents = {}
+        candidates: List[Dict[str, Any]] = []
+        for code in ordered_codes:
+            upper = code.upper()
+            doc_obj = documents.get(upper)
+            data = first_section[upper]
+            summary = ""
+            if doc_obj:
+                summary = doc_obj.summary or ""
+                if not summary and isinstance(doc_obj.metadata, dict):
+                    summary = doc_obj.metadata.get("summary", "")
+            if not summary:
+                # "content" can be present but None; guard before slicing.
+                summary = data.get("excerpt") or (data.get("content") or "")[:200]
+            candidates.append(
+                {
+                    "code": code,
+                    "title": data.get("document_title") or (doc_obj.title if doc_obj else code),
+                    "summary": summary,
+                    "doc_type": doc_obj.doc_type if doc_obj else "",
+                    "section_title": data.get("section_title") or "",
+                }
+            )
+        return candidates
+    def _build_clarification_payload(
+        self,
+        query: str,
+        candidates: List[Dict[str, Any]],
+    ) -> Optional[Dict[str, Any]]:
+        """Build the response asking the user which document they mean.
+
+        The message and options come from ``_call_clarification_llm`` when it
+        returns usable values. Otherwise a default message is used and the
+        options are derived from the first three candidates. An ``__other__``
+        escape option is always present in the result.
+
+        Args:
+            query: The user's original query.
+            candidates: Candidate dicts with at least ``code`` and ``title``;
+                ``summary`` and ``section_title`` are used for the reason.
+
+        Returns:
+            ``None`` when ``candidates`` is empty, otherwise a dict with
+            ``message``, ``clarification`` (message plus options), an empty
+            ``results`` list and a ``count`` of 0.
+        """
+        if not candidates:
+            return None
+        default_message = (
+            "Tôi tìm thấy một số văn bản có thể phù hợp. "
+            "Bạn vui lòng chọn văn bản muốn tra cứu để tôi trả lời chính xác hơn."
+        )
+        llm_payload = self._call_clarification_llm(query, candidates) or {}
+        message = llm_payload.get("message") or default_message
+        # Build a new list so the append below never mutates the list owned by
+        # the LLM payload, and drop entries that are not option dicts.
+        options = [
+            opt for opt in (llm_payload.get("options") or []) if isinstance(opt, dict)
+        ]
+        if not options:
+            # No usable LLM options: offer the candidates themselves instead
+            # of leaving the user with only the "other" choice.
+            options = [
+                {
+                    "code": candidate["code"].upper(),
+                    "title": candidate["title"],
+                    "reason": candidate.get("summary") or candidate.get("section_title") or "",
+                }
+                for candidate in candidates[:3]
+            ]
+        if not any(opt.get("code") == "__other__" for opt in options):
+            options.append(
+                {
+                    "code": "__other__",
+                    "title": "Khác",
+                    "reason": "Tôi muốn hỏi văn bản hoặc chủ đề khác",
+                }
+            )
+        return {
+            "message": message,
+            "clarification": {
+                "message": message,
+                "options": options,
+            },
+            "results": [],
+            "count": 0,
+        }
+    def _call_clarification_llm(
+        self,
+        query: str,
+        candidates: List[Dict[str, Any]],
+    ) -> Optional[Dict[str, Any]]:
+        """Ask the LLM generator for clarification suggestions, best effort.
+
+        Returns whatever ``suggest_clarification_topics`` returns (called with
+        ``max_options=3``), or ``None`` when no generator is configured or the
+        call raises; a failure is logged as a warning rather than propagated.
+        """
+        if self.llm_generator:
+            try:
+                suggestion = self.llm_generator.suggest_clarification_topics(
+                    query,
+                    candidates,
+                    max_options=3,
+                )
+            except Exception as exc:
+                logger.warning("[CLARIFICATION] LLM suggestion failed: %s", exc)
+            else:
+                return suggestion
+        return None
+    def _search_by_intent(
+        self,
+        intent: str,
+        query: str,
+        limit: int = 5,
+        preferred_document_code: Optional[str] = None,
+    ) -> Dict[str, Any]:
         """Search based on classified intent. Reduced limit from 20 to 5 for faster inference on free tier."""
         # Use original query for better matching
         keywords = query.strip()
             qs = LegalSection.objects.all()
             text_fields = ["section_title", "section_code", "content"]
             detected_code = self._detect_document_code(query)
+            effective_code = preferred_document_code or detected_code
             filtered = False
+            if effective_code:
+                filtered_qs = qs.filter(document__code__iexact=effective_code)
                 if filtered_qs.exists():
                     qs = filtered_qs
                     filtered = True
                     logger.info(
                         "[SEARCH] Prefiltering legal sections for document code %s (query='%s')",
+                        effective_code,
                         query,
                     )
                 else:
                     logger.info(
                         "[SEARCH] Document code %s detected but no sections found locally, falling back to full corpus",
+                        effective_code,
                     )
             else:
                 logger.debug("[SEARCH] No document code detected for query: %s", query)
+            # Retrieve top-15 for reranking (will be reduced to top-4 after rerank)
             search_results = search_with_ml(
                 qs,
                 keywords,
                 text_fields,
+                top_k=limit,  # limit=15 for reranking, will be reduced to 4
                 min_score=0.02,  # Lower threshold for legal
             )
             results = self._format_legal_results(search_results, detected_code, query=query)
             "query": query,
             "keywords": keywords,
             "results": results,
+            "count": len(results),
+            "detected_code": detected_code,
         }
     def _should_save_to_golden(self, query: str, response: Dict) -> bool: