Spaces:

davidtran999
/

hue-portal-backendDocker

Running

App Files Files Community

davidtran999 committed 17 days ago

Commit

d2c3c89

verified ·

1 Parent(s): 6260a86

Upload backend/hue_portal/chatbot/slow_path_handler.py with huggingface_hub

Browse files

Files changed (1) hide show

backend/hue_portal/chatbot/slow_path_handler.py +434 -64

backend/hue_portal/chatbot/slow_path_handler.py CHANGED Viewed

@@ -4,9 +4,12 @@ Slow Path Handler - Full RAG pipeline for complex queries.
 import os
 import time
 import logging
-from typing import Dict, Any, Optional, List
 import unicodedata
 import re
 from hue_portal.core.chatbot import get_chatbot, RESPONSE_TEMPLATES
 from hue_portal.core.models import (
@@ -18,12 +21,15 @@ from hue_portal.core.models import (
     LegalDocument,
 )
 from hue_portal.core.search_ml import search_with_ml
 # Lazy import reranker to avoid blocking startup (FlagEmbedding may download model)
 # from hue_portal.core.reranker import rerank_documents
 from hue_portal.chatbot.llm_integration import get_llm_generator
 from hue_portal.chatbot.structured_legal import format_structured_legal_answer
 from hue_portal.chatbot.context_manager import ConversationContext
 from hue_portal.chatbot.router import DOCUMENT_CODE_PATTERNS
 logger = logging.getLogger(__name__)
@@ -34,6 +40,15 @@ class SlowPathHandler:
     def __init__(self):
         self.chatbot = get_chatbot()
         self.llm_generator = get_llm_generator()
     def handle(
         self,
@@ -54,6 +69,7 @@ class SlowPathHandler:
             query: User query.
             intent: Detected intent.
             session_id: Optional session ID for context.
         Returns:
             Response dict with message, intent, results, etc.
@@ -62,7 +78,7 @@ class SlowPathHandler:
         selected_document_code_normalized = (
             selected_document_code.strip().upper() if selected_document_code else None
         )
         # Handle greetings
         if intent == "greeting":
             query_lower = query.lower().strip()
@@ -80,7 +96,7 @@ class SlowPathHandler:
                     "count": 0,
                     "_source": "slow_path"
                 }
         # Wizard / option-first cho mọi câu hỏi pháp lý chung:
         # Nếu:
         #   - intent là search_legal
@@ -101,68 +117,236 @@ class SlowPathHandler:
             and not selected_document_code_normalized
             and not has_explicit_code
         ):
-            logger.info("[WIZARD] ✅ Wizard conditions met, returning options payload")
-            canonical_candidates: List[Dict[str, Any]] = []
             try:
-                canonical_docs = list(
-                    LegalDocument.objects.filter(
-                        code__in=["264-QD-TW", "QD-69-TW", "TT-02-CAND"]
-                    )
                 )
-                for doc in canonical_docs:
-                    summary = getattr(doc, "summary", "") or ""
-                    metadata = getattr(doc, "metadata", {}) or {}
-                    if not summary and isinstance(metadata, dict):
-                        summary = metadata.get("summary", "")
-                    canonical_candidates.append(
                         {
-                            "code": doc.code,
-                            "title": getattr(doc, "title", "") or doc.code,
-                            "summary": summary,
-                            "doc_type": getattr(doc, "doc_type", "") or "",
-                            "section_title": "",
-                        }
                     )
             except Exception as exc:
-                logger.warning(
-                    "[CLARIFICATION] Canonical documents lookup failed, using static list: %s",
                     exc,
                 )
-            if not canonical_candidates:
-                canonical_candidates = [
-                    {
-                        "code": "264-QD-TW",
-                        "title": "Quyết định 264-QĐ/TW về kỷ luật đảng viên",
-                        "summary": "",
-                        "doc_type": "",
-                        "section_title": "",
-                    },
-                    {
-                        "code": "QD-69-TW",
-                        "title": "Quy định 69-QĐ/TW về kỷ luật tổ chức đảng, đảng viên",
-                        "summary": "",
-                        "doc_type": "",
-                        "section_title": "",
-                    },
-                    {
-                        "code": "TT-02-CAND",
-                        "title": "Thông tư 02/2021/TT-BCA về điều lệnh CAND",
-                        "summary": "",
-                        "doc_type": "",
-                        "section_title": "",
-                    },
-                ]
-            clarification_payload = self._build_clarification_payload(
-                query, canonical_candidates
-            )
-            if clarification_payload:
-                clarification_payload.setdefault("intent", intent)
-                clarification_payload.setdefault("_source", "clarification")
-                clarification_payload.setdefault("routing", "clarification")
-                clarification_payload.setdefault("confidence", 0.3)
-                return clarification_payload
         # Search based on intent - retrieve top-15 for reranking (balance speed and RAM)
         search_result = self._search_by_intent(
@@ -391,7 +575,7 @@ class SlowPathHandler:
         }
         return response
     def _maybe_request_clarification(
         self,
         query: str,
@@ -651,6 +835,193 @@ class SlowPathHandler:
             logger.warning("[CLARIFICATION] LLM suggestion failed: %s", exc)
             return None
     def _search_by_intent(
         self,
         intent: str,
@@ -738,13 +1109,12 @@ class SlowPathHandler:
                     )
             else:
                 logger.debug("[SEARCH] No document code detected for query: %s", query)
-            # Retrieve top-15 for reranking (will be reduced to top-4 after rerank)
-            search_results = search_with_ml(
                 qs,
-                keywords,
-                text_fields,
                 top_k=limit,  # limit=15 for reranking, will be reduced to 4
-                min_score=0.02,  # Lower threshold for legal
             )
             results = self._format_legal_results(search_results, detected_code, query=query)
             logger.info(

 import os
 import time
 import logging
+import hashlib
+from typing import Dict, Any, Optional, List, Set
 import unicodedata
 import re
+from concurrent.futures import ThreadPoolExecutor, Future
+import threading
 from hue_portal.core.chatbot import get_chatbot, RESPONSE_TEMPLATES
 from hue_portal.core.models import (
     LegalDocument,
 )
 from hue_portal.core.search_ml import search_with_ml
+from hue_portal.core.pure_semantic_search import pure_semantic_search
 # Lazy import reranker to avoid blocking startup (FlagEmbedding may download model)
 # from hue_portal.core.reranker import rerank_documents
 from hue_portal.chatbot.llm_integration import get_llm_generator
 from hue_portal.chatbot.structured_legal import format_structured_legal_answer
 from hue_portal.chatbot.context_manager import ConversationContext
 from hue_portal.chatbot.router import DOCUMENT_CODE_PATTERNS
+from hue_portal.core.query_rewriter import get_query_rewriter
+from hue_portal.core.pure_semantic_search import pure_semantic_search, parallel_vector_search
 logger = logging.getLogger(__name__)
     def __init__(self):
+        """
+        Create the handler's collaborators, worker pool and prefetch caches.
+        Reads the optional CACHE_PREFETCH_TTL environment variable (seconds) that
+        is later passed as the TTL when prefetched results are written to Redis.
+        """
         self.chatbot = get_chatbot()
         self.llm_generator = get_llm_generator()
+        # Thread pool for parallel search (max 2 workers to avoid overwhelming DB)
+        self._executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="parallel_search")
+        # Cache for prefetched results by session_id (in-memory fallback)
+        self._prefetched_cache: Dict[str, Dict[str, Any]] = {}
+        self._cache_lock = threading.Lock()
+        # Redis cache for prefetch results
+        # NOTE(review): get_redis_cache is not among the imports visible in this
+        # diff - confirm it is imported somewhere in the module.
+        self.redis_cache = get_redis_cache()
+        # Prefetch cache TTL (30 minutes default). A malformed value must not
+        # prevent the handler from being constructed, so fall back to the default.
+        default_ttl = 1800
+        raw_ttl = os.environ.get("CACHE_PREFETCH_TTL", str(default_ttl))
+        try:
+            self.prefetch_cache_ttl = int(raw_ttl)
+        except (TypeError, ValueError):
+            logger.warning(
+                "[PARALLEL_SEARCH] Invalid CACHE_PREFETCH_TTL=%r, using default %ds",
+                raw_ttl,
+                default_ttl,
+            )
+            self.prefetch_cache_ttl = default_ttl
     def handle(
         self,
             query: User query.
             intent: Detected intent.
             session_id: Optional session ID for context.
+            selected_document_code: Selected document code from wizard.
         Returns:
             Response dict with message, intent, results, etc.
         selected_document_code_normalized = (
             selected_document_code.strip().upper() if selected_document_code else None
         )
         # Handle greetings
         if intent == "greeting":
             query_lower = query.lower().strip()
                     "count": 0,
                     "_source": "slow_path"
                 }
         # Wizard / option-first cho mọi câu hỏi pháp lý chung:
         # Nếu:
         #   - intent là search_legal
             and not selected_document_code_normalized
             and not has_explicit_code
         ):
+            logger.info("[QUERY_REWRITE] ✅ Wizard conditions met, using Query Rewrite Strategy")
+            # Query Rewrite Strategy: Rewrite query into 3-5 optimized legal queries
+            query_rewriter = get_query_rewriter(self.llm_generator)
+            # Get conversation context for query rewriting
+            context = None
+            if session_id:
+                try:
+                    recent_messages = ConversationContext.get_recent_messages(session_id, limit=5)
+                    context = [
+                        {"role": msg.role, "content": msg.content}
+                        for msg in recent_messages
+                    ]
+                except Exception as exc:
+                    logger.warning("[QUERY_REWRITE] Failed to load context: %s", exc)
+            # Rewrite query into 3-5 queries
+            rewritten_queries = query_rewriter.rewrite_query(
+                query,
+                context=context,
+                max_queries=5,
+                min_queries=3
+            )
+            if not rewritten_queries:
+                # Fallback to original query if rewrite fails
+                rewritten_queries = [query]
+            logger.info(
+                "[QUERY_REWRITE] Rewrote query into %d queries: %s",
+                len(rewritten_queries),
+                rewritten_queries[:3]
+            )
+            # Parallel vector search with multiple queries
             try:
+                from hue_portal.core.models import LegalSection
+                # Search all legal sections (no document filter yet)
+                qs = LegalSection.objects.all()
+                text_fields = ["section_title", "section_code", "content"]
+                # Use parallel vector search
+                search_results = parallel_vector_search(
+                    rewritten_queries,
+                    qs,
+                    top_k_per_query=5,
+                    final_top_k=7,
+                    text_fields=text_fields
                 )
+                # Extract unique document codes from results
+                doc_codes_seen: Set[str] = set()
+                document_options: List[Dict[str, Any]] = []
+                for section, score in search_results:
+                    doc = getattr(section, "document", None)
+                    if not doc:
+                        continue
+                    doc_code = getattr(doc, "code", "").upper()
+                    if not doc_code or doc_code in doc_codes_seen:
+                        continue
+                    doc_codes_seen.add(doc_code)
+                    # Get document metadata
+                    doc_title = getattr(doc, "title", "") or doc_code
+                    doc_summary = getattr(doc, "summary", "") or ""
+                    if not doc_summary:
+                        metadata = getattr(doc, "metadata", {}) or {}
+                        if isinstance(metadata, dict):
+                            doc_summary = metadata.get("summary", "")
+                    document_options.append({
+                        "code": doc_code,
+                        "title": doc_title,
+                        "summary": doc_summary,
+                        "score": float(score),
+                        "doc_type": getattr(doc, "doc_type", "") or "",
+                    })
+                    # Limit to top 5 documents
+                    if len(document_options) >= 5:
+                        break
+                # If no documents found, use canonical fallback
+                if not document_options:
+                    logger.warning("[QUERY_REWRITE] No documents found, using canonical fallback")
+                    canonical_candidates = [
                         {
+                            "code": "264-QD-TW",
+                            "title": "Quyết định 264-QĐ/TW về kỷ luật đảng viên",
+                            "summary": "",
+                            "doc_type": "",
+                        },
+                        {
+                            "code": "QD-69-TW",
+                            "title": "Quy định 69-QĐ/TW về kỷ luật tổ chức đảng, đảng viên",
+                            "summary": "",
+                            "doc_type": "",
+                        },
+                        {
+                            "code": "TT-02-CAND",
+                            "title": "Thông tư 02/2021/TT-BCA về điều lệnh CAND",
+                            "summary": "",
+                            "doc_type": "",
+                        },
+                    ]
+                    clarification_payload = self._build_clarification_payload(
+                        query, canonical_candidates
                     )
+                    if clarification_payload:
+                        clarification_payload.setdefault("intent", intent)
+                        clarification_payload.setdefault("_source", "clarification")
+                        clarification_payload.setdefault("routing", "clarification")
+                        clarification_payload.setdefault("confidence", 0.3)
+                        return clarification_payload
+                # Build options from search results
+                options = [
+                    {
+                        "code": opt["code"],
+                        "title": opt["title"],
+                        "reason": opt.get("summary") or f"Độ liên quan: {opt['score']:.2f}",
+                    }
+                    for opt in document_options
+                ]
+                # Add "Khác" option
+                if not any(opt.get("code") == "__other__" for opt in options):
+                    options.append({
+                        "code": "__other__",
+                        "title": "Khác",
+                        "reason": "Tôi muốn hỏi văn bản hoặc chủ đề pháp luật khác.",
+                    })
+                message = (
+                    "Tôi đã tìm thấy các văn bản pháp luật liên quan đến câu hỏi của bạn.\n\n"
+                    "Bạn hãy chọn văn bản muốn tra cứu để tôi trả lời chi tiết hơn:"
+                )
+                logger.info(
+                    "[QUERY_REWRITE] ✅ Found %d documents using Query Rewrite Strategy",
+                    len(document_options)
+                )
+                return {
+                    "type": "options",
+                    "wizard_stage": "choose_document",
+                    "message": message,
+                    "options": options,
+                    "clarification": {
+                        "message": message,
+                        "options": options,
+                    },
+                    "results": [],
+                    "count": 0,
+                    "intent": intent,
+                    "_source": "query_rewrite",
+                    "routing": "query_rewrite",
+                    "confidence": 0.95,  # High confidence with Query Rewrite Strategy
+                }
             except Exception as exc:
+                logger.error(
+                    "[QUERY_REWRITE] Error in Query Rewrite Strategy: %s, falling back to LLM suggestions",
                     exc,
+                    exc_info=True
                 )
+                # Fallback to original LLM-based clarification
+                canonical_candidates: List[Dict[str, Any]] = []
+                try:
+                    canonical_docs = list(
+                        LegalDocument.objects.filter(
+                            code__in=["264-QD-TW", "QD-69-TW", "TT-02-CAND"]
+                        )
+                    )
+                    for doc in canonical_docs:
+                        summary = getattr(doc, "summary", "") or ""
+                        metadata = getattr(doc, "metadata", {}) or {}
+                        if not summary and isinstance(metadata, dict):
+                            summary = metadata.get("summary", "")
+                        canonical_candidates.append(
+                            {
+                                "code": doc.code,
+                                "title": getattr(doc, "title", "") or doc.code,
+                                "summary": summary,
+                                "doc_type": getattr(doc, "doc_type", "") or "",
+                                "section_title": "",
+                            }
+                        )
+                except Exception as e:
+                    logger.warning("[CLARIFICATION] Canonical documents lookup failed: %s", e)
+                if not canonical_candidates:
+                    canonical_candidates = [
+                        {
+                            "code": "264-QD-TW",
+                            "title": "Quyết định 264-QĐ/TW về kỷ luật đảng viên",
+                            "summary": "",
+                            "doc_type": "",
+                            "section_title": "",
+                        },
+                        {
+                            "code": "QD-69-TW",
+                            "title": "Quy định 69-QĐ/TW về kỷ luật tổ chức đảng, đảng viên",
+                            "summary": "",
+                            "doc_type": "",
+                            "section_title": "",
+                        },
+                        {
+                            "code": "TT-02-CAND",
+                            "title": "Thông tư 02/2021/TT-BCA về điều lệnh CAND",
+                            "summary": "",
+                            "doc_type": "",
+                            "section_title": "",
+                        },
+                    ]
+                clarification_payload = self._build_clarification_payload(
+                    query, canonical_candidates
+                )
+                if clarification_payload:
+                    clarification_payload.setdefault("intent", intent)
+                    clarification_payload.setdefault("_source", "clarification_fallback")
+                    clarification_payload.setdefault("routing", "clarification")
+                    clarification_payload.setdefault("confidence", 0.3)
+                    return clarification_payload
         # Search based on intent - retrieve top-15 for reranking (balance speed and RAM)
         search_result = self._search_by_intent(
         }
         return response
     def _maybe_request_clarification(
         self,
         query: str,
             logger.warning("[CLARIFICATION] LLM suggestion failed: %s", exc)
             return None
+    def _parallel_search_prepare(
+        self,
+        document_code: str,
+        keywords: List[str],
+        session_id: Optional[str] = None,
+    ) -> None:
+        """
+        Trigger parallel search in background when user selects a document option.
+        Stores results in cache for Stage 2 (choose topic).
+        The search runs on the handler's thread pool; any failure is logged and
+        swallowed so it cannot break the request that scheduled it.
+        Args:
+            document_code: Selected document code
+            keywords: Keywords extracted from query/options (None is treated as empty)
+            session_id: Session ID for caching results; without it nothing is scheduled
+        """
+        if not session_id:
+            return
+        def _remember(payload: Dict[str, Any]) -> None:
+            # Per-session in-memory slot, read back by _get_prefetched_results
+            with self._cache_lock:
+                self._prefetched_cache.setdefault(session_id, {})["document_results"] = payload
+        def _search_task():
+            try:
+                terms = list(keywords or [])
+                logger.info(
+                    "[PARALLEL_SEARCH] Starting background search for doc=%s, keywords=%s",
+                    document_code,
+                    terms[:5],
+                )
+                doc_code = document_code.upper()
+                query_text = " ".join(terms)
+                # Check Redis cache first
+                digest = hashlib.sha256(query_text.encode()).hexdigest()[:16]
+                cache_key = f"prefetch:{doc_code}:{digest}"
+                if self.redis_cache and self.redis_cache.is_available():
+                    cached_result = self.redis_cache.get(cache_key)
+                    # Only a dict payload can be consumed by _get_prefetched_results;
+                    # anything else is treated as a miss and searched again.
+                    if cached_result and isinstance(cached_result, dict):
+                        logger.info(
+                            "[PARALLEL_SEARCH] ✅ Cache hit for doc=%s",
+                            document_code
+                        )
+                        # Store in in-memory cache too. Re-stamp a copy: the entry
+                        # may be older than the in-memory freshness window while
+                        # still inside the Redis TTL, and keeping the old timestamp
+                        # would make this hit unusable for the session.
+                        _remember({**cached_result, "timestamp": time.time()})
+                        return
+                # Search in the selected document
+                search_result = self._search_by_intent(
+                    intent="search_legal",
+                    query=query_text,
+                    limit=20,  # Get more results for topic options
+                    preferred_document_code=doc_code,
+                )
+                # Prepare cache data
+                cache_data = {
+                    "document_code": document_code,
+                    "results": search_result.get("results", []),
+                    "count": search_result.get("count", 0),
+                    "timestamp": time.time(),
+                }
+                # Store in Redis cache (availability is re-checked; it may have changed)
+                if self.redis_cache and self.redis_cache.is_available():
+                    self.redis_cache.set(cache_key, cache_data, ttl_seconds=self.prefetch_cache_ttl)
+                    logger.debug(
+                        "[PARALLEL_SEARCH] Cached prefetch results (TTL: %ds)",
+                        self.prefetch_cache_ttl
+                    )
+                # Store in in-memory cache (fallback)
+                _remember(cache_data)
+                logger.info(
+                    "[PARALLEL_SEARCH] Completed background search for doc=%s, found %d results",
+                    document_code,
+                    search_result.get("count", 0),
+                )
+            except Exception as exc:
+                # Best-effort prefetch: keep the traceback, never propagate
+                logger.warning("[PARALLEL_SEARCH] Background search failed: %s", exc, exc_info=True)
+        # Submit to thread pool
+        self._executor.submit(_search_task)
+    def _parallel_search_topic(
+        self,
+        document_code: str,
+        topic_keywords: List[str],
+        session_id: Optional[str] = None,
+    ) -> None:
+        """
+        Schedule a background search for the topic the user has just picked.
+        The outcome is kept under the session's "topic_results" slot so it can be
+        used for final answer generation.
+        Args:
+            document_code: Selected document code
+            topic_keywords: Keywords from selected topic
+            session_id: Session ID for caching results; nothing runs without it
+        """
+        if not session_id:
+            return
+        def _run_topic_search():
+            try:
+                logger.info(
+                    "[PARALLEL_SEARCH] Starting topic search for doc=%s, keywords=%s",
+                    document_code,
+                    topic_keywords[:5],
+                )
+                # An empty keyword list searches with an empty query string
+                joined_keywords = " ".join(topic_keywords or [])
+                outcome = self._search_by_intent(
+                    intent="search_legal",
+                    query=joined_keywords,
+                    limit=10,
+                    preferred_document_code=document_code.upper(),
+                )
+                # Publish the outcome under the session's in-memory slot
+                with self._cache_lock:
+                    session_slot = self._prefetched_cache.setdefault(session_id, {})
+                    session_slot["topic_results"] = {
+                        "document_code": document_code,
+                        "keywords": topic_keywords,
+                        "results": outcome.get("results", []),
+                        "count": outcome.get("count", 0),
+                        "timestamp": time.time(),
+                    }
+                logger.info(
+                    "[PARALLEL_SEARCH] Completed topic search, found %d results",
+                    outcome.get("count", 0),
+                )
+            except Exception as exc:
+                logger.warning("[PARALLEL_SEARCH] Topic search failed: %s", exc)
+        # Hand the task to the shared thread pool
+        self._executor.submit(_run_topic_search)
+    def _get_prefetched_results(
+        self,
+        session_id: Optional[str],
+        result_type: str = "document_results",
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Get prefetched search results from cache.
+        Entries older than 5 minutes, or entries that are not dicts, are removed
+        from the in-memory cache and reported as missing.
+        Args:
+            session_id: Session ID
+            result_type: "document_results" or "topic_results"
+        Returns:
+            Cached results dict or None
+        """
+        if not session_id:
+            return None
+        max_age_seconds = 300  # 5 minutes
+        with self._cache_lock:
+            cache_entry = self._prefetched_cache.get(session_id)
+            if not cache_entry:
+                return None
+            results = cache_entry.get(result_type)
+            if not results:
+                return None
+            # A payload without a usable timestamp counts as expired
+            timestamp = (results.get("timestamp") or 0) if isinstance(results, dict) else 0
+            # Check if results are still fresh (within 5 minutes)
+            if time.time() - timestamp > max_age_seconds:
+                # Evict the stale entry so abandoned sessions do not pile up
+                cache_entry.pop(result_type, None)
+                if not cache_entry:
+                    self._prefetched_cache.pop(session_id, None)
+                logger.debug("[PARALLEL_SEARCH] Prefetched results expired for session=%s", session_id)
+                return None
+            return results
+    def _clear_prefetched_cache(self, session_id: Optional[str]) -> None:
+        """Drop every prefetched result held in memory for one session."""
+        if session_id:
+            with self._cache_lock:
+                try:
+                    del self._prefetched_cache[session_id]
+                except KeyError:
+                    # Nothing was cached for this session; stay silent
+                    return
+                logger.debug("[PARALLEL_SEARCH] Cleared cache for session=%s", session_id)
     def _search_by_intent(
         self,
         intent: str,
                     )
             else:
                 logger.debug("[SEARCH] No document code detected for query: %s", query)
+            # Use pure semantic search (100% vector, no BM25)
+            search_results = pure_semantic_search(
+                [keywords],
                 qs,
                 top_k=limit,  # limit=15 for reranking, will be reduced to 4
+                text_fields=text_fields
             )
             results = self._format_legal_results(search_results, detected_code, query=query)
             logger.info(