Spaces:

davidtran999
/

hue-portal-backend

Paused

App Files Files Community

Davidtran99 committed 30 days ago

Commit

7e53bd3

1 Parent(s): 49a1a82

fix: add DISABLE_WIZARD_FLOW support - reset wizard state when disabled

Browse files

Files changed (2) hide show

backend/hue_portal/chatbot/chatbot.py +123 -132
backend/hue_portal/chatbot/slow_path_handler.py +276 -69

backend/hue_portal/chatbot/chatbot.py CHANGED Viewed

@@ -136,7 +136,7 @@ class Chatbot(CoreChatbot):
         # tránh trả lại các câu trả lời cũ không có options.
         cached_response = None
         if intent != "search_legal":
-        cached_response = EXACT_MATCH_CACHE.get(query, intent)
         if cached_response:
             cached_response["_cache"] = "exact_match"
             cached_response["_source"] = cached_response.get("_source", "cache")
@@ -167,6 +167,8 @@ class Chatbot(CoreChatbot):
         # Stage 2: Choose topic/section (if document selected but no topic)
         # Stage 3: Choose detail (if topic selected, ask for more details)
         # Final: Answer (when user says "Không" or after detail selection)
         has_doc_code_in_query = self._query_has_document_code(query)
         wizard_stage = session_metadata.get("wizard_stage") if session_metadata else None
@@ -175,151 +177,140 @@ class Chatbot(CoreChatbot):
         print(f"[WIZARD] Chatbot layer check - intent={intent}, wizard_stage={wizard_stage}, selected_doc_code={selected_doc_code}, selected_topic={selected_topic}, has_doc_code_in_query={has_doc_code_in_query}, query='{query[:50]}'")
-        # Stage 1: Choose document (if no document selected and no code in query)
-        if intent == "search_legal" and not selected_doc_code and not has_doc_code_in_query:
-            print("[WIZARD] ✅ Chatbot layer wizard triggered, using AI to generate options")
-            # Load canonical documents từ DB
-            canonical_candidates = []
-            try:
-                canonical_docs = list(
-                    LegalDocument.objects.filter(
-                        code__in=["264-QD-TW", "QD-69-TW", "TT-02-CAND"]
-                    )
-                )
-                for doc in canonical_docs:
-                    summary = getattr(doc, "summary", "") or ""
-                    metadata = getattr(doc, "metadata", {}) or {}
-                    if not summary and isinstance(metadata, dict):
-                        summary = metadata.get("summary", "")
-                    canonical_candidates.append(
                         {
-                            "code": doc.code,
-                            "title": getattr(doc, "title", "") or doc.code,
-                            "summary": summary,
-                            "doc_type": getattr(doc, "doc_type", "") or "",
-                            "section_title": "",
                         }
                     )
-            except Exception as exc:
-                logger.warning("[WIZARD] Failed to load canonical documents: %s", exc)
-            # Fallback nếu không load được từ DB
-            if not canonical_candidates:
-                canonical_candidates = [
-                    {
-                        "code": "264-QD-TW",
-                        "title": "Quyết định 264-QĐ/TW về kỷ luật đảng viên",
-                        "summary": "Quy định chung về xử lý kỷ luật đối với đảng viên vi phạm.",
-                        "doc_type": "",
-                        "section_title": "",
-                    },
-                    {
-                        "code": "QD-69-TW",
-                        "title": "Quy định 69-QĐ/TW về kỷ luật tổ chức đảng, đảng viên",
-                        "summary": "Quy định chi tiết về các hành vi vi phạm và hình thức kỷ luật.",
-                        "doc_type": "",
-                        "section_title": "",
-                    },
-                    {
-                        "code": "TT-02-CAND",
-                        "title": "Thông tư 02/2021/TT-BCA về điều lệnh CAND",
-                        "summary": "Quy định về điều lệnh, lễ tiết, tác phong trong CAND.",
-                        "doc_type": "",
-                        "section_title": "",
-                    },
-                ]
-            # Dùng LLM để đề xuất options dựa trên câu hỏi
-            clarification_options = []
-            intro_message = (
-                "Tôi tìm thấy một số nhóm văn bản có thể liên quan đến câu hỏi của bạn.\n\n"
-                "Bạn hãy chọn văn bản muốn tra cứu trước, sau đó tôi sẽ trả lời chi tiết hơn:"
-            )
-            if self.llm_generator:
                 try:
-                    llm_payload = self.llm_generator.suggest_clarification_topics(
-                        query,
-                        canonical_candidates,
-                        max_options=3,
                     )
-                    if llm_payload:
-                        intro_message = llm_payload.get("message") or intro_message
-                        raw_options = llm_payload.get("options")
-                        if isinstance(raw_options, list) and len(raw_options) > 0:
-                            clarification_options = [
-                                {
-                                    "code": (opt.get("code") or candidate.get("code", "")).upper(),
-                                    "title": opt.get("title") or opt.get("document_title") or candidate.get("title", ""),
-                                    "reason": opt.get("reason")
-                                    or opt.get("summary")
-                                    or candidate.get("summary")
-                                    or candidate.get("section_title")
-                                    or "",
-                                }
-                                for opt, candidate in zip(
-                                    raw_options,
-                                    canonical_candidates[: len(raw_options)],
-                                )
-                                if (opt.get("code") or candidate.get("code"))
-                                and (opt.get("title") or opt.get("document_title") or candidate.get("title"))
-                            ]
-                            print(f"[WIZARD] ✅ LLM generated {len(clarification_options)} options")
-                except Exception as exc:
-                    logger.warning("[WIZARD] LLM suggestion failed: %s, using fallback", exc)
-            # Fallback nếu LLM không trả về options hợp lệ
-            if not clarification_options:
-                clarification_options = [
-                    {
-                        "code": candidate["code"].upper(),
-                        "title": candidate["title"],
-                        "reason": candidate.get("summary") or candidate.get("section_title") or "",
-                    }
-                    for candidate in canonical_candidates[:3]
-                ]
-                print("[WIZARD] Using fallback options (LLM unavailable or failed)")
-            # Thêm option "Khác" nếu chưa có
-            if not any(opt.get("code") == "__other__" for opt in clarification_options):
-                clarification_options.append(
-                    {
-                        "code": "__other__",
-                        "title": "Khác",
-                        "reason": "Tôi muốn hỏi văn bản hoặc chủ đề pháp luật khác.",
-                    }
-                )
-            response = {
-                "message": intro_message,
                 "intent": intent,
-                "confidence": confidence,
                 "results": [],
                 "count": 0,
-                "routing": "legal_wizard",
-                "type": "options",
-                "wizard_stage": "choose_document",
-                "clarification": {
-                    "message": intro_message,
-                    "options": clarification_options,
-                },
-                "options": clarification_options,
             }
-            if session_id:
-                response["session_id"] = session_id
-                try:
-                    ConversationContext.add_message(
-                        session_id=session_id,
-                        role="bot",
-                        content=intro_message,
-                        intent=intent,
-                    )
-                except Exception as e:
-                    print(f"⚠️ Failed to save wizard bot message: {e}")
-            return response
         # Stage 2: Choose topic/section (if document selected but no topic yet)
         # Skip if wizard_stage is already "answer" (user wants final answer)
-        if intent == "search_legal" and selected_doc_code and not selected_topic and not has_doc_code_in_query and wizard_stage != "answer":
             print("[WIZARD] ✅ Stage 2 triggered: Choose topic/section")
             # Get document title

         # tránh trả lại các câu trả lời cũ không có options.
         cached_response = None
         if intent != "search_legal":
+            cached_response = EXACT_MATCH_CACHE.get(query, intent)
         if cached_response:
             cached_response["_cache"] = "exact_match"
             cached_response["_source"] = cached_response.get("_source", "cache")
         # Stage 2: Choose topic/section (if document selected but no topic)
         # Stage 3: Choose detail (if topic selected, ask for more details)
         # Final: Answer (when user says "Không" or after detail selection)
+        disable_wizard_flow = os.environ.get("DISABLE_WIZARD_FLOW", "false").lower() == "true"
+        print(f"[WIZARD] DISABLE_WIZARD_FLOW={os.environ.get('DISABLE_WIZARD_FLOW', 'false')} -> disable_wizard_flow={disable_wizard_flow}")
         has_doc_code_in_query = self._query_has_document_code(query)
         wizard_stage = session_metadata.get("wizard_stage") if session_metadata else None
         print(f"[WIZARD] Chatbot layer check - intent={intent}, wizard_stage={wizard_stage}, selected_doc_code={selected_doc_code}, selected_topic={selected_topic}, has_doc_code_in_query={has_doc_code_in_query}, query='{query[:50]}'")
+        # CRITICAL: If wizard flow is disabled, reset all wizard state immediately
+        if disable_wizard_flow:
+            print("[WIZARD] 🚫 Wizard flow DISABLED - resetting all wizard state and skipping wizard stages")
+            selected_doc_code = None
+            selected_topic = None
+            wizard_stage = None
+            wizard_depth = 0
+            # Update session metadata to clear wizard state
+            if session_id:
+                try:
+                    ConversationContext.update_session_metadata(
+                        session_id,
                         {
+                            "selected_document_code": None,
+                            "selected_topic": None,
+                            "wizard_stage": None,
+                            "wizard_depth": 0,
                         }
                     )
+                    print("[WIZARD] ✅ Wizard state cleared from session metadata")
+                except Exception as e:
+                    print(f"⚠️ Failed to clear wizard state: {e}")
+            # Also update session_metadata dict for current function scope
+            if session_metadata:
+                session_metadata["selected_document_code"] = None
+                session_metadata["selected_topic"] = None
+                session_metadata["wizard_stage"] = None
+                session_metadata["wizard_depth"] = 0
+        # Reset wizard state if new query doesn't have document code and wizard_stage is "answer"
+        # This handles the case where user asks a new question after completing a previous wizard flow
+        # CRITICAL: Check conditions and reset BEFORE Stage 1 check
+        should_reset = (
+            not disable_wizard_flow
+            and intent == "search_legal"
+            and not has_doc_code_in_query
+            and wizard_stage == "answer"
+        )
+        print(f"[WIZARD] Reset check - intent={intent}, has_doc_code={has_doc_code_in_query}, wizard_stage={wizard_stage}, should_reset={should_reset}")  # v2.0-fix
+        if should_reset:
+            print("[WIZARD] 🔄 New query detected, resetting wizard state for fresh start")
+            selected_doc_code = None
+            selected_topic = None
+            wizard_stage = None
+            # Update session metadata FIRST before continuing
+            if session_id:
                 try:
+                    ConversationContext.update_session_metadata(
+                        session_id,
+                        {
+                            "selected_document_code": None,
+                            "selected_topic": None,
+                            "wizard_stage": None,
+                            "wizard_depth": 0,
+                        }
                     )
+                    print("[WIZARD] ✅ Wizard state reset in session metadata")
+                except Exception as e:
+                    print(f"⚠️ Failed to reset wizard state: {e}")
+            # Also update session_metadata dict for current function scope
+            if session_metadata:
+                session_metadata["selected_document_code"] = None
+                session_metadata["selected_topic"] = None
+                session_metadata["wizard_stage"] = None
+                session_metadata["wizard_depth"] = 0
+        # Stage 1: Choose document (if no document selected and no code in query)
+        # Use Query Rewrite Strategy from slow_path_handler instead of old LLM suggestions
+        if (
+            intent == "search_legal"
+            and not selected_doc_code
+            and not has_doc_code_in_query
+            and not disable_wizard_flow
+        ):
+            print("[WIZARD] ✅ Stage 1: Using Query Rewrite Strategy from slow_path_handler")
+            # Delegate to slow_path_handler which has Query Rewrite Strategy
+            slow_handler = SlowPathHandler()
+            response = slow_handler.handle(
+                query=query,
+                intent=intent,
+                session_id=session_id,
+                selected_document_code=None,  # No document selected yet
+            )
+            # Ensure response has wizard metadata
+            if response:
+                response.setdefault("wizard_stage", "choose_document")
+                response.setdefault("routing", "legal_wizard")
+                response.setdefault("type", "options")
+                # Update session metadata
+                if session_id:
+                    try:
+                        ConversationContext.update_session_metadata(
+                            session_id,
+                            {
+                                "wizard_stage": "choose_document",
+                                "wizard_depth": 1,
+                            }
+                        )
+                    except Exception as e:
+                        logger.warning("[WIZARD] Failed to update session metadata: %s", e)
+                # Save bot message to context
+                if session_id:
+                    try:
+                        bot_message = response.get("message") or response.get("clarification", {}).get("message", "")
+                        ConversationContext.add_message(
+                            session_id=session_id,
+                            role="bot",
+                            content=bot_message,
+                            intent=intent,
+                        )
+                    except Exception as e:
+                        print(f"⚠️ Failed to save wizard bot message: {e}")
+            return response if response else {
+                "message": "Xin lỗi, có lỗi xảy ra khi tìm kiếm văn bản.",
                 "intent": intent,
                 "results": [],
                 "count": 0,
             }
         # Stage 2: Choose topic/section (if document selected but no topic yet)
         # Skip if wizard_stage is already "answer" (user wants final answer)
+        if (
+            intent == "search_legal"
+            and selected_doc_code
+            and not selected_topic
+            and not has_doc_code_in_query
+            and wizard_stage != "answer"
+            and not disable_wizard_flow
+        ):
             print("[WIZARD] ✅ Stage 2 triggered: Choose topic/section")
             # Get document title

backend/hue_portal/chatbot/slow_path_handler.py CHANGED Viewed

@@ -4,7 +4,8 @@ Slow Path Handler - Full RAG pipeline for complex queries.
 import os
 import time
 import logging
-from typing import Dict, Any, Optional, List
 import unicodedata
 import re
 from concurrent.futures import ThreadPoolExecutor, Future
@@ -20,12 +21,16 @@ from hue_portal.core.models import (
     LegalDocument,
 )
 from hue_portal.core.search_ml import search_with_ml
 # Lazy import reranker to avoid blocking startup (FlagEmbedding may download model)
 # from hue_portal.core.reranker import rerank_documents
 from hue_portal.chatbot.llm_integration import get_llm_generator
 from hue_portal.chatbot.structured_legal import format_structured_legal_answer
 from hue_portal.chatbot.context_manager import ConversationContext
 from hue_portal.chatbot.router import DOCUMENT_CODE_PATTERNS
 logger = logging.getLogger(__name__)
@@ -38,9 +43,15 @@ class SlowPathHandler:
         self.llm_generator = get_llm_generator()
         # Thread pool for parallel search (max 2 workers to avoid overwhelming DB)
         self._executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="parallel_search")
-        # Cache for prefetched results by session_id
         self._prefetched_cache: Dict[str, Dict[str, Any]] = {}
         self._cache_lock = threading.Lock()
     def handle(
         self,
@@ -106,71 +117,240 @@ class SlowPathHandler:
         )
         if (
             intent == "search_legal"
             and not selected_document_code_normalized
             and not has_explicit_code
         ):
-            logger.info("[WIZARD] ✅ Wizard conditions met, returning options payload")
-            canonical_candidates: List[Dict[str, Any]] = []
             try:
-                canonical_docs = list(
-                    LegalDocument.objects.filter(
-                        code__in=["264-QD-TW", "QD-69-TW", "TT-02-CAND"]
-                    )
                 )
-                for doc in canonical_docs:
-                    summary = getattr(doc, "summary", "") or ""
-                    metadata = getattr(doc, "metadata", {}) or {}
-                    if not summary and isinstance(metadata, dict):
-                        summary = metadata.get("summary", "")
-                    canonical_candidates.append(
                         {
-                            "code": doc.code,
-                            "title": getattr(doc, "title", "") or doc.code,
-                            "summary": summary,
-                            "doc_type": getattr(doc, "doc_type", "") or "",
-                            "section_title": "",
-                        }
                     )
             except Exception as exc:
-                logger.warning(
-                    "[CLARIFICATION] Canonical documents lookup failed, using static list: %s",
                     exc,
                 )
-            if not canonical_candidates:
-                canonical_candidates = [
-                    {
-                        "code": "264-QD-TW",
-                        "title": "Quyết định 264-QĐ/TW về kỷ luật đảng viên",
-                        "summary": "",
-                        "doc_type": "",
-                        "section_title": "",
-                    },
-                    {
-                        "code": "QD-69-TW",
-                        "title": "Quy định 69-QĐ/TW về kỷ luật tổ chức đảng, đảng viên",
-                        "summary": "",
-                        "doc_type": "",
-                        "section_title": "",
-                    },
-                    {
-                        "code": "TT-02-CAND",
-                        "title": "Thông tư 02/2021/TT-BCA về điều lệnh CAND",
-                        "summary": "",
-                        "doc_type": "",
-                        "section_title": "",
-                    },
-                ]
-            clarification_payload = self._build_clarification_payload(
-                query, canonical_candidates
-            )
-            if clarification_payload:
-                clarification_payload.setdefault("intent", intent)
-                clarification_payload.setdefault("_source", "clarification")
-                clarification_payload.setdefault("routing", "clarification")
-                clarification_payload.setdefault("confidence", 0.3)
-                return clarification_payload
         # Search based on intent - retrieve top-15 for reranking (balance speed and RAM)
         search_result = self._search_by_intent(
@@ -685,6 +865,23 @@ class SlowPathHandler:
                     keywords[:5],
                 )
                 # Search in the selected document
                 query_text = " ".join(keywords) if keywords else ""
                 search_result = self._search_by_intent(
@@ -694,16 +891,27 @@ class SlowPathHandler:
                     preferred_document_code=document_code.upper(),
                 )
-                # Store in cache
                 with self._cache_lock:
                     if session_id not in self._prefetched_cache:
                         self._prefetched_cache[session_id] = {}
-                    self._prefetched_cache[session_id]["document_results"] = {
-                        "document_code": document_code,
-                        "results": search_result.get("results", []),
-                        "count": search_result.get("count", 0),
-                        "timestamp": time.time(),
-                    }
                 logger.info(
                     "[PARALLEL_SEARCH] Completed background search for doc=%s, found %d results",
@@ -905,13 +1113,12 @@ class SlowPathHandler:
                     )
             else:
                 logger.debug("[SEARCH] No document code detected for query: %s", query)
-            # Retrieve top-15 for reranking (will be reduced to top-4 after rerank)
-            search_results = search_with_ml(
                 qs,
-                keywords,
-                text_fields,
                 top_k=limit,  # limit=15 for reranking, will be reduced to 4
-                min_score=0.02,  # Lower threshold for legal
             )
             results = self._format_legal_results(search_results, detected_code, query=query)
             logger.info(

 import os
 import time
 import logging
+import hashlib
+from typing import Dict, Any, Optional, List, Set
 import unicodedata
 import re
 from concurrent.futures import ThreadPoolExecutor, Future
     LegalDocument,
 )
 from hue_portal.core.search_ml import search_with_ml
+from hue_portal.core.pure_semantic_search import pure_semantic_search
 # Lazy import reranker to avoid blocking startup (FlagEmbedding may download model)
 # from hue_portal.core.reranker import rerank_documents
 from hue_portal.chatbot.llm_integration import get_llm_generator
 from hue_portal.chatbot.structured_legal import format_structured_legal_answer
 from hue_portal.chatbot.context_manager import ConversationContext
 from hue_portal.chatbot.router import DOCUMENT_CODE_PATTERNS
+from hue_portal.core.query_rewriter import get_query_rewriter
+from hue_portal.core.pure_semantic_search import pure_semantic_search, parallel_vector_search
+from hue_portal.core.redis_cache import get_redis_cache
 logger = logging.getLogger(__name__)
         self.llm_generator = get_llm_generator()
         # Thread pool for parallel search (max 2 workers to avoid overwhelming DB)
         self._executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="parallel_search")
+        # Cache for prefetched results by session_id (in-memory fallback)
         self._prefetched_cache: Dict[str, Dict[str, Any]] = {}
         self._cache_lock = threading.Lock()
+        # Redis cache for prefetch results
+        self.redis_cache = get_redis_cache()
+        # Prefetch cache TTL (30 minutes default)
+        self.prefetch_cache_ttl = int(os.environ.get("CACHE_PREFETCH_TTL", "1800"))
+        # Toggle wizard flow (disable to answer directly)
+        self.disable_wizard_flow = os.environ.get("DISABLE_WIZARD_FLOW", "false").lower() == "true"
     def handle(
         self,
         )
         if (
             intent == "search_legal"
+            and not self.disable_wizard_flow
             and not selected_document_code_normalized
             and not has_explicit_code
         ):
+            logger.info("[QUERY_REWRITE] ✅ Wizard conditions met, using Query Rewrite Strategy")
+            # Query Rewrite Strategy: Rewrite query into 3-5 optimized legal queries
+            query_rewriter = get_query_rewriter(self.llm_generator)
+            # Get conversation context for query rewriting
+            context = None
+            if session_id:
+                try:
+                    recent_messages = ConversationContext.get_recent_messages(session_id, limit=5)
+                    context = [
+                        {"role": msg.role, "content": msg.content}
+                        for msg in recent_messages
+                    ]
+                except Exception as exc:
+                    logger.warning("[QUERY_REWRITE] Failed to load context: %s", exc)
+            # Rewrite query into 3-5 queries
+            rewritten_queries = query_rewriter.rewrite_query(
+                query,
+                context=context,
+                max_queries=5,
+                min_queries=3
+            )
+            if not rewritten_queries:
+                # Fallback to original query if rewrite fails
+                rewritten_queries = [query]
+            logger.info(
+                "[QUERY_REWRITE] Rewrote query into %d queries: %s",
+                len(rewritten_queries),
+                rewritten_queries[:3]
+            )
+            # Parallel vector search with multiple queries
             try:
+                from hue_portal.core.models import LegalSection
+                # Search all legal sections (no document filter yet)
+                qs = LegalSection.objects.all()
+                text_fields = ["section_title", "section_code", "content"]
+                # Use parallel vector search
+                search_results = parallel_vector_search(
+                    rewritten_queries,
+                    qs,
+                    top_k_per_query=5,
+                    final_top_k=7,
+                    text_fields=text_fields
                 )
+                # Extract unique document codes from results
+                doc_codes_seen: Set[str] = set()
+                document_options: List[Dict[str, Any]] = []
+                for section, score in search_results:
+                    doc = getattr(section, "document", None)
+                    if not doc:
+                        continue
+                    doc_code = getattr(doc, "code", "").upper()
+                    if not doc_code or doc_code in doc_codes_seen:
+                        continue
+                    doc_codes_seen.add(doc_code)
+                    # Get document metadata
+                    doc_title = getattr(doc, "title", "") or doc_code
+                    doc_summary = getattr(doc, "summary", "") or ""
+                    if not doc_summary:
+                        metadata = getattr(doc, "metadata", {}) or {}
+                        if isinstance(metadata, dict):
+                            doc_summary = metadata.get("summary", "")
+                    document_options.append({
+                        "code": doc_code,
+                        "title": doc_title,
+                        "summary": doc_summary,
+                        "score": float(score),
+                        "doc_type": getattr(doc, "doc_type", "") or "",
+                    })
+                    # Limit to top 5 documents
+                    if len(document_options) >= 5:
+                        break
+                # If no documents found, use canonical fallback
+                if not document_options:
+                    logger.warning("[QUERY_REWRITE] No documents found, using canonical fallback")
+                    canonical_candidates = [
                         {
+                            "code": "264-QD-TW",
+                            "title": "Quyết định 264-QĐ/TW về kỷ luật đảng viên",
+                            "summary": "",
+                            "doc_type": "",
+                        },
+                        {
+                            "code": "QD-69-TW",
+                            "title": "Quy định 69-QĐ/TW về kỷ luật tổ chức đảng, đảng viên",
+                            "summary": "",
+                            "doc_type": "",
+                        },
+                        {
+                            "code": "TT-02-CAND",
+                            "title": "Thông tư 02/2021/TT-BCA về điều lệnh CAND",
+                            "summary": "",
+                            "doc_type": "",
+                        },
+                    ]
+                    clarification_payload = self._build_clarification_payload(
+                        query, canonical_candidates
                     )
+                    if clarification_payload:
+                        clarification_payload.setdefault("intent", intent)
+                        clarification_payload.setdefault("_source", "clarification")
+                        clarification_payload.setdefault("routing", "clarification")
+                        clarification_payload.setdefault("confidence", 0.3)
+                        return clarification_payload
+                # Build options from search results
+                options = [
+                    {
+                        "code": opt["code"],
+                        "title": opt["title"],
+                        "reason": opt.get("summary") or f"Độ liên quan: {opt['score']:.2f}",
+                    }
+                    for opt in document_options
+                ]
+                # Add "Khác" option
+                if not any(opt.get("code") == "__other__" for opt in options):
+                    options.append({
+                        "code": "__other__",
+                        "title": "Khác",
+                        "reason": "Tôi muốn hỏi văn bản hoặc chủ đề pháp luật khác.",
+                    })
+                message = (
+                    "Tôi đã tìm thấy các văn bản pháp luật liên quan đến câu hỏi của bạn.\n\n"
+                    "Bạn hãy chọn văn bản muốn tra cứu để tôi trả lời chi tiết hơn:"
+                )
+                logger.info(
+                    "[QUERY_REWRITE] ✅ Found %d documents using Query Rewrite Strategy",
+                    len(document_options)
+                )
+                return {
+                    "type": "options",
+                    "wizard_stage": "choose_document",
+                    "message": message,
+                    "options": options,
+                    "clarification": {
+                        "message": message,
+                        "options": options,
+                    },
+                    "results": [],
+                    "count": 0,
+                    "intent": intent,
+                    "_source": "query_rewrite",
+                    "routing": "query_rewrite",
+                    "confidence": 0.95,  # High confidence with Query Rewrite Strategy
+                }
             except Exception as exc:
+                logger.error(
+                    "[QUERY_REWRITE] Error in Query Rewrite Strategy: %s, falling back to LLM suggestions",
                     exc,
+                    exc_info=True
                 )
+                # Fallback to original LLM-based clarification
+                canonical_candidates: List[Dict[str, Any]] = []
+                try:
+                    canonical_docs = list(
+                        LegalDocument.objects.filter(
+                            code__in=["264-QD-TW", "QD-69-TW", "TT-02-CAND"]
+                        )
+                    )
+                    for doc in canonical_docs:
+                        summary = getattr(doc, "summary", "") or ""
+                        metadata = getattr(doc, "metadata", {}) or {}
+                        if not summary and isinstance(metadata, dict):
+                            summary = metadata.get("summary", "")
+                        canonical_candidates.append(
+                            {
+                                "code": doc.code,
+                                "title": getattr(doc, "title", "") or doc.code,
+                                "summary": summary,
+                                "doc_type": getattr(doc, "doc_type", "") or "",
+                                "section_title": "",
+                            }
+                        )
+                except Exception as e:
+                    logger.warning("[CLARIFICATION] Canonical documents lookup failed: %s", e)
+                if not canonical_candidates:
+                    canonical_candidates = [
+                        {
+                            "code": "264-QD-TW",
+                            "title": "Quyết định 264-QĐ/TW về kỷ luật đảng viên",
+                            "summary": "",
+                            "doc_type": "",
+                            "section_title": "",
+                        },
+                        {
+                            "code": "QD-69-TW",
+                            "title": "Quy định 69-QĐ/TW về kỷ luật tổ chức đảng, đảng viên",
+                            "summary": "",
+                            "doc_type": "",
+                            "section_title": "",
+                        },
+                        {
+                            "code": "TT-02-CAND",
+                            "title": "Thông tư 02/2021/TT-BCA về điều lệnh CAND",
+                            "summary": "",
+                            "doc_type": "",
+                            "section_title": "",
+                        },
+                    ]
+                clarification_payload = self._build_clarification_payload(
+                    query, canonical_candidates
+                )
+                if clarification_payload:
+                    clarification_payload.setdefault("intent", intent)
+                    clarification_payload.setdefault("_source", "clarification_fallback")
+                    clarification_payload.setdefault("routing", "clarification")
+                    clarification_payload.setdefault("confidence", 0.3)
+                    return clarification_payload
         # Search based on intent - retrieve top-15 for reranking (balance speed and RAM)
         search_result = self._search_by_intent(
                     keywords[:5],
                 )
+                # Check Redis cache first
+                cache_key = f"prefetch:{document_code.upper()}:{hashlib.sha256(' '.join(keywords).encode()).hexdigest()[:16]}"
+                cached_result = None
+                if self.redis_cache and self.redis_cache.is_available():
+                    cached_result = self.redis_cache.get(cache_key)
+                    if cached_result:
+                        logger.info(
+                            "[PARALLEL_SEARCH] ✅ Cache hit for doc=%s",
+                            document_code
+                        )
+                        # Mirror the Redis hit into this session's in-memory cache too
+                        with self._cache_lock:
+                            if session_id not in self._prefetched_cache:
+                                self._prefetched_cache[session_id] = {}
+                            self._prefetched_cache[session_id]["document_results"] = cached_result
+                        return
                 # Search in the selected document
                 query_text = " ".join(keywords) if keywords else ""
                 search_result = self._search_by_intent(
                     preferred_document_code=document_code.upper(),
                 )
+                # Prepare cache data
+                cache_data = {
+                    "document_code": document_code,
+                    "results": search_result.get("results", []),
+                    "count": search_result.get("count", 0),
+                    "timestamp": time.time(),
+                }
+                # Store in Redis cache
+                if self.redis_cache and self.redis_cache.is_available():
+                    self.redis_cache.set(cache_key, cache_data, ttl_seconds=self.prefetch_cache_ttl)
+                    logger.debug(
+                        "[PARALLEL_SEARCH] Cached prefetch results (TTL: %ds)",
+                        self.prefetch_cache_ttl
+                    )
+                # Always store in the in-memory cache as well (fallback when Redis is unavailable)
                 with self._cache_lock:
                     if session_id not in self._prefetched_cache:
                         self._prefetched_cache[session_id] = {}
+                    self._prefetched_cache[session_id]["document_results"] = cache_data
                 logger.info(
                     "[PARALLEL_SEARCH] Completed background search for doc=%s, found %d results",
                     )
             else:
                 logger.debug("[SEARCH] No document code detected for query: %s", query)
+            # Use pure semantic search (100% vector, no BM25)
+            search_results = pure_semantic_search(
+                [keywords],
                 qs,
                 top_k=limit,  # limit=15 for reranking, will be reduced to 4
+                text_fields=text_fields
             )
             results = self._format_legal_results(search_results, detected_code, query=query)
             logger.info(