Spaces:

davidtran999
/

hue-portal-backend (Docker)

Running

App Files Community

davidtran999 committed 17 days ago

Commit

df3858b

verified ·

1 Parent(s): 1504d2c

Upload backend/hue_portal/chatbot/chatbot.py with huggingface_hub

Browse files

Files changed (1) hide show

backend/hue_portal/chatbot/chatbot.py +327 -113

backend/hue_portal/chatbot/chatbot.py CHANGED Viewed

@@ -136,7 +136,7 @@ class Chatbot(CoreChatbot):
         # tránh trả lại các câu trả lời cũ không có options.
         cached_response = None
         if intent != "search_legal":
-            cached_response = EXACT_MATCH_CACHE.get(query, intent)
         if cached_response:
             cached_response["_cache"] = "exact_match"
             cached_response["_source"] = cached_response.get("_source", "cache")
@@ -162,128 +162,212 @@ class Chatbot(CoreChatbot):
             return cached_response
         # Wizard / option-first ngay tại chatbot layer:
-        # Nếu là câu hỏi search_legal chung, chưa chọn văn bản, không có mã văn bản trong câu hỏi
-        # => trả về danh sách văn bản để người dùng chọn, không sinh câu trả lời chi tiết.
-        # ⚠️ QUAN TRỌNG: Wizard check PHẢI ở TRƯỚC nhánh "if intent == search_legal" để được trigger.
         has_doc_code_in_query = self._query_has_document_code(query)
-        print(f"[WIZARD] Chatbot layer check - intent={intent}, selected_doc_code={selected_doc_code}, has_doc_code_in_query={has_doc_code_in_query}, query='{query[:50]}'")
-        # Logic wizard:
-        # - Nếu user đã chọn văn bản (selected_doc_code có giá trị) → không bật wizard, đi thẳng vào slow_path để trả lời
-        # - Nếu user chưa chọn và không có mã trong query → bật wizard để user chọn
-        # - Nếu có mã trong query → không bật wizard, đi thẳng vào slow_path
         if intent == "search_legal" and not selected_doc_code and not has_doc_code_in_query:
-            print("[WIZARD] ✅ Chatbot layer wizard triggered, using AI to generate options")
-            # Load canonical documents từ DB
-            canonical_candidates = []
             try:
-                canonical_docs = list(
-                    LegalDocument.objects.filter(
-                        code__in=["264-QD-TW", "QD-69-TW", "TT-02-CAND"]
                     )
                 )
-                for doc in canonical_docs:
-                    summary = getattr(doc, "summary", "") or ""
-                    metadata = getattr(doc, "metadata", {}) or {}
-                    if not summary and isinstance(metadata, dict):
-                        summary = metadata.get("summary", "")
-                    canonical_candidates.append(
-                        {
-                            "code": doc.code,
-                            "title": getattr(doc, "title", "") or doc.code,
-                            "summary": summary,
-                            "doc_type": getattr(doc, "doc_type", "") or "",
-                            "section_title": "",
-                        }
-                    )
-            except Exception as exc:
-                logger.warning("[WIZARD] Failed to load canonical documents: %s", exc)
-            # Fallback nếu không load được từ DB
-            if not canonical_candidates:
-                canonical_candidates = [
-                    {
-                        "code": "264-QD-TW",
-                        "title": "Quyết định 264-QĐ/TW về kỷ luật đảng viên",
-                        "summary": "Quy định chung về xử lý kỷ luật đối với đảng viên vi phạm.",
-                        "doc_type": "",
-                        "section_title": "",
-                    },
-                    {
-                        "code": "QD-69-TW",
-                        "title": "Quy định 69-QĐ/TW về kỷ luật tổ chức đảng, đảng viên",
-                        "summary": "Quy định chi tiết về các hành vi vi phạm và hình thức kỷ luật.",
-                        "doc_type": "",
-                        "section_title": "",
-                    },
-                    {
-                        "code": "TT-02-CAND",
-                        "title": "Thông tư 02/2021/TT-BCA về điều lệnh CAND",
-                        "summary": "Quy định về điều lệnh, lễ tiết, tác phong trong CAND.",
-                        "doc_type": "",
-                        "section_title": "",
-                    },
-                ]
-            # Dùng LLM để đề xuất options dựa trên câu hỏi
-            clarification_options = []
-            intro_message = (
-                "Tôi tìm thấy một số nhóm văn bản có thể liên quan đến câu hỏi của bạn.\n\n"
-                "Bạn hãy chọn văn bản muốn tra cứu trước, sau đó tôi sẽ trả lời chi tiết hơn:"
-            )
             if self.llm_generator:
                 try:
-                    llm_payload = self.llm_generator.suggest_clarification_topics(
-                        query,
-                        canonical_candidates,
                         max_options=3,
                     )
                     if llm_payload:
                         intro_message = llm_payload.get("message") or intro_message
-                        raw_options = llm_payload.get("options")
-                        if isinstance(raw_options, list) and len(raw_options) > 0:
-                            clarification_options = [
-                                {
-                                    "code": (opt.get("code") or candidate.get("code", "")).upper(),
-                                    "title": opt.get("title") or opt.get("document_title") or candidate.get("title", ""),
-                                    "reason": opt.get("reason")
-                                    or opt.get("summary")
-                                    or candidate.get("summary")
-                                    or candidate.get("section_title")
-                                    or "",
-                                }
-                                for opt, candidate in zip(
-                                    raw_options,
-                                    canonical_candidates[: len(raw_options)],
-                                )
-                                if (opt.get("code") or candidate.get("code"))
-                                and (opt.get("title") or opt.get("document_title") or candidate.get("title"))
-                            ]
-                            print(f"[WIZARD] ✅ LLM generated {len(clarification_options)} options")
                 except Exception as exc:
-                    logger.warning("[WIZARD] LLM suggestion failed: %s, using fallback", exc)
-            # Fallback nếu LLM không trả về options hợp lệ
-            if not clarification_options:
-                clarification_options = [
                     {
-                        "code": candidate["code"].upper(),
-                        "title": candidate["title"],
-                        "reason": candidate.get("summary") or candidate.get("section_title") or "",
                     }
-                    for candidate in canonical_candidates[:3]
                 ]
-                print("[WIZARD] Using fallback options (LLM unavailable or failed)")
-            # Thêm option "Khác" nếu chưa có
-            if not any(opt.get("code") == "__other__" for opt in clarification_options):
-                clarification_options.append(
-                    {
-                        "code": "__other__",
-                        "title": "Khác",
-                        "reason": "Tôi muốn hỏi văn bản hoặc chủ đề pháp luật khác.",
-                    }
                 )
             response = {
                 "message": intro_message,
                 "intent": intent,
@@ -292,12 +376,12 @@ class Chatbot(CoreChatbot):
                 "count": 0,
                 "routing": "legal_wizard",
                 "type": "options",
-                "wizard_stage": "choose_document",
                 "clarification": {
                     "message": intro_message,
-                    "options": clarification_options,
                 },
-                "options": clarification_options,
             }
             if session_id:
                 response["session_id"] = session_id
@@ -308,10 +392,140 @@ class Chatbot(CoreChatbot):
                         content=intro_message,
                         intent=intent,
                     )
                 except Exception as e:
-                    print(f"⚠️ Failed to save wizard bot message: {e}")
             return response
         # Always send legal intent through Slow Path RAG
         if intent == "search_legal":
             response = self._run_slow_path_legal(
@@ -491,14 +705,14 @@ class Chatbot(CoreChatbot):
                         "Tôi có thể giúp bạn tra cứu các văn bản quy định pháp luật về xử lí kỷ luật cán bộ đảng viên. "
                         "Bạn muốn tìm gì?"
                     )
-                    response = {
-                        "message": message,
-                        "intent": intent,
-                        "confidence": confidence,
-                        "results": [],
-                        "count": 0,
                         "routing": "small_talk",
-                    }
         else:  # IntentRoute.SEARCH
             # Use core chatbot search for other intents

         # tránh trả lại các câu trả lời cũ không có options.
         cached_response = None
         if intent != "search_legal":
+        cached_response = EXACT_MATCH_CACHE.get(query, intent)
         if cached_response:
             cached_response["_cache"] = "exact_match"
             cached_response["_source"] = cached_response.get("_source", "cache")
             return cached_response
         # Wizard / option-first ngay tại chatbot layer:
+        # Multi-stage wizard flow:
+        # Stage 1: Choose document (if no document selected)
+        # Stage 2: Choose topic/section (if document selected but no topic)
+        # Stage 3: Choose detail (if topic selected, ask for more details)
+        # Final: Answer (when user says "Không" or after detail selection)
         has_doc_code_in_query = self._query_has_document_code(query)
+        wizard_stage = session_metadata.get("wizard_stage") if session_metadata else None
+        selected_topic = session_metadata.get("selected_topic") if session_metadata else None
+        wizard_depth = session_metadata.get("wizard_depth", 0) if session_metadata else 0
+        print(f"[WIZARD] Chatbot layer check - intent={intent}, wizard_stage={wizard_stage}, selected_doc_code={selected_doc_code}, selected_topic={selected_topic}, has_doc_code_in_query={has_doc_code_in_query}, query='{query[:50]}'")
+        # Stage 1: Choose document (if no document selected and no code in query)
+        # Use Query Rewrite Strategy from slow_path_handler instead of old LLM suggestions
         if intent == "search_legal" and not selected_doc_code and not has_doc_code_in_query:
+            print("[WIZARD] ✅ Stage 1: Using Query Rewrite Strategy from slow_path_handler")
+            # Delegate to slow_path_handler which has Query Rewrite Strategy
+            slow_handler = SlowPathHandler()
+            response = slow_handler.handle(
+                query=query,
+                intent=intent,
+                session_id=session_id,
+                selected_document_code=None,  # No document selected yet
+            )
+            # Ensure response has wizard metadata
+            if response:
+                response.setdefault("wizard_stage", "choose_document")
+                response.setdefault("routing", "legal_wizard")
+                response.setdefault("type", "options")
+                # Update session metadata
+                if session_id:
+                    try:
+                        ConversationContext.update_session_metadata(
+                            session_id,
+                            {
+                                "wizard_stage": "choose_document",
+                                "wizard_depth": 1,
+                            }
+                        )
+                    except Exception as e:
+                        logger.warning("[WIZARD] Failed to update session metadata: %s", e)
+                # Save bot message to context
+                if session_id:
+                    try:
+                        bot_message = response.get("message") or response.get("clarification", {}).get("message", "")
+                        ConversationContext.add_message(
+                            session_id=session_id,
+                            role="bot",
+                            content=bot_message,
+                            intent=intent,
+                        )
+                    except Exception as e:
+                        print(f"⚠️ Failed to save wizard bot message: {e}")
+            return response if response else {
+                "message": "Xin lỗi, có lỗi xảy ra khi tìm kiếm văn bản.",
+                "intent": intent,
+                "results": [],
+                "count": 0,
+            }
+        # Stage 2: Choose topic/section (if document selected but no topic yet)
+        # Skip if wizard_stage is already "answer" (user wants final answer)
+        if intent == "search_legal" and selected_doc_code and not selected_topic and not has_doc_code_in_query and wizard_stage != "answer":
+            print("[WIZARD] ✅ Stage 2 triggered: Choose topic/section")
+            # Get document title
+            document_title = selected_doc_code
             try:
+                doc = LegalDocument.objects.filter(code=selected_doc_code).first()
+                if doc:
+                    document_title = getattr(doc, "title", "") or selected_doc_code
+            except Exception:
+                pass
+            # Extract keywords from query for parallel search
+            search_keywords_from_query = []
+            if self.llm_generator:
+                try:
+                    conversation_context = None
+                    if session_id:
+                        try:
+                            recent_messages = ConversationContext.get_recent_messages(session_id, limit=5)
+                            conversation_context = [
+                                {"role": msg.role, "content": msg.content}
+                                for msg in recent_messages
+                            ]
+                        except Exception:
+                            pass
+                    search_keywords_from_query = self.llm_generator.extract_search_keywords(
+                        query=query,
+                        selected_options=None,  # No options selected yet
+                        conversation_context=conversation_context,
                     )
+                    print(f"[WIZARD] Extracted keywords: {search_keywords_from_query[:5]}")
+                except Exception as exc:
+                    logger.warning("[WIZARD] Keyword extraction failed: %s", exc)
+            # Fallback to simple keyword extraction
+            if not search_keywords_from_query:
+                search_keywords_from_query = self.chatbot.extract_keywords(query)
+            # Trigger parallel search for document (if not already done)
+            slow_handler = SlowPathHandler()
+            prefetched_results = slow_handler._get_prefetched_results(session_id, "document_results")
+            if not prefetched_results:
+                # Trigger parallel search now
+                slow_handler._parallel_search_prepare(
+                    document_code=selected_doc_code,
+                    keywords=search_keywords_from_query,
+                    session_id=session_id,
                 )
+                logger.info("[WIZARD] Triggered parallel search for document")
+            # Get prefetched search results from parallel search (if available)
+            prefetched_results = slow_handler._get_prefetched_results(session_id, "document_results")
+            search_results = []
+            if prefetched_results:
+                search_results = prefetched_results.get("results", [])
+                logger.info("[WIZARD] Using prefetched results: %d sections", len(search_results))
+            else:
+                # Fallback: search synchronously if prefetch not ready
+                search_result = slow_handler._search_by_intent(
+                    intent="search_legal",
+                    query=query,
+                    limit=20,
+                    preferred_document_code=selected_doc_code.upper(),
+                )
+                search_results = search_result.get("results", [])
+                logger.info("[WIZARD] Fallback search: %d sections", len(search_results))
+            # Extract keywords for topic options
+            conversation_context = None
+            if session_id:
+                try:
+                    recent_messages = ConversationContext.get_recent_messages(session_id, limit=5)
+                    conversation_context = [
+                        {"role": msg.role, "content": msg.content}
+                        for msg in recent_messages
+                    ]
+                except Exception:
+                    pass
+            # Use LLM to generate topic options
+            topic_options = []
+            intro_message = f"Bạn muốn tìm điều khoản/chủ đề nào cụ thể trong {document_title}?"
+            search_keywords = []
             if self.llm_generator:
                 try:
+                    llm_payload = self.llm_generator.suggest_topic_options(
+                        query=query,
+                        document_code=selected_doc_code,
+                        document_title=document_title,
+                        search_results=search_results[:10],  # Top 10 for options
+                        conversation_context=conversation_context,
                         max_options=3,
                     )
                     if llm_payload:
                         intro_message = llm_payload.get("message") or intro_message
+                        topic_options = llm_payload.get("options", [])
+                        search_keywords = llm_payload.get("search_keywords", [])
+                        print(f"[WIZARD] ✅ LLM generated {len(topic_options)} topic options")
                 except Exception as exc:
+                    logger.warning("[WIZARD] LLM topic suggestion failed: %s", exc)
+            # Fallback: build options from search results
+            if not topic_options and search_results:
+                for result in search_results[:3]:
+                    data = result.get("data", {})
+                    section_title = data.get("section_title") or data.get("title") or ""
+                    article = data.get("article") or data.get("article_number") or ""
+                    if section_title or article:
+                        topic_options.append({
+                            "title": section_title or article,
+                            "article": article,
+                            "reason": data.get("excerpt", "")[:100] or "",
+                            "keywords": [],
+                        })
+            # If still no options, create generic ones
+            if not topic_options:
+                topic_options = [
                     {
+                        "title": "Các điều khoản liên quan",
+                        "article": "",
+                        "reason": "Tìm kiếm các điều khoản liên quan đến câu hỏi của bạn",
+                        "keywords": [],
                     }
                 ]
+            # Trigger parallel search for selected keywords
+            if search_keywords:
+                slow_handler._parallel_search_topic(
+                    document_code=selected_doc_code,
+                    topic_keywords=search_keywords,
+                    session_id=session_id,
                 )
             response = {
                 "message": intro_message,
                 "intent": intent,
                 "count": 0,
                 "routing": "legal_wizard",
                 "type": "options",
+                "wizard_stage": "choose_topic",
                 "clarification": {
                     "message": intro_message,
+                    "options": topic_options,
                 },
+                "options": topic_options,
             }
             if session_id:
                 response["session_id"] = session_id
                         content=intro_message,
                         intent=intent,
                     )
+                    ConversationContext.update_session_metadata(
+                        session_id,
+                        {
+                            "wizard_stage": "choose_topic",
+                        },
+                    )
                 except Exception as e:
+                    print(f"⚠️ Failed to save Stage 2 bot message: {e}")
             return response
+        # Stage 3: Choose detail (if topic selected, ask if user wants more details)
+        # Skip if wizard_stage is already "answer" (user wants final answer)
+        if intent == "search_legal" and selected_doc_code and selected_topic and wizard_stage != "answer":
+            # Check if user is asking for more details or saying "Không"
+            query_lower = query.lower()
+            wants_more = any(kw in query_lower for kw in ["có", "cần", "muốn", "thêm", "chi tiết", "nữa"])
+            says_no = any(kw in query_lower for kw in ["không", "khong", "thôi", "đủ", "xong"])
+            if says_no or wizard_depth >= 2:
+                # User doesn't want more details or already asked twice - proceed to final answer
+                print("[WIZARD] ✅ User wants final answer, proceeding to slow_path")
+                # Clear wizard stage to allow normal answer flow
+                if session_id:
+                    try:
+                        ConversationContext.update_session_metadata(
+                            session_id,
+                            {
+                                "wizard_stage": "answer",
+                            },
+                        )
+                    except Exception:
+                        pass
+            elif wants_more or wizard_depth == 0:
+                # User wants more details - generate detail options
+                print("[WIZARD] ✅ Stage 3 triggered: Choose detail")
+                # Get conversation context
+                conversation_context = None
+                if session_id:
+                    try:
+                        recent_messages = ConversationContext.get_recent_messages(session_id, limit=5)
+                        conversation_context = [
+                            {"role": msg.role, "content": msg.content}
+                            for msg in recent_messages
+                        ]
+                    except Exception:
+                        pass
+                # Use LLM to generate detail options
+                detail_options = []
+                intro_message = "Bạn muốn chi tiết gì cho chủ đề này nữa không?"
+                search_keywords = []
+                if self.llm_generator:
+                    try:
+                        llm_payload = self.llm_generator.suggest_detail_options(
+                            query=query,
+                            selected_document_code=selected_doc_code,
+                            selected_topic=selected_topic,
+                            conversation_context=conversation_context,
+                            max_options=3,
+                        )
+                        if llm_payload:
+                            intro_message = llm_payload.get("message") or intro_message
+                            detail_options = llm_payload.get("options", [])
+                            search_keywords = llm_payload.get("search_keywords", [])
+                            print(f"[WIZARD] ✅ LLM generated {len(detail_options)} detail options")
+                    except Exception as exc:
+                        logger.warning("[WIZARD] LLM detail suggestion failed: %s", exc)
+                # Fallback options
+                if not detail_options:
+                    detail_options = [
+                        {
+                            "title": "Thẩm quyền xử lý",
+                            "reason": "Tìm hiểu về thẩm quyền xử lý kỷ luật",
+                            "keywords": ["thẩm quyền", "xử lý"],
+                        },
+                        {
+                            "title": "Trình tự, thủ tục",
+                            "reason": "Tìm hiểu về trình tự, thủ tục xử lý",
+                            "keywords": ["trình tự", "thủ tục"],
+                        },
+                        {
+                            "title": "Hình thức kỷ luật",
+                            "reason": "Tìm hiểu về các hình thức kỷ luật",
+                            "keywords": ["hình thức", "kỷ luật"],
+                        },
+                    ]
+                # Trigger parallel search for detail keywords
+                if search_keywords and session_id:
+                    slow_handler = SlowPathHandler()
+                    slow_handler._parallel_search_topic(
+                        document_code=selected_doc_code,
+                        topic_keywords=search_keywords,
+                        session_id=session_id,
+                    )
+                response = {
+                    "message": intro_message,
+                    "intent": intent,
+                    "confidence": confidence,
+                    "results": [],
+                    "count": 0,
+                    "routing": "legal_wizard",
+                    "type": "options",
+                    "wizard_stage": "choose_detail",
+                    "clarification": {
+                        "message": intro_message,
+                        "options": detail_options,
+                    },
+                    "options": detail_options,
+                }
+                if session_id:
+                    response["session_id"] = session_id
+                    try:
+                        ConversationContext.add_message(
+                            session_id=session_id,
+                            role="bot",
+                            content=intro_message,
+                            intent=intent,
+                        )
+                        ConversationContext.update_session_metadata(
+                            session_id,
+                            {
+                                "wizard_stage": "choose_detail",
+                                "wizard_depth": wizard_depth + 1,
+                            },
+                        )
+                    except Exception as e:
+                        print(f"⚠️ Failed to save Stage 3 bot message: {e}")
+                return response
         # Always send legal intent through Slow Path RAG
         if intent == "search_legal":
             response = self._run_slow_path_legal(
                         "Tôi có thể giúp bạn tra cứu các văn bản quy định pháp luật về xử lí kỷ luật cán bộ đảng viên. "
                         "Bạn muốn tìm gì?"
                     )
+                response = {
+                    "message": message,
+                    "intent": intent,
+                    "confidence": confidence,
+                    "results": [],
+                    "count": 0,
                         "routing": "small_talk",
+                }
         else:  # IntentRoute.SEARCH
             # Use core chatbot search for other intents