Spaces:

davidtran999
/

hue-portal-backend (Docker)

Running

App Files Files Community

Davidtran99 committed 22 days ago

Commit

c5e03e2

1 Parent(s): d320689

Update: Bypass LLM logic và tăng n_ctx lên 8192

Browse files

Files changed (2) hide show

backend/hue_portal/chatbot/llm_integration.py +1 -1
backend/hue_portal/chatbot/slow_path_handler.py +106 -5

backend/hue_portal/chatbot/llm_integration.py CHANGED Viewed

@@ -464,7 +464,7 @@ class LLMGenerator:
             logger.error("Unable to resolve GGUF model path for llama.cpp")
             return
-        n_ctx = int(os.environ.get("LLAMA_CPP_CONTEXT", "4096"))
         n_threads = int(os.environ.get("LLAMA_CPP_THREADS", str(max(1, os.cpu_count() or 2))))
         n_batch = int(os.environ.get("LLAMA_CPP_BATCH", "512"))
         n_gpu_layers = int(os.environ.get("LLAMA_CPP_GPU_LAYERS", "0"))

             logger.error("Unable to resolve GGUF model path for llama.cpp")
             return
+        n_ctx = int(os.environ.get("LLAMA_CPP_CONTEXT", "8192"))
         n_threads = int(os.environ.get("LLAMA_CPP_THREADS", str(max(1, os.cpu_count() or 2))))
         n_batch = int(os.environ.get("LLAMA_CPP_BATCH", "512"))
         n_gpu_layers = int(os.environ.get("LLAMA_CPP_GPU_LAYERS", "0"))

backend/hue_portal/chatbot/slow_path_handler.py CHANGED Viewed

@@ -107,6 +107,82 @@ class SlowPathHandler:
             except Exception as e:
                 logger.warning("[RERANKER] Reranking failed: %s, using original results", e)
         # Get conversation context if available
         context = None
         if session_id:
@@ -150,11 +226,36 @@ class SlowPathHandler:
         # Fallback to template if LLM not available or failed
         if not message:
             if search_result["count"] > 0:
-                template = RESPONSE_TEMPLATES.get(intent, RESPONSE_TEMPLATES["general_query"])
-                message = template.format(
-                    count=search_result["count"],
-                    query=query
-                )
             else:
                 message = RESPONSE_TEMPLATES["no_results"].format(query=query)

             except Exception as e:
                 logger.warning("[RERANKER] Reranking failed: %s, using original results", e)
+        # BƯỚC 1: Bypass LLM khi có results tốt (tránh context overflow + tăng tốc 30-40%)
+        # Chỉ áp dụng cho legal queries có results với score cao
+        if intent == "search_legal" and search_result["count"] > 0:
+            top_result = search_result["results"][0]
+            top_score = top_result.get("score", 0.0) or 0.0
+            top_data = top_result.get("data", {})
+            doc_code = (top_data.get("document_code") or "").upper()
+            content = top_data.get("content", "") or top_data.get("excerpt", "")
+            # Bypass LLM nếu:
+            # 1. Có document code (TT-02-CAND, etc.) và content đủ dài
+            # 2. Score >= 0.4 (giảm threshold để dễ trigger hơn)
+            # 3. Hoặc có keywords quan trọng (%, hạ bậc, thi đua, tỷ lệ) với score >= 0.3
+            should_bypass = False
+            query_lower = query.lower()
+            has_keywords = any(kw in query_lower for kw in ["%", "phần trăm", "tỷ lệ", "12%", "20%", "10%", "hạ bậc", "thi đua", "xếp loại", "vi phạm", "cán bộ"])
+            # Điều kiện bypass dễ hơn: có doc_code + content đủ dài + score hợp lý
+            if doc_code and len(content) > 100:
+                if top_score >= 0.4:
+                    should_bypass = True
+                elif has_keywords and top_score >= 0.3:
+                    should_bypass = True
+            # Hoặc có keywords quan trọng + content đủ dài
+            elif has_keywords and len(content) > 100 and top_score >= 0.3:
+                should_bypass = True
+            if should_bypass:
+                # Template trả thẳng cho query về tỷ lệ vi phạm + hạ bậc thi đua
+                if any(kw in query_lower for kw in ["12%", "tỷ lệ", "phần trăm", "hạ bậc", "thi đua"]):
+                    # Query về tỷ lệ vi phạm và hạ bậc thi đua
+                    section_code = top_data.get("section_code", "")
+                    section_title = top_data.get("section_title", "")
+                    doc_title = top_data.get("document_title", "văn bản pháp luật")
+                    # Trích xuất đoạn liên quan từ content
+                    content_preview = content[:600] + "..." if len(content) > 600 else content
+                    answer = (
+                        f"Theo {doc_title} ({doc_code}):\n\n"
+                        f"{section_code}: {section_title}\n\n"
+                        f"{content_preview}\n\n"
+                        f"Nguồn: {section_code}, {doc_title} ({doc_code})"
+                    )
+                else:
+                    # Template chung cho legal queries
+                    section_code = top_data.get("section_code", "Điều liên quan")
+                    section_title = top_data.get("section_title", "")
+                    doc_title = top_data.get("document_title", "văn bản pháp luật")
+                    content_preview = content[:500] + "..." if len(content) > 500 else content
+                    answer = (
+                        f"Kết quả chính xác nhất:\n\n"
+                        f"- Văn bản: {doc_title} ({doc_code})\n"
+                        f"- Điều khoản: {section_code}" + (f" – {section_title}" if section_title else "") + "\n\n"
+                        f"{content_preview}\n\n"
+                        f"Nguồn: {section_code}, {doc_title} ({doc_code})"
+                    )
+                logger.info(
+                    "[BYPASS_LLM] Using raw template for legal query (score=%.3f, doc=%s, query='%s')",
+                    top_score,
+                    doc_code,
+                    query[:50]
+                )
+                return {
+                    "message": answer,
+                    "intent": intent,
+                    "confidence": min(0.99, top_score + 0.05),
+                    "results": search_result["results"][:3],
+                    "count": min(3, search_result["count"]),
+                    "_source": "raw_template",
+                    "routing": "raw_template"
+                }
         # Get conversation context if available
         context = None
         if session_id:
         # Fallback to template if LLM not available or failed
         if not message:
             if search_result["count"] > 0:
+                # Đặc biệt xử lý legal queries: format tốt hơn thay vì dùng template chung
+                if intent == "search_legal" and search_result["results"]:
+                    top_result = search_result["results"][0]
+                    top_data = top_result.get("data", {})
+                    doc_code = top_data.get("document_code", "")
+                    doc_title = top_data.get("document_title", "văn bản pháp luật")
+                    section_code = top_data.get("section_code", "")
+                    section_title = top_data.get("section_title", "")
+                    content = top_data.get("content", "") or top_data.get("excerpt", "")
+                    if content and len(content) > 50:
+                        content_preview = content[:400] + "..." if len(content) > 400 else content
+                        message = (
+                            f"Tôi tìm thấy {search_result['count']} điều khoản liên quan đến '{query}':\n\n"
+                            f"**{section_code}**: {section_title or 'Nội dung liên quan'}\n\n"
+                            f"{content_preview}\n\n"
+                            f"Nguồn: {doc_title}" + (f" ({doc_code})" if doc_code else "")
+                        )
+                    else:
+                        template = RESPONSE_TEMPLATES.get(intent, RESPONSE_TEMPLATES["general_query"])
+                        message = template.format(
+                            count=search_result["count"],
+                            query=query
+                        )
+                else:
+                    template = RESPONSE_TEMPLATES.get(intent, RESPONSE_TEMPLATES["general_query"])
+                    message = template.format(
+                        count=search_result["count"],
+                        query=query
+                    )
             else:
                 message = RESPONSE_TEMPLATES["no_results"].format(query=query)