|
|
""" |
|
|
Slow Path Handler - Full RAG pipeline for complex queries. |
|
|
""" |
|
|
import time |
|
|
import logging |
|
|
from typing import Dict, Any, Optional, List |
|
|
import unicodedata |
|
|
import re |
|
|
|
|
|
from hue_portal.core.chatbot import get_chatbot, RESPONSE_TEMPLATES |
|
|
from hue_portal.core.models import ( |
|
|
Fine, |
|
|
Procedure, |
|
|
Office, |
|
|
Advisory, |
|
|
LegalSection, |
|
|
LegalDocument, |
|
|
) |
|
|
from hue_portal.core.search_ml import search_with_ml |
|
|
|
|
|
|
|
|
from hue_portal.chatbot.llm_integration import get_llm_generator |
|
|
from hue_portal.chatbot.structured_legal import format_structured_legal_answer |
|
|
from hue_portal.chatbot.context_manager import ConversationContext |
|
|
|
|
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
|
|
|
class SlowPathHandler: |
|
|
"""Handle Slow Path queries with full RAG pipeline.""" |
|
|
|
|
|
    def __init__(self):
        # Shared singleton helpers: the rule-based chatbot (templates and
        # keyword extraction) and the LLM answer generator.
        self.chatbot = get_chatbot()
        self.llm_generator = get_llm_generator()
|
|
|
|
|
    def handle(self, query: str, intent: str, session_id: Optional[str] = None) -> Dict[str, Any]:
        """
        Run the full RAG pipeline for a query.

        Stages (in order, each may short-circuit and return early):
        1. Greeting short-circuit for trivial greetings.
        2. Intent-based hybrid search (limit 8).
        3. Fast path for confident, non-complex legal hits (no LLM).
        4. Cross-encoder reranking of legal results (best effort).
        5. Raw-template bypass when the top legal hit is strong enough.
        6. LLM generation (structured legal answer first, generic fallback).
        7. Template fallbacks when no LLM answer is produced.

        Args:
            query: User query text.
            intent: Detected intent label (e.g. "search_legal", "greeting").
            session_id: Optional session ID used to load conversation context.

        Returns:
            Response dict with message, intent, results, count, and a
            "_source" marker identifying which path produced the answer.
        """
        query = query.strip()

        # Short-circuit plain greetings ("xin chào", "hi", ...) with a canned
        # template — only when the message is at most 3 words and carries no
        # domain keywords (fines, procedures, addresses, ...).
        if intent == "greeting":
            query_lower = query.lower().strip()
            query_words = query_lower.split()
            is_simple_greeting = (
                len(query_words) <= 3 and
                any(greeting in query_lower for greeting in ["xin chào", "chào", "hello", "hi"]) and
                not any(kw in query_lower for kw in ["phạt", "mức phạt", "vi phạm", "thủ tục", "hồ sơ", "địa chỉ", "công an", "cảnh báo"])
            )
            if is_simple_greeting:
                return {
                    "message": RESPONSE_TEMPLATES["greeting"],
                    "intent": "greeting",
                    "results": [],
                    "count": 0,
                    "_source": "slow_path"
                }

        # Retrieve candidate documents for the detected intent.
        search_result = self._search_by_intent(intent, query, limit=8)

        # Fast path: skip the LLM entirely for simple legal queries whose top
        # results are confident enough (see _maybe_fast_path_response).
        fast_path_response = None
        if intent == "search_legal" and not self._is_complex_query(query):
            fast_path_response = self._maybe_fast_path_response(search_result["results"], query)
            if fast_path_response:
                fast_path_response["intent"] = intent
                fast_path_response["_source"] = "fast_path"
                return fast_path_response

        # Rerank legal results with the cross-encoder reranker; non-legal
        # results are appended after the reranked top-k. Any failure falls
        # back silently to the original ordering.
        if intent == "search_legal":
            try:
                from hue_portal.core.reranker import rerank_documents
                legal_results = [r for r in search_result["results"] if r.get("type") == "legal"]
                if len(legal_results) > 0:
                    top_k = min(3, len(legal_results))
                    reranked = rerank_documents(query, legal_results, top_k=top_k)
                    non_legal = [r for r in search_result["results"] if r.get("type") != "legal"]
                    search_result["results"] = reranked + non_legal
                    search_result["count"] = len(search_result["results"])
                    logger.info(
                        "[RERANKER] Reranked %d legal results to top-%d for query: %s",
                        len(legal_results),
                        top_k,
                        query[:50]
                    )
            except Exception as e:
                logger.warning("[RERANKER] Reranking failed: %s, using original results", e)

        # Raw-template bypass: when the top legal hit is strong (score and
        # content-length thresholds, optionally keyword-assisted), answer
        # directly from the section text without invoking the LLM.
        if intent == "search_legal" and search_result["count"] > 0:
            top_result = search_result["results"][0]
            top_score = top_result.get("score", 0.0) or 0.0
            top_data = top_result.get("data", {})
            doc_code = (top_data.get("document_code") or "").upper()
            content = top_data.get("content", "") or top_data.get("excerpt", "")

            should_bypass = False
            query_lower = query.lower()
            # Percentage / discipline-related keywords lower the score
            # threshold from 0.4 to 0.3.
            has_keywords = any(kw in query_lower for kw in ["%", "phần trăm", "tỷ lệ", "12%", "20%", "10%", "hạ bậc", "thi đua", "xếp loại", "vi phạm", "cán bộ"])

            if doc_code and len(content) > 100:
                if top_score >= 0.4:
                    should_bypass = True
                elif has_keywords and top_score >= 0.3:
                    should_bypass = True
            elif has_keywords and len(content) > 100 and top_score >= 0.3:
                should_bypass = True

            if should_bypass:
                # Percentage/ratio questions get a longer excerpt (600 chars)
                # and a "Theo <doc>" framing; everything else uses the generic
                # "Kết quả chính xác nhất" template with a 500-char excerpt.
                if any(kw in query_lower for kw in ["12%", "tỷ lệ", "phần trăm", "hạ bậc", "thi đua"]):
                    section_code = top_data.get("section_code", "")
                    section_title = top_data.get("section_title", "")
                    doc_title = top_data.get("document_title", "văn bản pháp luật")
                    content_preview = content[:600] + "..." if len(content) > 600 else content
                    answer = (
                        f"Theo {doc_title} ({doc_code}):\n\n"
                        f"{section_code}: {section_title}\n\n"
                        f"{content_preview}\n\n"
                        f"Nguồn: {section_code}, {doc_title} ({doc_code})"
                    )
                else:
                    section_code = top_data.get("section_code", "Điều liên quan")
                    section_title = top_data.get("section_title", "")
                    doc_title = top_data.get("document_title", "văn bản pháp luật")
                    content_preview = content[:500] + "..." if len(content) > 500 else content
                    answer = (
                        f"Kết quả chính xác nhất:\n\n"
                        f"- Văn bản: {doc_title} ({doc_code})\n"
                        f"- Điều khoản: {section_code}" + (f" – {section_title}" if section_title else "") + "\n\n"
                        f"{content_preview}\n\n"
                        f"Nguồn: {section_code}, {doc_title} ({doc_code})"
                    )

                logger.info(
                    "[BYPASS_LLM] Using raw template for legal query (score=%.3f, doc=%s, query='%s')",
                    top_score,
                    doc_code,
                    query[:50]
                )

                return {
                    "message": answer,
                    "intent": intent,
                    "confidence": min(0.99, top_score + 0.05),
                    "results": search_result["results"][:3],
                    "count": min(3, search_result["count"]),
                    "_source": "raw_template",
                    "routing": "raw_template"
                }

        # Load up to 5 recent conversation turns for LLM context; any failure
        # (e.g. missing session) is ignored and context stays None.
        context = None
        if session_id:
            try:
                recent_messages = ConversationContext.get_recent_messages(session_id, limit=5)
                context = [
                    {
                        "role": msg.role,
                        "content": msg.content,
                        "intent": msg.intent
                    }
                    for msg in recent_messages
                ]
            except Exception:
                pass

        # LLM generation: structured legal answer first (legal intent only),
        # then a generic grounded answer over the top-3 documents.
        message = None
        if self.llm_generator and search_result["count"] > 0:
            if intent == "search_legal" and search_result["results"]:
                legal_docs = [r["data"] for r in search_result["results"] if r.get("type") == "legal"][:3]
                if legal_docs:
                    structured_answer = self.llm_generator.generate_structured_legal_answer(
                        query,
                        legal_docs,
                        prefill_summary=None
                    )
                    if structured_answer:
                        message = format_structured_legal_answer(structured_answer)

            if not message:
                documents = [r["data"] for r in search_result["results"][:3]]
                message = self.llm_generator.generate_answer(
                    query,
                    context=context,
                    documents=documents
                )

        # Template fallbacks when the LLM produced nothing.
        if not message:
            if search_result["count"] > 0:
                # For legal intent with substantive content, quote the top
                # section directly (400-char excerpt).
                if intent == "search_legal" and search_result["results"]:
                    top_result = search_result["results"][0]
                    top_data = top_result.get("data", {})
                    doc_code = top_data.get("document_code", "")
                    doc_title = top_data.get("document_title", "văn bản pháp luật")
                    section_code = top_data.get("section_code", "")
                    section_title = top_data.get("section_title", "")
                    content = top_data.get("content", "") or top_data.get("excerpt", "")

                    if content and len(content) > 50:
                        content_preview = content[:400] + "..." if len(content) > 400 else content
                        message = (
                            f"Tôi tìm thấy {search_result['count']} điều khoản liên quan đến '{query}':\n\n"
                            f"**{section_code}**: {section_title or 'Nội dung liên quan'}\n\n"
                            f"{content_preview}\n\n"
                            f"Nguồn: {doc_title}" + (f" ({doc_code})" if doc_code else "")
                        )
                    else:
                        template = RESPONSE_TEMPLATES.get(intent, RESPONSE_TEMPLATES["general_query"])
                        message = template.format(
                            count=search_result["count"],
                            query=query
                        )
                else:
                    template = RESPONSE_TEMPLATES.get(intent, RESPONSE_TEMPLATES["general_query"])
                    message = template.format(
                        count=search_result["count"],
                        query=query
                    )
            else:
                message = RESPONSE_TEMPLATES["no_results"].format(query=query)

        # Trim to the top-5 results for the response payload.
        results = search_result["results"][:5]

        response = {
            "message": message,
            "intent": intent,
            "confidence": 0.95,
            "results": results,
            "count": len(results),
            "_source": "slow_path"
        }

        return response
|
|
|
|
|
    def _search_by_intent(self, intent: str, query: str, limit: int = 5) -> Dict[str, Any]:
        """
        Dispatch an ML-backed search over the model matching the intent.

        Reduced limit from 20 to 5 for faster inference on free tier.

        Args:
            intent: One of "search_fine", "search_procedure", "search_office",
                "search_advisory", "search_legal"; any other value yields an
                empty result list.
            query: Raw user query.
            limit: Max results (passed as top_k to search_with_ml).

        Returns:
            Dict with intent, query, expanded keywords, serialized results,
            and their count.
        """
        # Expand the raw query with extracted keywords to improve recall.
        keywords = query.strip()
        extracted = " ".join(self.chatbot.extract_keywords(query))
        if extracted and len(extracted) > 2:
            keywords = f"{keywords} {extracted}"

        results = []

        if intent == "search_fine":
            qs = Fine.objects.all()
            text_fields = ["name", "code", "article", "decree", "remedial"]
            search_results = search_with_ml(qs, keywords, text_fields, top_k=limit, min_score=0.1)
            results = [{"type": "fine", "data": {
                "id": f.id,
                "name": f.name,
                "code": f.code,
                "min_fine": float(f.min_fine) if f.min_fine else None,
                "max_fine": float(f.max_fine) if f.max_fine else None,
                "article": f.article,
                "decree": f.decree,
            }} for f in search_results]

        elif intent == "search_procedure":
            qs = Procedure.objects.all()
            text_fields = ["title", "domain", "conditions", "dossier"]
            search_results = search_with_ml(qs, keywords, text_fields, top_k=limit, min_score=0.1)
            results = [{"type": "procedure", "data": {
                "id": p.id,
                "title": p.title,
                "domain": p.domain,
                "level": p.level,
            }} for p in search_results]

        elif intent == "search_office":
            qs = Office.objects.all()
            text_fields = ["unit_name", "address", "district", "service_scope"]
            search_results = search_with_ml(qs, keywords, text_fields, top_k=limit, min_score=0.1)
            results = [{"type": "office", "data": {
                "id": o.id,
                "unit_name": o.unit_name,
                "address": o.address,
                "district": o.district,
                "phone": o.phone,
                "working_hours": o.working_hours,
            }} for o in search_results]

        elif intent == "search_advisory":
            qs = Advisory.objects.all()
            text_fields = ["title", "summary"]
            search_results = search_with_ml(qs, keywords, text_fields, top_k=limit, min_score=0.1)
            results = [{"type": "advisory", "data": {
                "id": a.id,
                "title": a.title,
                "summary": a.summary,
            }} for a in search_results]

        elif intent == "search_legal":
            qs = LegalSection.objects.all()
            text_fields = ["section_title", "section_code", "content"]
            # If the query names a known document code, restrict the corpus
            # to that document; fall back to the full corpus when the code
            # has no sections stored locally.
            detected_code = self._detect_document_code(query)
            filtered = False
            if detected_code:
                filtered_qs = qs.filter(document__code__iexact=detected_code)
                if filtered_qs.exists():
                    qs = filtered_qs
                    filtered = True
                    logger.info(
                        "[SEARCH] Prefiltering legal sections for document code %s (query='%s')",
                        detected_code,
                        query,
                    )
                else:
                    logger.info(
                        "[SEARCH] Document code %s detected but no sections found locally, falling back to full corpus",
                        detected_code,
                    )
            else:
                logger.debug("[SEARCH] No document code detected for query: %s", query)

            # Legal search uses a much lower min_score (0.02) than the other
            # intents (0.1).
            search_results = search_with_ml(
                qs,
                keywords,
                text_fields,
                top_k=limit,
                min_score=0.02,
            )
            results = self._format_legal_results(search_results, detected_code, query=query)
            logger.info(
                "[SEARCH] Legal intent processed (query='%s', code=%s, filtered=%s, results=%d)",
                query,
                detected_code or "None",
                filtered,
                len(results),
            )

        return {
            "intent": intent,
            "query": query,
            "keywords": keywords,
            "results": results,
            "count": len(results)
        }
|
|
|
|
|
def _should_save_to_golden(self, query: str, response: Dict) -> bool: |
|
|
""" |
|
|
Decide if response should be saved to golden dataset. |
|
|
|
|
|
Criteria: |
|
|
- High confidence (>0.95) |
|
|
- Has results |
|
|
- Response is complete and well-formed |
|
|
- Not already in golden dataset |
|
|
""" |
|
|
try: |
|
|
from hue_portal.core.models import GoldenQuery |
|
|
|
|
|
|
|
|
query_normalized = self._normalize_query(query) |
|
|
if GoldenQuery.objects.filter(query_normalized=query_normalized, is_active=True).exists(): |
|
|
return False |
|
|
|
|
|
|
|
|
has_results = response.get("count", 0) > 0 |
|
|
has_message = bool(response.get("message", "").strip()) |
|
|
confidence = response.get("confidence", 0.0) |
|
|
|
|
|
|
|
|
if has_results and has_message and confidence >= 0.95: |
|
|
|
|
|
message = response.get("message", "") |
|
|
if len(message) > 50: |
|
|
return True |
|
|
|
|
|
return False |
|
|
except Exception as e: |
|
|
logger.warning(f"Error checking if should save to golden: {e}") |
|
|
return False |
|
|
|
|
|
def _normalize_query(self, query: str) -> str: |
|
|
"""Normalize query for matching.""" |
|
|
normalized = query.lower().strip() |
|
|
|
|
|
normalized = unicodedata.normalize("NFD", normalized) |
|
|
normalized = "".join(ch for ch in normalized if unicodedata.category(ch) != "Mn") |
|
|
|
|
|
normalized = re.sub(r'\s+', ' ', normalized).strip() |
|
|
return normalized |
|
|
|
|
|
def _detect_document_code(self, query: str) -> Optional[str]: |
|
|
"""Detect known document code mentioned in the query.""" |
|
|
normalized_query = self._remove_accents(query).upper() |
|
|
if not normalized_query: |
|
|
return None |
|
|
try: |
|
|
codes = LegalDocument.objects.values_list("code", flat=True) |
|
|
except Exception as exc: |
|
|
logger.debug("Unable to fetch document codes: %s", exc) |
|
|
return None |
|
|
|
|
|
for code in codes: |
|
|
if not code: |
|
|
continue |
|
|
tokens = self._split_code_tokens(code) |
|
|
if tokens and all(token in normalized_query for token in tokens): |
|
|
logger.info("[SEARCH] Detected document code %s in query", code) |
|
|
return code |
|
|
return None |
|
|
|
|
|
def _split_code_tokens(self, code: str) -> List[str]: |
|
|
"""Split a document code into uppercase accentless tokens.""" |
|
|
normalized = self._remove_accents(code).upper() |
|
|
return [tok for tok in re.split(r"[-/\s]+", normalized) if tok] |
|
|
|
|
|
def _remove_accents(self, text: str) -> str: |
|
|
if not text: |
|
|
return "" |
|
|
normalized = unicodedata.normalize("NFD", text) |
|
|
return "".join(ch for ch in normalized if unicodedata.category(ch) != "Mn") |
|
|
|
|
|
    def _format_legal_results(
        self,
        search_results: List[Any],
        detected_code: Optional[str],
        query: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Build legal result payload and apply ordering/boosting based on doc code and keywords."""
        entries: List[Dict[str, Any]] = []
        upper_detected = detected_code.upper() if detected_code else None

        # Keywords whose presence in a section's text earns a score boost,
        # derived from the query's topic (percentages/ratios, rank demotion
        # and emulation grading).
        important_keywords = []
        if query:
            query_lower = query.lower()
            if any(kw in query_lower for kw in ["%", "phần trăm", "tỷ lệ", "12%", "20%", "10%"]):
                important_keywords.extend(["%", "phần trăm", "tỷ lệ", "12", "20", "10"])
            if any(kw in query_lower for kw in ["hạ bậc", "thi đua", "xếp loại", "đánh giá"]):
                important_keywords.extend(["hạ bậc", "thi đua", "xếp loại", "đánh giá"])

        for ls in search_results:
            doc = ls.document
            doc_code = doc.code if doc else None
            # Prefer the ML score; fall back to the 'rank' attribute, then 0.
            score = getattr(ls, "_ml_score", getattr(ls, "rank", 0.0)) or 0.0

            # +0.15 per important keyword found in the section text (or, when
            # content is empty, the section title).
            content_text = (ls.content or ls.section_title or "").lower()
            keyword_boost = 0.0
            if important_keywords and content_text:
                for kw in important_keywords:
                    if kw.lower() in content_text:
                        keyword_boost += 0.15
                        logger.debug(
                            "[BOOST] Keyword '%s' found in section %s, boosting score",
                            kw,
                            ls.section_code,
                        )

            entries.append(
                {
                    "type": "legal",
                    "score": float(score) + keyword_boost,
                    "data": {
                        "id": ls.id,
                        "section_code": ls.section_code,
                        "section_title": ls.section_title,
                        # 500-char preview only; full text stays in the DB.
                        "content": ls.content[:500] if ls.content else "",
                        "excerpt": ls.excerpt,
                        "document_code": doc_code,
                        "document_title": doc.title if doc else None,
                        "page_start": ls.page_start,
                        "page_end": ls.page_end,
                    },
                }
            )

        if upper_detected:
            # Sections from the explicitly-mentioned document go first,
            # keeping the search order within each group (no re-sort here).
            exact_matches = [
                r for r in entries if (r["data"].get("document_code") or "").upper() == upper_detected
            ]
            if exact_matches:
                others = [r for r in entries if r not in exact_matches]
                entries = exact_matches + others
            else:
                # NOTE(review): exact_matches is empty here, so this loop's
                # condition can never fire — effectively this branch only
                # re-sorts by score. Confirm whether the 10x boost is dead code.
                for entry in entries:
                    doc_code = (entry["data"].get("document_code") or "").upper()
                    if doc_code == upper_detected:
                        entry["score"] = (entry.get("score") or 0.1) * 10
                entries.sort(key=lambda r: r.get("score") or 0, reverse=True)
        else:
            # No document hint: plain descending score ordering.
            entries.sort(key=lambda r: r.get("score") or 0, reverse=True)
        return entries
|
|
|
|
|
def _is_complex_query(self, query: str) -> bool: |
|
|
""" |
|
|
Detect if query is complex and requires LLM reasoning (not suitable for Fast Path). |
|
|
|
|
|
Complex queries contain keywords like: %, bậc, thi đua, tỷ lệ, liên đới, tăng nặng, giảm nhẹ, đơn vị vi phạm |
|
|
""" |
|
|
if not query: |
|
|
return False |
|
|
query_lower = query.lower() |
|
|
complex_keywords = [ |
|
|
"%", "phần trăm", |
|
|
"bậc", "hạ bậc", "nâng bậc", |
|
|
"thi đua", "xếp loại", "đánh giá", |
|
|
"tỷ lệ", "tỉ lệ", |
|
|
"liên đới", "liên quan", |
|
|
"tăng nặng", "tăng nặng hình phạt", |
|
|
"giảm nhẹ", "giảm nhẹ hình phạt", |
|
|
"đơn vị vi phạm", "đơn vị có", |
|
|
] |
|
|
for keyword in complex_keywords: |
|
|
if keyword in query_lower: |
|
|
logger.info( |
|
|
"[FAST_PATH] Complex query detected (keyword: '%s'), forcing Slow Path", |
|
|
keyword, |
|
|
) |
|
|
return True |
|
|
return False |
|
|
|
|
|
def _maybe_fast_path_response( |
|
|
self, results: List[Dict[str, Any]], query: Optional[str] = None |
|
|
) -> Optional[Dict[str, Any]]: |
|
|
"""Return fast-path response if results are confident enough.""" |
|
|
if not results: |
|
|
return None |
|
|
|
|
|
|
|
|
if query and self._is_complex_query(query): |
|
|
return None |
|
|
top_result = results[0] |
|
|
top_score = top_result.get("score", 0.0) or 0.0 |
|
|
doc_code = (top_result.get("data", {}).get("document_code") or "").upper() |
|
|
|
|
|
if top_score >= 0.88 and doc_code: |
|
|
logger.info( |
|
|
"[FAST_PATH] Top score hit (%.3f) for document %s", top_score, doc_code |
|
|
) |
|
|
message = self._format_fast_legal_message(top_result) |
|
|
return { |
|
|
"message": message, |
|
|
"results": results[:3], |
|
|
"count": min(3, len(results)), |
|
|
"confidence": min(0.99, top_score + 0.05), |
|
|
} |
|
|
|
|
|
top_three = results[:3] |
|
|
if len(top_three) >= 2: |
|
|
doc_codes = [ |
|
|
(res.get("data", {}).get("document_code") or "").upper() |
|
|
for res in top_three |
|
|
if res.get("data", {}).get("document_code") |
|
|
] |
|
|
if doc_codes and len(set(doc_codes)) == 1: |
|
|
logger.info( |
|
|
"[FAST_PATH] Top-%d results share same document %s", |
|
|
len(top_three), |
|
|
doc_codes[0], |
|
|
) |
|
|
message = self._format_fast_legal_message(top_three[0]) |
|
|
return { |
|
|
"message": message, |
|
|
"results": top_three, |
|
|
"count": len(top_three), |
|
|
"confidence": min(0.97, (top_three[0].get("score") or 0.9) + 0.04), |
|
|
} |
|
|
return None |
|
|
|
|
|
def _format_fast_legal_message(self, result: Dict[str, Any]) -> str: |
|
|
"""Format a concise legal answer without LLM.""" |
|
|
data = result.get("data", {}) |
|
|
doc_title = data.get("document_title") or "văn bản pháp luật" |
|
|
doc_code = data.get("document_code") or "" |
|
|
section_code = data.get("section_code") or "Điều liên quan" |
|
|
section_title = data.get("section_title") or "" |
|
|
content = (data.get("content") or data.get("excerpt") or "").strip() |
|
|
if len(content) > 400: |
|
|
trimmed = content[:400].rsplit(" ", 1)[0] |
|
|
content = f"{trimmed}..." |
|
|
intro = "Kết quả chính xác nhất:" |
|
|
lines = [intro] |
|
|
if doc_title or doc_code: |
|
|
lines.append(f"- Văn bản: {doc_title or 'văn bản pháp luật'}" + (f" ({doc_code})" if doc_code else "")) |
|
|
section_label = section_code |
|
|
if section_title: |
|
|
section_label = f"{section_code} – {section_title}" |
|
|
lines.append(f"- Điều khoản: {section_label}") |
|
|
lines.append("") |
|
|
lines.append(content) |
|
|
citation_doc = doc_title or doc_code or "nguồn chính thức" |
|
|
lines.append(f"\nNguồn: {section_label}, {citation_doc}.") |
|
|
return "\n".join(lines) |
|
|
|
|
|
|