davidtran999 committed on
Commit
dc97c5a
·
verified ·
1 Parent(s): 6ad06fa

Upload backend/hue_portal/chatbot/slow_path_handler.py with huggingface_hub

Browse files
backend/hue_portal/chatbot/slow_path_handler.py CHANGED
@@ -30,7 +30,6 @@ from hue_portal.chatbot.context_manager import ConversationContext
30
  from hue_portal.chatbot.router import DOCUMENT_CODE_PATTERNS
31
  from hue_portal.core.query_rewriter import get_query_rewriter
32
  from hue_portal.core.pure_semantic_search import pure_semantic_search, parallel_vector_search
33
- from hue_portal.core.redis_cache import get_redis_cache
34
 
35
  logger = logging.getLogger(__name__)
36
 
@@ -50,8 +49,6 @@ class SlowPathHandler:
50
  self.redis_cache = get_redis_cache()
51
  # Prefetch cache TTL (30 minutes default)
52
  self.prefetch_cache_ttl = int(os.environ.get("CACHE_PREFETCH_TTL", "1800"))
53
- # Toggle wizard flow (disable to answer directly)
54
- self.disable_wizard_flow = os.environ.get("DISABLE_WIZARD_FLOW", "false").lower() == "true"
55
 
56
  def handle(
57
  self,
@@ -117,46 +114,13 @@ class SlowPathHandler:
117
  )
118
  if (
119
  intent == "search_legal"
120
- and not self.disable_wizard_flow
121
  and not selected_document_code_normalized
122
  and not has_explicit_code
123
  ):
124
- logger.info("[QUERY_REWRITE] Wizard conditions met, using Query Rewrite Strategy")
 
 
125
 
126
- # Query Rewrite Strategy: Rewrite query into 3-5 optimized legal queries
127
- query_rewriter = get_query_rewriter(self.llm_generator)
128
-
129
- # Get conversation context for query rewriting
130
- context = None
131
- if session_id:
132
- try:
133
- recent_messages = ConversationContext.get_recent_messages(session_id, limit=5)
134
- context = [
135
- {"role": msg.role, "content": msg.content}
136
- for msg in recent_messages
137
- ]
138
- except Exception as exc:
139
- logger.warning("[QUERY_REWRITE] Failed to load context: %s", exc)
140
-
141
- # Rewrite query into 3-5 queries
142
- rewritten_queries = query_rewriter.rewrite_query(
143
- query,
144
- context=context,
145
- max_queries=5,
146
- min_queries=3
147
- )
148
-
149
- if not rewritten_queries:
150
- # Fallback to original query if rewrite fails
151
- rewritten_queries = [query]
152
-
153
- logger.info(
154
- "[QUERY_REWRITE] Rewrote query into %d queries: %s",
155
- len(rewritten_queries),
156
- rewritten_queries[:3]
157
- )
158
-
159
- # Parallel vector search with multiple queries
160
  try:
161
  from hue_portal.core.models import LegalSection
162
 
@@ -164,12 +128,13 @@ class SlowPathHandler:
164
  qs = LegalSection.objects.all()
165
  text_fields = ["section_title", "section_code", "content"]
166
 
167
- # Use parallel vector search
168
- search_results = parallel_vector_search(
169
- rewritten_queries,
 
 
170
  qs,
171
- top_k_per_query=5,
172
- final_top_k=7,
173
  text_fields=text_fields
174
  )
175
 
@@ -177,6 +142,7 @@ class SlowPathHandler:
177
  doc_codes_seen: Set[str] = set()
178
  document_options: List[Dict[str, Any]] = []
179
 
 
180
  for section, score in search_results:
181
  doc = getattr(section, "document", None)
182
  if not doc:
@@ -265,7 +231,7 @@ class SlowPathHandler:
265
  )
266
 
267
  logger.info(
268
- "[QUERY_REWRITE] ✅ Found %d documents using Query Rewrite Strategy",
269
  len(document_options)
270
  )
271
 
@@ -281,14 +247,14 @@ class SlowPathHandler:
281
  "results": [],
282
  "count": 0,
283
  "intent": intent,
284
- "_source": "query_rewrite",
285
- "routing": "query_rewrite",
286
- "confidence": 0.95, # High confidence with Query Rewrite Strategy
287
  }
288
 
289
  except Exception as exc:
290
  logger.error(
291
- "[QUERY_REWRITE] Error in Query Rewrite Strategy: %s, falling back to LLM suggestions",
292
  exc,
293
  exc_info=True
294
  )
 
30
  from hue_portal.chatbot.router import DOCUMENT_CODE_PATTERNS
31
  from hue_portal.core.query_rewriter import get_query_rewriter
32
  from hue_portal.core.pure_semantic_search import pure_semantic_search, parallel_vector_search
 
33
 
34
  logger = logging.getLogger(__name__)
35
 
 
49
  self.redis_cache = get_redis_cache()
50
  # Prefetch cache TTL (30 minutes default)
51
  self.prefetch_cache_ttl = int(os.environ.get("CACHE_PREFETCH_TTL", "1800"))
 
 
52
 
53
  def handle(
54
  self,
 
114
  )
115
  if (
116
  intent == "search_legal"
 
117
  and not selected_document_code_normalized
118
  and not has_explicit_code
119
  ):
120
+ # DISABLED: Query Rewrite Strategy (takes 5-10s, causes timeouts)
121
+ # Use pure semantic search directly with the original query for speed
122
+ logger.info("[SEARCH] Using direct semantic search (Query Rewrite disabled for performance)")
123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  try:
125
  from hue_portal.core.models import LegalSection
126
 
 
128
  qs = LegalSection.objects.all()
129
  text_fields = ["section_title", "section_code", "content"]
130
 
131
+ # Use direct semantic search with original query (no rewrite)
132
+ # Use _single_query_search directly to get (section, score) tuples
133
+ from hue_portal.core.pure_semantic_search import _single_query_search
134
+ search_results = _single_query_search(
135
+ query,
136
  qs,
137
+ top_k=10,
 
138
  text_fields=text_fields
139
  )
140
 
 
142
  doc_codes_seen: Set[str] = set()
143
  document_options: List[Dict[str, Any]] = []
144
 
145
+ # search_results is List[Tuple[section, score]]
146
  for section, score in search_results:
147
  doc = getattr(section, "document", None)
148
  if not doc:
 
231
  )
232
 
233
  logger.info(
234
+ "[SEARCH] ✅ Found %d documents using direct semantic search",
235
  len(document_options)
236
  )
237
 
 
247
  "results": [],
248
  "count": 0,
249
  "intent": intent,
250
+ "_source": "direct_search",
251
+ "routing": "direct_search",
252
+ "confidence": 0.85, # Slightly lower than query rewrite but still good
253
  }
254
 
255
  except Exception as exc:
256
  logger.error(
257
+ "[SEARCH] Error in direct semantic search: %s, falling back to LLM suggestions",
258
  exc,
259
  exc_info=True
260
  )