Spaces:

TradaAI
/

Chatopus

Sleeping

App Files Files Community

VietCat committed on Aug 25, 2025

Commit

844483c

1 Parent(s): af289ca

adjust parallel search and rerank

Browse files

Files changed (1) hide show

app/message_processor.py +69 -19

app/message_processor.py CHANGED Viewed

@@ -331,37 +331,87 @@ class MessageProcessor:
         logger.info(f"[MOCK] Creating Facebook post for sender_id={sender_id} with history={history}")
         return "https://facebook.com/mock_post_url"
-    async def handle_muc_phat(self, conv, conversation_context, page_token, sender_id):
-        vehicle = conv.get('originalvehicle', '')
-        action = conv.get('originalaction', '')
-        question = conv.get('originalquestion', '')
-        if not action and not question:
-             return "Để tra cứu mức phạt, bạn vui lòng cung cấp hành vi vi phạm nhé."
-        search_query = action or question
-        logger.info(f"[DEBUG] tạo embedding cho: '{search_query}'")
         try:
-            embedding = await self.channel.embedder.create_embedding(search_query)
-            logger.info(f"[DEBUG] embedding: {embedding[:5]} ... (total {len(embedding)})")
             loop = asyncio.get_event_loop()
             match_count = get_settings().match_count
             matches = await loop.run_in_executor(
                 None,
                 lambda: self.channel.supabase.match_documents(
-                    embedding,
                     match_count=match_count,
-                    user_question=search_query
                 )
             )
-            logger.info(f"[DEBUG] matches: {matches[:2]}...{matches[-2:]}")
-            if matches:
-                response = await self.format_search_results(conversation_context, question or action, matches, page_token, sender_id)
             else:
                 response = "Xin lỗi, tôi không tìm thấy thông tin phù hợp với hành vi bạn mô tả."
         except Exception as e:

         logger.info(f"[MOCK] Creating Facebook post for sender_id={sender_id} with history={history}")
         return "https://facebook.com/mock_post_url"
+    async def _search_and_rerank_task(self, keyword: str, full_query_context: str, vehicle_keywords: List[str]) -> List[Dict[str, Any]]:
+        """
+        Hàm trợ giúp để thực hiện một tác vụ song song: query từ Supabase và sau đó rerank kết quả.
+        LƯU Ý: Việc rerank cho mỗi luồng riêng lẻ có thể tốn kém và không hiệu quả về chất lượng kết quả cuối cùng.
+        """
         try:
+            logger.info(f"[SEARCH_RERANK_TASK] Bắt đầu tác vụ cho từ khóa: '{keyword}'")
+            # 1. Query Supabase
+            embedding = await self.channel.embedder.create_embedding(keyword)
             loop = asyncio.get_event_loop()
             match_count = get_settings().match_count
             matches = await loop.run_in_executor(
                 None,
                 lambda: self.channel.supabase.match_documents(
+                    embedding=embedding,
                     match_count=match_count,
+                    user_question=full_query_context,
+                    vehicle_keywords=vehicle_keywords
                 )
             )
+            if not matches:
+                logger.info(f"[SEARCH_RERANK_TASK] Không tìm thấy kết quả nào từ Supabase cho từ khóa: '{keyword}'")
+                return []
+            logger.info(f"[SEARCH_RERANK_TASK] Tìm thấy {len(matches)} kết quả. Bắt đầu rerank cho từ khóa: '{keyword}'")
+            # 2. Rerank (Tạm thời bỏ qua theo logic code gốc, nhưng nếu bật sẽ chạy ở đây)
+            # CẢNH BÁO: Bước này rất tốn kém và làm chậm hệ thống nếu chạy cho mỗi từ khóa.
+            # Việc rerank nhiều lần sẽ làm tăng chi phí và có thể chạm giới hạn API.
+            reranked_matches = matches # Mặc định trả về kết quả gốc nếu rerank bị lỗi hoặc tắt
+            # try:
+            #     # Sử dụng full_query_context để rerank sẽ cho kết quả tốt hơn là chỉ dùng keyword
+            #     reranked = await self.channel.reranker.rerank(full_query_context, matches, top_k=10)
+            #     if reranked:
+            #         reranked_matches = reranked
+            #         logger.info(f"[SEARCH_RERANK_TASK] Rerank thành công cho từ khóa '{keyword}', còn lại {len(reranked_matches)} kết quả.")
+            # except Exception as e:
+            #     logger.error(f"[SEARCH_RERANK_TASK] Lỗi khi rerank cho từ khóa '{keyword}': {e}. Sử dụng kết quả gốc.")
+            return reranked_matches
+        except Exception as e:
+            logger.error(f"Lỗi trong tác vụ tìm kiếm và rerank cho từ khóa '{keyword}': {e}")
+            return [] # Trả về danh sách rỗng để không làm hỏng luồng chung
+    async def handle_muc_phat(self, conv, conversation_context, page_token, sender_id):
+        vehicle_str = conv.get('originalvehicle', '')
+        vehicle_keywords = vehicle_str.split(',') if vehicle_str else []
+        action_keywords_str = conv.get('originalaction', '')
+        question = conv.get('originalquestion', '')
+        tu_khoa_list = action_keywords_str.split()
+        if not tu_khoa_list and not question:
+             return "Để tra cứu mức phạt, bạn vui lòng cung cấp hành vi vi phạm nhé."
+        main_query_for_context = question or action_keywords_str
+        try:
+            # --- 1. Tạo và chạy song song các tác vụ Query -> Rerank ---
+            search_terms = tu_khoa_list if tu_khoa_list else [main_query_for_context]
+            tasks = [self._search_and_rerank_task(term, main_query_for_context, vehicle_keywords) for term in search_terms]
+            list_of_reranked_results = await asyncio.gather(*tasks)
+            # --- 2. Tổng hợp và loại bỏ kết quả trùng lặp ---
+            combined_matches = []
+            seen_ids = set()
+            for reranked_list in list_of_reranked_results:
+                for match in reranked_list:
+                    match_id = match.get('id')
+                    if match_id and match_id not in seen_ids:
+                        combined_matches.append(match)
+                        seen_ids.add(match_id)
+            logger.info(f"Tổng hợp được {len(combined_matches)} văn bản duy nhất từ các tác vụ song song.")
+            # --- 3. Tạo câu trả lời ---
+            if combined_matches:
+                response = await self.format_search_results(conversation_context, main_query_for_context, combined_matches, page_token, sender_id)
             else:
                 response = "Xin lỗi, tôi không tìm thấy thông tin phù hợp với hành vi bạn mô tả."
         except Exception as e: