Spaces:

TradaAI
/

Chatopus

Sleeping

App Files Community

VietCat committed on Aug 25, 2025

Commit

fd5dbb4

1 Parent(s): 96d59ac

update reranker

Browse files

Files changed (5) hide show

app/facebook.py +122 -69
app/gemini_client.py +1 -1
app/llm.py +32 -22
app/message_processor.py +1 -1
app/reranker.py +36 -3

app/facebook.py CHANGED Viewed

@@ -13,8 +13,15 @@ from .config import Settings, get_settings
 from .utils import timing_decorator_async, timing_decorator_sync, _safe_truncate
 class FacebookClient:
-    def __init__(self, app_secret: str, page_id: Optional[str] = None, page_token: Optional[str] = None, sender_id: Optional[str] = None):
         """
         Khởi tạo FacebookClient với app_secret.
         Input: app_secret (str) - Facebook App Secret.
@@ -26,7 +33,12 @@ class FacebookClient:
         self.page_token = page_token
         self.sender_id = sender_id
-    def update_context(self, page_id: Optional[str] = None, page_token: Optional[str] = None, sender_id: Optional[str] = None):
         """
         Cập nhật các thông tin context (page_id, page_token, sender_id) của client.
         Input: page_id (str), page_token (str), sender_id (str)
@@ -40,7 +52,9 @@ class FacebookClient:
             self.sender_id = sender_id
     @timing_decorator_async
-    async def verify_webhook(self, token: str, challenge: str, verify_token: str) -> int:
         """
         Xác thực webhook Facebook bằng verify_token và trả về challenge.
         Input: token (str), challenge (str), verify_token (str)
@@ -61,27 +75,26 @@ class FacebookClient:
             return False
         expected = hmac.new(
-            self.app_secret.encode(),
-            payload,
-            hashlib.sha256
         ).hexdigest()
         return hmac.compare_digest(signature[7:], expected)
     def format_message(self, text: str) -> str:
         # 1. Thay bullet markdown bằng ký hiệu khác
-        text = text.replace('\n*   ', '\n- ')
-        text = text.replace('\n    *   ', '\n    + ')
-        text = text.replace('\n* ', '\n- ')
-        text = text.replace('\n    * ', '\n    + ')
         # 2. Chuyển **text** hoặc __text__ thành *text*
         import re
-        text = re.sub(r'\*\*([^\*]+)\*\*', r'*\1*', text)
-        text = re.sub(r'__([^_]+)__', r'*\1*', text)
         # 3. Loại bỏ các tiêu đề markdown kiểu #, ##, ###, ...
-        text = re.sub(r'^#+\s+', '', text, flags=re.MULTILINE)
         # 4. Rút gọn nhiều dòng trống liên tiếp thành 1 dòng trống
-        text = re.sub(r'\n{3,}', '\n\n', text)
         # 5. Loại bỏ các markdown không hỗ trợ khác nếu cần
         return text
@@ -89,7 +102,7 @@ class FacebookClient:
         """
         Chia message thành các đoạn <= max_length ký tự, ưu tiên chia theo dòng.
         """
-        lines = text.split('\n')
         messages = []
         current = ""
         for line in lines:
@@ -97,16 +110,13 @@ class FacebookClient:
             if len(current) + len(line) + 1 > max_length:
                 messages.append(current.rstrip())
                 current = ""
-            current += (line + '\n')
         if current.strip():
             messages.append(current.rstrip())
         return messages
     def send_message_forwarder(
-        self,
-        access_token: str,
-        recipient_id: str,
-        message: str
     ) -> dict:
         """
         Gửi tin nhắn đến Facebook Messenger qua API được triển khai.
@@ -126,17 +136,21 @@ class FacebookClient:
         payload = {
             "recipient_id": recipient_id,
             "access_token": access_token,
-            "message": message
         }
         # Ghi lại toàn bộ payload để gỡ lỗi.
         # CẢNH BÁO: Việc này sẽ ghi lại cả PAGE_ACCESS_TOKEN. Chỉ nên dùng trong môi trường dev hoặc khi cần gỡ lỗi.
-        logger.info(f"[FACEBOOK_FORWARDER] Forwarding message to {url}. Full payload: {json.dumps(payload, ensure_ascii=False)}")
         try:
             response = requests.post(url, json=payload, timeout=10)
             response.raise_for_status()  # Sẽ raise HTTPError cho các status 4xx/5xx
-            logger.info(f"[FACEBOOK_FORWARDER] Forwarder API returned status {response.status_code}.")
             return response.json()
         except requests.HTTPError as e:
             # Lỗi HTTP (4xx, 5xx), log chi tiết hơn để gỡ lỗi phía forwarder
@@ -154,40 +168,53 @@ class FacebookClient:
             return {"error": str(e), "details": error_content}
         except requests.RequestException as e:
             # Các lỗi request khác (timeout, connection error)
-            logger.error(f"[FACEBOOK_FORWARDER] Request Error calling forwarder API: {e}")
             return {"error": str(e)}
-    def _send_message_sync(self, page_access_token: str, recipient_id: str, message: str) -> dict:
         """
         Gửi tin nhắn sử dụng facebook-sdk với request method trực tiếp.
         """
         max_retries = 3
         retry_delay = 1  # giây
         for attempt in range(max_retries):
             try:
                 graph = facebook.GraphAPI(access_token=page_access_token, version="3.1")
                 # Sử dụng request method trực tiếp cho Messenger API với timeout
                 result = graph.request(
                     path="me/messages",
                     post_args={
                         "recipient": {"id": recipient_id},
-                        "message": {"text": message}
                     },
-                    timeout=30  # Thêm timeout 30 giây
                 )
                 return result
             except facebook.GraphAPIError as e:
-                logger.error(f"Facebook GraphAPI Error (attempt {attempt + 1}/{max_retries}): {e}")
                 if attempt == max_retries - 1:  # Lần cuối
-                    raise HTTPException(status_code=500, detail=f"Failed to send message to Facebook: {e}")
                 time.sleep(retry_delay)
                 retry_delay *= 2  # Exponential backoff
             except Exception as e:
-                logger.error(f"Unexpected error sending message to Facebook (attempt {attempt + 1}/{max_retries}): {e}")
                 if attempt == max_retries - 1:  # Lần cuối
-                    raise HTTPException(status_code=500, detail="Failed to send message to Facebook")
                 time.sleep(retry_delay)
                 retry_delay *= 2  # Exponential backoff
@@ -197,85 +224,111 @@ class FacebookClient:
         """
         max_retries = 3
         retry_delay = 1  # giây
         for attempt in range(max_retries):
             try:
                 graph = facebook.GraphAPI(access_token=page_access_token, version="3.1")
                 result = graph.get_object(page_id)
                 return result
             except facebook.GraphAPIError as e:
-                logger.error(f"Facebook GraphAPI Error getting page info (attempt {attempt + 1}/{max_retries}): {e}")
                 if attempt == max_retries - 1:  # Lần cuối
-                    raise HTTPException(status_code=500, detail=f"Failed to get page info: {e}")
                 time.sleep(retry_delay)
                 retry_delay *= 2  # Exponential backoff
             except Exception as e:
-                logger.error(f"Unexpected error getting page info (attempt {attempt + 1}/{max_retries}): {e}")
                 if attempt == max_retries - 1:  # Lần cuối
-                    raise HTTPException(status_code=500, detail="Failed to get page info")
                 time.sleep(retry_delay)
                 retry_delay *= 2  # Exponential backoff
     @timing_decorator_async
-    async def send_message(self, page_access_token: Optional[str] = None, recipient_id: Optional[str] = None, message: str = "") -> dict:
         page_access_token = page_access_token or self.page_token
         recipient_id = recipient_id or self.sender_id
         if not message or not str(message).strip():
-            logger.warning(f"[FACEBOOK_SEND] Attempted to send an empty or whitespace-only message to recipient {recipient_id}. Aborting.")
             return {}
         if not page_access_token or not recipient_id:
-            logger.error(f"[FACEBOOK_SEND] Missing page_access_token or recipient_id. Cannot send message.")
-            raise ValueError("FacebookClient: page_access_token and recipient_id must not be None when sending a message.")
-        logger.info(f"[FACEBOOK_SEND] Preparing to send message to recipient {recipient_id}. Full message (truncated): '{_safe_truncate(str(message))}'")
         # Format message
-        response_to_send = self.format_message(str(message).replace('**', '*'))
         # Chia nhỏ nếu quá dài
         messages = self.split_message(response_to_send)
         results = []
         for i, msg_part in enumerate(messages, 1):
             if len(msg_part) > 2000:
                 msg_part = msg_part[:2000]  # fallback cắt cứng
-            logger.info(f"[FACEBOOK_SEND] Sending part {i}/{len(messages)} to recipient {recipient_id}.")
             try:
                 # Wrap sync HTTP call in thread executor để giữ async
                 loop = asyncio.get_event_loop()
                 result = await loop.run_in_executor(
-                    None,
-                    self.send_message_forwarder,
-                    page_access_token,
-                    recipient_id,
-                    msg_part
                 )
                 results.append(result)
             except Exception as e:
-                logger.error(f"[FACEBOOK_SEND] Failed to send part {i}/{len(messages)} to {recipient_id}. Error: {e}")
                 results.append({"error": str(e), "part": i})
         return results[0] if results else {}
     @timing_decorator_async
-    async def get_page_info(self, page_access_token: Optional[str] = None, page_id: Optional[str] = None) -> dict:
         """
         Lấy thông tin page sử dụng Facebook SDK (async).
         """
         page_access_token = page_access_token or self.page_token
         page_id = page_id or self.page_id
         if not page_access_token or not page_id:
-            raise ValueError("FacebookClient: page_access_token and page_id must not be None when getting page info.")
         loop = asyncio.get_event_loop()
         result = await loop.run_in_executor(
-            None,
-            self._get_page_info_sync,
-            page_access_token,
-            page_id
         )
         return result
@@ -289,17 +342,17 @@ class FacebookClient:
         try:
             entry = body["entry"][0]
             messaging = entry["messaging"][0]
             sender_id = messaging["sender"]["id"]
             recipient_id = messaging["recipient"]["id"]
             timestamp = messaging["timestamp"]
             message_data = {
                 "sender_id": sender_id,
                 "page_id": recipient_id,
                 "timestamp": timestamp,
                 "text": None,
-                "attachments": []
             }
             if "message" in messaging:
@@ -312,4 +365,4 @@ class FacebookClient:
             return message_data
         except (KeyError, IndexError) as e:
             logger.error(f"Error parsing Facebook message: {e}\n\n{body}")
-            return None

 from .utils import timing_decorator_async, timing_decorator_sync, _safe_truncate
 class FacebookClient:
+    def __init__(
+        self,
+        app_secret: str,
+        page_id: Optional[str] = None,
+        page_token: Optional[str] = None,
+        sender_id: Optional[str] = None,
+    ):
         """
         Khởi tạo FacebookClient với app_secret.
         Input: app_secret (str) - Facebook App Secret.
         self.page_token = page_token
         self.sender_id = sender_id
+    def update_context(
+        self,
+        page_id: Optional[str] = None,
+        page_token: Optional[str] = None,
+        sender_id: Optional[str] = None,
+    ):
         """
         Cập nhật các thông tin context (page_id, page_token, sender_id) của client.
         Input: page_id (str), page_token (str), sender_id (str)
             self.sender_id = sender_id
     @timing_decorator_async
+    async def verify_webhook(
+        self, token: str, challenge: str, verify_token: str
+    ) -> int:
         """
         Xác thực webhook Facebook bằng verify_token và trả về challenge.
         Input: token (str), challenge (str), verify_token (str)
             return False
         expected = hmac.new(
+            self.app_secret.encode(), payload, hashlib.sha256
         ).hexdigest()
         return hmac.compare_digest(signature[7:], expected)
     def format_message(self, text: str) -> str:
         # 1. Thay bullet markdown bằng ký hiệu khác
+        text = text.replace("\n*   ", "\n- ")
+        text = text.replace("\n    *   ", "\n    + ")
+        text = text.replace("\n* ", "\n- ")
+        text = text.replace("\n    * ", "\n    + ")
         # 2. Chuyển **text** hoặc __text__ thành *text*
         import re
+        text = re.sub(r"\*\*([^\*]+)\*\*", r"*\1*", text)
+        text = re.sub(r"__([^_]+)__", r"*\1*", text)
         # 3. Loại bỏ các tiêu đề markdown kiểu #, ##, ###, ...
+        text = re.sub(r"^#+\s+", "", text, flags=re.MULTILINE)
         # 4. Rút gọn nhiều dòng trống liên tiếp thành 1 dòng trống
+        text = re.sub(r"\n{3,}", "\n\n", text)
         # 5. Loại bỏ các markdown không hỗ trợ khác nếu cần
         return text
         """
         Chia message thành các đoạn <= max_length ký tự, ưu tiên chia theo dòng.
         """
+        lines = text.split("\n")
         messages = []
         current = ""
         for line in lines:
             if len(current) + len(line) + 1 > max_length:
                 messages.append(current.rstrip())
                 current = ""
+            current += line + "\n"
         if current.strip():
             messages.append(current.rstrip())
         return messages
     def send_message_forwarder(
+        self, access_token: str, recipient_id: str, message: str
     ) -> dict:
         """
         Gửi tin nhắn đến Facebook Messenger qua API được triển khai.
         payload = {
             "recipient_id": recipient_id,
             "access_token": access_token,
+            "message": message,
         }
         # Ghi lại toàn bộ payload để gỡ lỗi.
         # CẢNH BÁO: Việc này sẽ ghi lại cả PAGE_ACCESS_TOKEN. Chỉ nên dùng trong môi trường dev hoặc khi cần gỡ lỗi.
+        logger.debug(
+            f"[FACEBOOK_FORWARDER] Forwarding message to {url}. Full payload: {json.dumps(payload, ensure_ascii=False)}"
+        )
         try:
             response = requests.post(url, json=payload, timeout=10)
             response.raise_for_status()  # Sẽ raise HTTPError cho các status 4xx/5xx
+            logger.info(
+                f"[FACEBOOK_FORWARDER] Forwarder API returned status {response.status_code}."
+            )
             return response.json()
         except requests.HTTPError as e:
             # Lỗi HTTP (4xx, 5xx), log chi tiết hơn để gỡ lỗi phía forwarder
             return {"error": str(e), "details": error_content}
         except requests.RequestException as e:
             # Các lỗi request khác (timeout, connection error)
+            logger.error(
+                f"[FACEBOOK_FORWARDER] Request Error calling forwarder API: {e}"
+            )
             return {"error": str(e)}
+    def _send_message_sync(
+        self, page_access_token: str, recipient_id: str, message: str
+    ) -> dict:
         """
         Gửi tin nhắn sử dụng facebook-sdk với request method trực tiếp.
         """
         max_retries = 3
         retry_delay = 1  # giây
         for attempt in range(max_retries):
             try:
                 graph = facebook.GraphAPI(access_token=page_access_token, version="3.1")
                 # Sử dụng request method trực tiếp cho Messenger API với timeout
                 result = graph.request(
                     path="me/messages",
                     post_args={
                         "recipient": {"id": recipient_id},
+                        "message": {"text": message},
                     },
+                    timeout=30,  # Thêm timeout 30 giây
                 )
                 return result
             except facebook.GraphAPIError as e:
+                logger.error(
+                    f"Facebook GraphAPI Error (attempt {attempt + 1}/{max_retries}): {e}"
+                )
                 if attempt == max_retries - 1:  # Lần cuối
+                    raise HTTPException(
+                        status_code=500,
+                        detail=f"Failed to send message to Facebook: {e}",
+                    )
                 time.sleep(retry_delay)
                 retry_delay *= 2  # Exponential backoff
             except Exception as e:
+                logger.error(
+                    f"Unexpected error sending message to Facebook (attempt {attempt + 1}/{max_retries}): {e}"
+                )
                 if attempt == max_retries - 1:  # Lần cuối
+                    raise HTTPException(
+                        status_code=500, detail="Failed to send message to Facebook"
+                    )
                 time.sleep(retry_delay)
                 retry_delay *= 2  # Exponential backoff
         """
         max_retries = 3
         retry_delay = 1  # giây
         for attempt in range(max_retries):
             try:
                 graph = facebook.GraphAPI(access_token=page_access_token, version="3.1")
                 result = graph.get_object(page_id)
                 return result
             except facebook.GraphAPIError as e:
+                logger.error(
+                    f"Facebook GraphAPI Error getting page info (attempt {attempt + 1}/{max_retries}): {e}"
+                )
                 if attempt == max_retries - 1:  # Lần cuối
+                    raise HTTPException(
+                        status_code=500, detail=f"Failed to get page info: {e}"
+                    )
                 time.sleep(retry_delay)
                 retry_delay *= 2  # Exponential backoff
             except Exception as e:
+                logger.error(
+                    f"Unexpected error getting page info (attempt {attempt + 1}/{max_retries}): {e}"
+                )
                 if attempt == max_retries - 1:  # Lần cuối
+                    raise HTTPException(
+                        status_code=500, detail="Failed to get page info"
+                    )
                 time.sleep(retry_delay)
                 retry_delay *= 2  # Exponential backoff
     @timing_decorator_async
+    async def send_message(
+        self,
+        page_access_token: Optional[str] = None,
+        recipient_id: Optional[str] = None,
+        message: str = "",
+    ) -> dict:
         page_access_token = page_access_token or self.page_token
         recipient_id = recipient_id or self.sender_id
         if not message or not str(message).strip():
+            logger.warning(
+                f"[FACEBOOK_SEND] Attempted to send an empty or whitespace-only message to recipient {recipient_id}. Aborting."
+            )
             return {}
         if not page_access_token or not recipient_id:
+            logger.error(
+                f"[FACEBOOK_SEND] Missing page_access_token or recipient_id. Cannot send message."
+            )
+            raise ValueError(
+                "FacebookClient: page_access_token and recipient_id must not be None when sending a message."
+            )
+        logger.info(
+            f"[FACEBOOK_SEND] Preparing to send message to recipient {recipient_id}. Full message (truncated): '{_safe_truncate(str(message))}'"
+        )
         # Format message
+        response_to_send = self.format_message(str(message).replace("**", "*"))
         # Chia nhỏ nếu quá dài
         messages = self.split_message(response_to_send)
         results = []
         for i, msg_part in enumerate(messages, 1):
             if len(msg_part) > 2000:
                 msg_part = msg_part[:2000]  # fallback cắt cứng
+            logger.info(
+                f"[FACEBOOK_SEND] Sending part {i}/{len(messages)} to recipient {recipient_id}."
+            )
             try:
                 # Wrap sync HTTP call in thread executor để giữ async
                 loop = asyncio.get_event_loop()
                 result = await loop.run_in_executor(
+                    None,
+                    self.send_message_forwarder,
+                    page_access_token,
+                    recipient_id,
+                    msg_part,
                 )
                 results.append(result)
             except Exception as e:
+                logger.error(
+                    f"[FACEBOOK_SEND] Failed to send part {i}/{len(messages)} to {recipient_id}. Error: {e}"
+                )
                 results.append({"error": str(e), "part": i})
         return results[0] if results else {}
     @timing_decorator_async
+    async def get_page_info(
+        self, page_access_token: Optional[str] = None, page_id: Optional[str] = None
+    ) -> dict:
         """
         Lấy thông tin page sử dụng Facebook SDK (async).
         """
         page_access_token = page_access_token or self.page_token
         page_id = page_id or self.page_id
         if not page_access_token or not page_id:
+            raise ValueError(
+                "FacebookClient: page_access_token and page_id must not be None when getting page info."
+            )
         loop = asyncio.get_event_loop()
         result = await loop.run_in_executor(
+            None, self._get_page_info_sync, page_access_token, page_id
         )
         return result
         try:
             entry = body["entry"][0]
             messaging = entry["messaging"][0]
             sender_id = messaging["sender"]["id"]
             recipient_id = messaging["recipient"]["id"]
             timestamp = messaging["timestamp"]
             message_data = {
                 "sender_id": sender_id,
                 "page_id": recipient_id,
                 "timestamp": timestamp,
                 "text": None,
+                "attachments": [],
             }
             if "message" in messaging:
             return message_data
         except (KeyError, IndexError) as e:
             logger.error(f"Error parsing Facebook message: {e}\n\n{body}")
+            return None

app/gemini_client.py CHANGED Viewed

@@ -167,7 +167,7 @@ class GeminiClient:
                     )
                 try:
-                    logger.info(
                         f"[GEMINI][TEXT_RESPONSE] {_safe_truncate(response.text)}"
                     )
                     return response.text

                     )
                 try:
+                    logger.debug(
                         f"[GEMINI][TEXT_RESPONSE] {_safe_truncate(response.text)}"
                     )
                     return response.text

app/llm.py CHANGED Viewed

@@ -447,16 +447,18 @@ class LLMClient:
         Bạn là một chuyên gia phân tích ngôn ngữ tự nhiên (NLP) chuyên xử lý các câu hỏi về luật giao thông Việt Nam. Nhiệm vụ của bạn là đọc kỹ **lịch sử trò chuyện** và **câu hỏi mới nhất** của người dùng để trích xuất thông tin vào một cấu trúc JSON duy nhất. **Luôn chỉ trả về đối tượng JSON hợp lệ**, không thêm bất kỳ giải thích nào.
         Định dạng JSON bắt buộc:
-        {{
             "muc_dich": "...",
             "phuong_tien": "...",
             "tu_khoa": [],
             "cau_hoi": "..."
-        }}
         Hướng dẫn chi tiết cho từng trường:
-        **muc_dich**: Phải là một trong các giá trị sau, dựa vào **câu hỏi mới nhất**:
         - "hỏi về mức phạt"
         - "hỏi về quy tắc giao thông"
         - "hỏi về báo hiệu đường bộ"
@@ -464,35 +466,41 @@ class LLMClient:
         - "thông tin cá nhân của AI"
         - "khác"
-        **phuong_tien**: Tên phương tiện được đề cập trong câu hỏi mới hoặc trong lịch sử gần nhất. Nếu không có, để chuỗi rỗng "".
-        **tu_khoa**: **MỘT DANH SÁCH (LIST) các thuật ngữ pháp lý và các khái niệm liên quan** để tìm kiếm hiệu quả nhất trong cơ sở dữ liệu luật.
-        - **Quy tắc 1 (Chuyển đổi & Trực tiếp)**: Chuyển đổi ngôn ngữ đời thường của người dùng (ví dụ: "vượt đèn đỏ") thành thuật ngữ pháp lý chính xác ("Không chấp hành hiệu lệnh của đèn tín hiệu giao thông"). Trích xuất các hành vi, đối tượng, địa điểm được đề cập trực tiếp.
-        - **Quy tắc 2 (Suy luận & Mở rộng)**: **Đây là quy tắc quan trọng nhất.** Dựa vào câu hỏi, hãy suy luận ra các từ khóa tìm kiếm tiềm năng khác có thể chứa câu trả lời, ngay cả khi chúng không được nhắc đến trực tiếp.
-            - **Suy luận theo loại**: Nếu hỏi về một biển báo cụ thể (ví dụ: "biển hạn chế tốc độ tối đa"), hãy suy luận ra loại chung của nó ("biển báo cấm").
-            - **Suy luận theo mã hiệu**: Nếu biết mã hiệu của một đối tượng pháp lý (ví dụ: biển báo P.127), hãy thêm từ khóa về mã hiệu đó.
-            - **Suy luận theo khái niệm**: Nếu câu hỏi về một tình huống (ví dụ: "hiệu lực biển báo khi qua ngã tư"), hãy tạo từ khóa về khái niệm đó ("hiệu lực của biển báo tại nơi đường giao nhau").
-        - **Quy tắc 3 (Đa dạng hóa)**: Nếu câu hỏi phức tạp, hãy kết hợp các quy tắc trên để trích xuất một bộ từ khóa đa dạng và toàn diện. Ví dụ: "vượt đèn đỏ khi đang say rượu" -> ["Không chấp hành hiệu lệnh của đèn tín hiệu giao thông", "Điều khiển xe trên đường mà trong máu hoặc hơi thở có nồng độ cồn"].
-        - **Quy tắc 4 (Xử lý ngữ cảnh không hài lòng)**: Đọc kỹ lịch sử. Nếu người dùng hỏi lại hoặc thể hiện không hài lòng (ví dụ: "không phải", "ý tôi là..."), và trong lịch sử có ghi chú (từ khóa đã dùng: ...), TUYỆT ĐỐI KHÔNG SỬ DỤNG LẠI các từ khóa đó. Hãy tạo ra một bộ từ khóa **HOÀN TOÀN MỚI** dựa trên các quy tắc trên để tìm kiếm thông tin chính xác hơn.
-        **cau_hoi**: Diễn đạt lại câu hỏi mới nhất của người dùng thành một câu hỏi hoàn chỉnh, kết hợp ngữ cảnh từ lịch sử nếu cần, sử dụng đúng thuật ngữ pháp lý.
         VÍ DỤ MẪU:
         **VÍ DỤ 1 (Xử lý ngữ cảnh):**
         Lịch sử trò chuyện:
-        "Người dùng: xe máy đi vào đường cấm thì sao? (từ khóa đã dùng: đi vào khu vực cấm)
         Trợ lý: Mức phạt cho hành vi đi vào khu vực cấm là..."
         Câu hỏi mới nhất: "không phải, ý tôi là đi vào đường cao tốc cơ"
         Kết quả JSON mong muốn:
-        {{
             "muc_dich": "hỏi về mức phạt",
             "phuong_tien": "Xe máy",
-            "tu_khoa": ["Điều khiển xe đi vào đường cao tốc"],
             "cau_hoi": "Mức xử phạt cho hành vi xe máy đi vào đường cao tốc là bao nhiêu?"
-        }}
         **VÍ DỤ 2 (Suy luận từ khóa):**
         Lịch sử trò chuyện:
@@ -501,18 +509,20 @@ class LLMClient:
         Câu hỏi mới nhất: "qua ngã 3, ngã 4 thì biển báo hạn chế tốc độ tối đa (nền trắng, viền đỏ) có hết hiệu lực không hay chỉ khi gặp biển báo 'Hết tốc độ tối đa cho phép' thì mới hết hiệu lực?"
         Kết quả JSON mong muốn:
-        {{
             "muc_dich": "hỏi về quy tắc giao thông",
             "phuong_tien": "",
             "tu_khoa": [
-                "hiệu lực của biển báo cấm",
-                "hiệu lực của biển báo P.127",
                 "biển báo hết tốc độ tối đa cho phép",
                 "biển báo DP.134",
-                "hiệu lực của biển báo tại nơi đường giao nhau"
             ],
             "cau_hoi": "Hiệu lực của biển báo hạn chế tốc độ tối đa (P.127) khi đi qua nơi đường giao nhau (ngã ba, ngã tư) như thế nào và khi nào thì hết hiệu lực?"
-        }}
         Bây giờ, hãy phân tích lịch sử và câu hỏi sau và chỉ trả về đối tượng JSON.

         Bạn là một chuyên gia phân tích ngôn ngữ tự nhiên (NLP) chuyên xử lý các câu hỏi về luật giao thông Việt Nam. Nhiệm vụ của bạn là đọc kỹ **lịch sử trò chuyện** và **câu hỏi mới nhất** của người dùng để trích xuất thông tin vào một cấu trúc JSON duy nhất. **Luôn chỉ trả về đối tượng JSON hợp lệ**, không thêm bất kỳ giải thích nào.
         Định dạng JSON bắt buộc:
+        ```json
+        {
             "muc_dich": "...",
             "phuong_tien": "...",
             "tu_khoa": [],
             "cau_hoi": "..."
+        }
+        ```
         Hướng dẫn chi tiết cho từng trường:
+        - **muc_dich**: Phải là một trong các giá trị sau, dựa vào **câu hỏi mới nhất**:
         - "hỏi về mức phạt"
         - "hỏi về quy tắc giao thông"
         - "hỏi về báo hiệu đường bộ"
         - "thông tin cá nhân của AI"
         - "khác"
+        - **phuong_tien**: Tên phương tiện được đề cập trong câu hỏi mới hoặc trong lịch sử gần nhất. Nếu không có, để chuỗi rỗng "".
+        - **tu_khoa**: **MỘT DANH SÁCH (LIST) các CỤM TỪ KHÓA NGẮN GỌN** là thuật ngữ pháp lý hoặc khái niệm cốt lõi để tìm kiếm trong văn bản luật.
+          - **QUY TẮC 1 (Trích xuất & Chuẩn hóa)**: Xác định các hành vi vi phạm chính và chuyển đổi chúng thành cụm từ khóa pháp lý ngắn gọn. **KHÔNG** dùng cả câu mô tả đầy đủ hành vi.
+            - Tốt: "vượt đèn đỏ" -> ["không chấp hành hiệu lệnh đèn tín hiệu giao thông"]
+            - Xấu: "vượt đèn đỏ" -> ["Điều khiển xe ô tô không chấp hành hiệu lệnh của đèn tín hiệu giao thông"]
+          - **QUY TẮC 2 (Suy luận & Mở rộng)**: Dựa vào câu hỏi, suy luận các từ khóa liên quan.
+            - Ví dụ: hỏi về "biển hạn chế tốc độ tối đa" -> suy luận thêm ["biển báo cấm", "biển báo P.127"].
+            - Ví dụ: hỏi về "hiệu lực biển báo khi qua ngã tư" -> suy luận thêm ["hiệu lực của biển báo", "nơi đường giao nhau"].
+          - **QUY TẮC 3 (Xử lý ngữ cảnh không hài lòng)**: Đọc kỹ lịch sử. Nếu người dùng hỏi lại hoặc thể hiện không hài lòng (ví dụ: "không phải", "ý tôi là..."), và trong lịch sử có ghi chú `(từ khóa đã dùng: ...)` thì **TUYỆT ĐỐI KHÔNG SỬ DỤNG LẠI** các từ khóa đó. Hãy tạo ra một bộ từ khóa **HOÀN TOÀN MỚI** để tìm kiếm chính xác hơn.
+          - **QUY TẮC 4 (CẤM)**: Danh sách `tu_khoa` **CHỈ** chứa các thuật ngữ pháp lý hoặc khái niệm. **KHÔNG** được chứa:
+            - Từ ngữ đời thường (ví dụ: "vượt đèn đỏ", "say rượu").
+            - Các câu hỏi hoặc cụm từ chứa ý định hỏi (ví dụ: "mức phạt bao nhiêu", "phạt tiền").
+            - Các câu diễn giải dài dòng.
+        - **cau_hoi**: Diễn đạt lại câu hỏi mới nhất của người dùng thành một câu hỏi hoàn chỉnh, kết hợp ngữ cảnh từ lịch sử nếu cần, sử dụng đúng thuật ngữ pháp lý.
         VÍ DỤ MẪU:
         **VÍ DỤ 1 (Xử lý ngữ cảnh):**
         Lịch sử trò chuyện:
+        "##Người dùng##: xe máy đi vào đường cấm thì sao? (từ khóa đã dùng: đi vào khu vực cấm)
         Trợ lý: Mức phạt cho hành vi đi vào khu vực cấm là..."
         Câu hỏi mới nhất: "không phải, ý tôi là đi vào đường cao tốc cơ"
         Kết quả JSON mong muốn:
+        ```json
+        {
             "muc_dich": "hỏi về mức phạt",
             "phuong_tien": "Xe máy",
+            "tu_khoa": ["đi vào đường cao tốc", "xe máy đi vào đường cao tốc"],
             "cau_hoi": "Mức xử phạt cho hành vi xe máy đi vào đường cao tốc là bao nhiêu?"
+        }
+        ```
         **VÍ DỤ 2 (Suy luận từ khóa):**
         Lịch sử trò chuyện:
         Câu hỏi mới nhất: "qua ngã 3, ngã 4 thì biển báo hạn chế tốc độ tối đa (nền trắng, viền đỏ) có hết hiệu lực không hay chỉ khi gặp biển báo 'Hết tốc độ tối đa cho phép' thì mới hết hiệu lực?"
         Kết quả JSON mong muốn:
+        ```json
+        {
             "muc_dich": "hỏi về quy tắc giao thông",
             "phuong_tien": "",
             "tu_khoa": [
+                "hiệu lực biển báo cấm",
+                "biển báo P.127",
                 "biển báo hết tốc độ tối đa cho phép",
                 "biển báo DP.134",
+                "nơi đường giao nhau"
             ],
             "cau_hoi": "Hiệu lực của biển báo hạn chế tốc độ tối đa (P.127) khi đi qua nơi đường giao nhau (ngã ba, ngã tư) như thế nào và khi nào thì hết hiệu lực?"
+        }
+        ```
         Bây giờ, hãy phân tích lịch sử và câu hỏi sau và chỉ trả về đối tượng JSON.

app/message_processor.py CHANGED Viewed

@@ -61,7 +61,7 @@ class MessageProcessor:
         history = await loop.run_in_executor(
             None, lambda: sheets_client.get_conversation_history(sender_id, page_id)
         )
-        logger.info(f"[DEBUG] history: ... {history[-3:]}")
         for row in history:
             sheet_timestamps = [str(ts) for ts in row.get("timestamp", [])]

         history = await loop.run_in_executor(
             None, lambda: sheets_client.get_conversation_history(sender_id, page_id)
         )
+        logger.debug(f"[DEBUG] history: ... {history[-3:]}")
         for row in history:
             sheet_timestamps = [str(ts) for ts in row.get("timestamp", [])]

app/reranker.py CHANGED Viewed

@@ -162,10 +162,43 @@ class Reranker:
                 )
                 logger.info(f"[RERANK] Got batch scores from Gemini: {response}")
-                # Cải thiện parsing scores bằng regex để chỉ lấy các số hợp lệ
                 scores_text = str(response).strip()
-                # Tìm tất cả các chuỗi số (integer hoặc float) trong văn bản trả về
-                score_strings = re.findall(r"\b\d+(?:\.\d+)?\b", scores_text)
                 scores = []
                 for s in score_strings:

                 )
                 logger.info(f"[RERANK] Got batch scores from Gemini: {response}")
+                # --- START: Cải thiện logic trích xuất điểm ---
                 scores_text = str(response).strip()
+                scores_line = ""
+                score_strings = []
+                # Ưu tiên tìm dòng có "Kết quả:" hoặc các từ khóa tương tự
+                match = re.search(
+                    r"(?i)(?:Kết quả:|Scores:|Scores\s*:|Trả về:)\s*([0-9.,\s]+)$",
+                    scores_text,
+                    re.MULTILINE,
+                )
+                if match:
+                    scores_line = match.group(1)
+                    logger.debug(
+                        f"[RERANK] Found scores line using keyword: '{scores_line}'"
+                    )
+                else:
+                    # Fallback: tìm dòng cuối cùng chỉ chứa số, dấu phẩy, và khoảng trắng
+                    lines = scores_text.split("\n")
+                    for line in reversed(lines):
+                        line = line.strip()
+                        if line and re.match(r"^[0-9.,\s]+$", line):
+                            scores_line = line
+                            logger.debug(
+                                f"[RERANK] Found scores line using fallback pattern: '{scores_line}'"
+                            )
+                            break
+                if scores_line:
+                    # Trích xuất tất cả các số từ dòng đã tìm thấy
+                    score_strings = re.findall(r"\b\d+(?:\.\d+)?\b", scores_line)
+                else:
+                    logger.warning(
+                        "[RERANK] Could not find a dedicated score line. Falling back to parsing all numbers from response."
+                    )
+                    score_strings = re.findall(r"\b\d+(?:\.\d+)?\b", scores_text)
+                # --- END: Cải thiện logic trích xuất điểm ---
                 scores = []
                 for s in score_strings: