Spaces:

anhkhoiphan
/

092_agent_api

Sleeping

App Files Files Community

quachtiensinh27 committed on Apr 11

Commit

7fee926

1 Parent(s): 7a4edb9

feat: upgrade tool and add doc

Browse files

Files changed (4) hide show

tools/__init__.py +1 -0
tools/summarizer.py +20 -13
tools/utils.py +43 -4
tools/web.py +62 -0

tools/__init__.py CHANGED Viewed

@@ -7,6 +7,7 @@ from .base import get_tool_schemas, execute_tool, get_langchain_tools, get_llm
 from .scheduler import tool_get_schedule, tool_add_event
 from .summarizer import tool_summarize_chat
 from .memory import tool_save_memory, tool_get_memories
 # The actual list to be used by the agent can be derived from the registry
 # or explicitly exported here.

 from .scheduler import tool_get_schedule, tool_add_event
 from .summarizer import tool_summarize_chat
 from .memory import tool_save_memory, tool_get_memories
+from .web import tool_read_link
 # The actual list to be used by the agent can be derived from the registry
 # or explicitly exported here.

tools/summarizer.py CHANGED Viewed

@@ -10,44 +10,42 @@ from langchain_core.output_parsers import JsonOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from .base import register_tool, get_llm
-from .utils import preprocess_messages
-from ..redis_client import redis_client
-logger = logging.getLogger(__name__)
 # --- Pydantic Schemas ---
 class ThreadSummary(BaseModel):
     """Schema cho tóm tắt của một thread."""
     thread_id: str = Field(description="ID hoặc tên chủ đề của thread")
     main_discussion: str = Field(description="Nội dung chính đang thảo luận")
     status: str = Field(description="Trạng thái: 'Đã chốt' hoặc 'Chưa chốt'")
     conclusion: str = Field(description="Kết luận cuối cùng")
 class TLDRResponse(BaseModel):
     """Schema cho response JSON tổng thể."""
-    summary: list[ThreadSummary] = Field(description="Danh sách tóm tắt")
 # --- System Prompt ---
 SYSTEM_PROMPT_TLDR = """
 Bạn là một THƯ KÝ DỰ ÁN chuyên nghiệp.
 Nhiệm vụ: tóm tắt các đoạn chat nhóm thành báo cáo chính xác tuyệt đối.
 - Chỉ chốt các thông tin có keyword "ok", "chốt", "thống nhất"...
 - Nếu đang tranh luận chưa ngã ngũ -> status = "Chưa chốt".
 - Bỏ qua tin nhắn rác.
 - Trả về JSON đúng schema.
 """
-def _extract_token_usage(response_metadata: dict) -> dict:
-    """Helper to extract token usage."""
-    # (Simplified for now, can be expanded if needed)
-    return response_metadata.get("token_usage", {"total_tokens": 0})
 @register_tool(
     name="summarize_chat",
-    description="Tóm tắt các tin nhắn trong group chat theo từng luồng (thread) bằng AI. Cần cung cấp room_id.",
     parameters=[
-        {"name": "room_id", "type": "string", "description": "ID của phòng chat hoặc nhóm chat cần tóm tắt. Bắt buộc phải có để lấy tin nhắn.", "required": True},
-        {"name": "limit", "type": "integer", "description": "Số lượng tin nhắn tối đa cần lấy. Mặc định là 100.", "required": False}
     ]
 )
 def tool_summarize_chat(
@@ -64,9 +62,14 @@ def tool_summarize_chat(
             messages = redis_client.get_room_messages(room_id, limit)
         if not messages:
-            return {"status": "success", "data": {"summary": []}, "metrics": {"processing_time_sec": 0}}
         formatted_threads = preprocess_messages(messages)
         llm = get_llm()
         parser = JsonOutputParser(pydantic_object=TLDRResponse)
@@ -81,6 +84,10 @@ def tool_summarize_chat(
             "format_instructions": parser.get_format_instructions(),
         })
         processing_time = round(time.time() - start_time, 2)
         return {
             "status": "success",

 from langchain_core.prompts import ChatPromptTemplate
 from .base import register_tool, get_llm
+from .utils import preprocess_messages, extract_metadata_from_messages
 # --- Pydantic Schemas ---
 class ThreadSummary(BaseModel):
     """Schema cho tóm tắt của một thread."""
     thread_id: str = Field(description="ID hoặc tên chủ đề của thread")
     main_discussion: str = Field(description="Nội dung chính đang thảo luận")
+    conversation_flow: str = Field(description="Tóm tắt diễn biến cuộc hội thoại (ai hỏi, ai trả lời, mâu thuẫn/đồng thuận)")
     status: str = Field(description="Trạng thái: 'Đã chốt' hoặc 'Chưa chốt'")
     conclusion: str = Field(description="Kết luận cuối cùng")
 class TLDRResponse(BaseModel):
     """Schema cho response JSON tổng thể."""
+    summary: list[ThreadSummary] = Field(description="Danh sách tóm tắt các thread")
+    links_found: list[str] = Field(description="Danh sách các liên kết (URL) được tìm thấy")
+    files_found: list[dict] = Field(description="Danh sách các tệp tin/ảnh được tìm thấy")
 # --- System Prompt ---
 SYSTEM_PROMPT_TLDR = """
 Bạn là một THƯ KÝ DỰ ÁN chuyên nghiệp.
 Nhiệm vụ: tóm tắt các đoạn chat nhóm thành báo cáo chính xác tuyệt đối.
+- Xác định rõ 'conversation_flow': mô tả cách cuộc thảo luận diễn ra (ví dụ: A đề xuất, B phản đối, cuối cùng cả nhóm đồng ý).
 - Chỉ chốt các thông tin có keyword "ok", "chốt", "thống nhất"...
 - Nếu đang tranh luận chưa ngã ngũ -> status = "Chưa chốt".
 - Bỏ qua tin nhắn rác.
 - Trả về JSON đúng schema.
 """
+# ... (execute_tool part)
 @register_tool(
     name="summarize_chat",
+    description="Tóm tắt tin nhắn, diễn biến cuộc hội thoại và trích xuất Link/File bằng AI. Cần cung cấp room_id.",
     parameters=[
+        {"name": "room_id", "type": "string", "description": "ID của phòng chat.", "required": True},
+        {"name": "limit", "type": "integer", "description": "Số lượng tin nhắn tối đa (Mặc định: 100).", "required": False}
     ]
 )
 def tool_summarize_chat(
             messages = redis_client.get_room_messages(room_id, limit)
         if not messages:
+            return {"status": "success", "data": {"summary": [], "links_found": [], "files_found": []}, "metrics": {"processing_time_sec": 0}}
+        # 1. Trích xuất Metadata (Links/Files)
+        metadata = extract_metadata_from_messages(messages)
+        # 2. Tiền xử lý văn bản cho LLM
         formatted_threads = preprocess_messages(messages)
         llm = get_llm()
         parser = JsonOutputParser(pydantic_object=TLDRResponse)
             "format_instructions": parser.get_format_instructions(),
         })
+        # Merge metadata vào kết quả nếu LLM chưa tự điền (LLM thường chỉ tóm tắt text)
+        result["links_found"] = list(set(result.get("links_found", []) + metadata["links"]))
+        result["files_found"] = metadata["files"]
         processing_time = round(time.time() - start_time, 2)
         return {
             "status": "success",

tools/utils.py CHANGED Viewed

@@ -1,10 +1,49 @@
-"""
-Utility functions for message processing and formatting.
-"""
-from collections import defaultdict
 def group_messages_by_thread(messages: list[dict]) -> dict[str, list[dict]]:
     """
     Gom nhóm các tin nhắn có cùng roomId lại với nhau.
     """

+import re
+import json
+def extract_metadata_from_messages(messages: list[dict]) -> dict:
+    """
+    Collect the links and file attachments mentioned in a list of messages.
+
+    Args:
+        messages: Message dicts. Each may carry a text ``content`` field, an
+            ``attachment`` field (a dict, or a JSON string encoding one) and
+            a ``senderName`` field.
+
+    Returns:
+        A dict with two keys: ``links`` (unique URLs, in order of first
+        appearance) and ``files`` (one dict per usable attachment with the
+        keys ``id``, ``type``, ``name`` and ``sender``).
+    """
+    # Simple URL regex: everything up to the next whitespace character.
+    url_pattern = re.compile(r'https?://[^\s]+')
+    links = []
+    files = []
+    for msg in messages:
+        # "content" can be present but None (or non-text); only scan real strings
+        content = msg.get("content")
+        if isinstance(content, str):
+            links.extend(url_pattern.findall(content))
+        attachment_raw = msg.get("attachment")
+        if not attachment_raw or attachment_raw == "null":
+            continue
+        # The attachment field may be a JSON string or an already-decoded dict
+        att = attachment_raw
+        if isinstance(att, str):
+            try:
+                att = json.loads(att)
+            except ValueError:
+                # Malformed JSON: best-effort extraction, skip this attachment
+                continue
+        if not isinstance(att, dict):
+            # Decoded to something that is not an attachment object
+            continue
+        files.append({
+            "id": att.get("id"),
+            "type": att.get("type"),
+            "name": att.get("name"),
+            "sender": msg.get("senderName"),
+        })
+    return {
+        # dict.fromkeys de-duplicates while keeping first-seen order
+        "links": list(dict.fromkeys(links)),
+        "files": files,
+    }
 def group_messages_by_thread(messages: list[dict]) -> dict[str, list[dict]]:
+# ... (rest of the file)
     """
     Gom nhóm các tin nhắn có cùng roomId lại với nhau.
     """

tools/web.py ADDED Viewed

	@@ -0,0 +1,62 @@

+"""
+Web scraping tools for fetching content from URLs.
+"""
+import logging
+import requests
+from bs4 import BeautifulSoup
+from .base import register_tool
+logger = logging.getLogger(__name__)
+@register_tool(
+    name="read_link",
+    description="Truy cập và bóc tách nội dung chính từ một đường dẫn (URL). Giúp Agent hiểu nội dung bài báo, tài liệu online.",
+    parameters=[
+        {"name": "url", "type": "string", "description": "Đường dẫn (URL) cần đọc nội dung.", "required": True}
+    ]
+)
+def tool_read_link(url: str) -> dict:
+    """
+    Fetch a web page and extract its title and paragraph text.
+
+    Args:
+        url: Address of the page to read.
+
+    Returns:
+        On success, ``{"status": "success", "data": {...}}`` where ``data``
+        holds ``url``, ``title`` and ``content`` (the text of all ``<p>``
+        elements, cut to about 3000 characters). On failure,
+        ``{"status": "error", "message": ...}``; exceptions are not
+        propagated to the caller.
+    """
+    # NOTE(review): the URL is fetched exactly as given. If it can originate
+    # from untrusted chat content, consider restricting target hosts (SSRF).
+    try:
+        # 1. Fetch content with timeout
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+        }
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+        # 2. Parse with BeautifulSoup
+        soup = BeautifulSoup(response.text, 'html.parser')
+        # Remove elements that carry no article content
+        for tag in soup(["script", "style", "nav", "footer", "header"]):
+            tag.decompose()
+        # 3. Extract title and main text
+        # soup.title.string is None for an empty <title> or one with nested
+        # tags; get_text() always yields a string, so strip() cannot fail.
+        title = soup.title.get_text().strip() if soup.title else ""
+        if not title:
+            title = "No Title"
+        # Simple extraction: join the non-empty text of every paragraph
+        paragraphs = (p.get_text().strip() for p in soup.find_all('p'))
+        text_content = "\n".join(text for text in paragraphs if text)
+        # Truncate content for LLM context (approx 3000 chars)
+        if len(text_content) > 3000:
+            text_content = text_content[:3000] + "..."
+        return {
+            "status": "success",
+            "data": {
+                "url": url,
+                "title": title,
+                "content": text_content
+            }
+        }
+    except requests.exceptions.Timeout:
+        return {"status": "error", "message": "Yêu cầu quá hạn (timeout) khi truy cập URL."}
+    except Exception as e:
+        # Tool boundary: report any failure to the agent instead of raising
+        logger.error("Error in read_link: %s", e)
+        return {"status": "error", "message": f"Không thể đọc nội dung từ link: {str(e)}"}