Spaces:

anhkhoiphan
/

092_agent_api

Sleeping

App Files Community

quachtiensinh27 committed on Apr 7

Commit

2571293

1 Parent(s): d2e0613

feat: add Redis message fetching and AI summarization pipeline

Browse files

Files changed (2) hide show

redis_client.py +96 -0
tools.py +34 -231

redis_client.py ADDED Viewed

	@@ -0,0 +1,96 @@

+"""
+Redis client module — simplified for fetching messages only.
+Connects to Redis and retrieves recent messages from a room for summarization.
+"""
+import json
+import logging
+from typing import Optional
+import redis
+from .config import REDIS_HOST, REDIS_PORT, REDIS_DB, REDIS_PASSWORD, REDIS_KEY_PREFIX
+logger = logging.getLogger(__name__)
+class RedisClient:
+    """Process-wide singleton wrapper around ``redis.Redis`` for reading chat messages.
+
+    ``__new__`` always hands back the same instance, and ``__init__`` only
+    builds the underlying client the first time it runs, so calling
+    ``RedisClient()`` repeatedly is cheap and shares one client.
+    """
+    _instance = None
+    _client: Optional[redis.Redis] = None
+    def __new__(cls):
+        # Create the instance once; every later call returns the same object.
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+    def __init__(self):
+        # __init__ runs on every RedisClient() call, so guard against
+        # rebuilding the client on the shared instance.
+        if self._client is None:
+            self._client = redis.Redis(
+                host=REDIS_HOST,
+                port=REDIS_PORT,
+                db=REDIS_DB,
+                password=REDIS_PASSWORD,
+                decode_responses=True,
+                socket_connect_timeout=5,
+                socket_timeout=5,
+            )
+            self._key_prefix = REDIS_KEY_PREFIX
+            # NOTE: redis-py opens sockets lazily, so this only records the
+            # configured target; call ping() to verify real connectivity.
+            logger.info(f"Redis client connected to {REDIS_HOST}:{REDIS_PORT}/{REDIS_DB}")
+    def _key(self, *parts: str) -> str:
+        """Return ``<prefix>:<part>:<part>...`` for the given key parts."""
+        return f"{self._key_prefix}:{':'.join(str(p) for p in parts)}"
+    def ping(self) -> bool:
+        """Return True if Redis answers PING, False on any failure (logged)."""
+        try:
+            return self._client.ping()
+        except Exception as e:
+            logger.error(f"Redis ping failed: {e}")
+            return False
+    def get_room_messages(self, room_id: str, limit: int = 100) -> list[dict]:
+        """
+        Fetch the most recent messages of a room in chronological order.
+
+        The newest ``limit`` message IDs are read from the room's sorted set,
+        their hashes are loaded in one pipelined round-trip, messages flagged
+        ``deleted == "true"`` (or whose hash is missing) are dropped, and the
+        result is returned oldest first.
+        Args:
+            room_id: Room ID (e.g., "toan-cao-cap")
+            limit: Maximum number of messages to fetch (default: 100).
+                Values <= 0 yield an empty list.
+        Returns:
+            List of message dicts (oldest first) with fields:
+            - message_id: str
+            - thread_id: str (the hash's roomId, falling back to room_id)
+            - user_name: str
+            - content: str
+            - timestamp: str
+        Raises:
+            redis.RedisError: Propagated unchanged if Redis is unreachable
+                or a command fails.
+        """
+        # Guard first: with limit == 0 the stop index would be -1, which Redis
+        # interprets as "last element" and would return the entire room.
+        if limit <= 0:
+            return []
+        key = self._key("room", "messages", room_id)
+        # Get message IDs from sorted set (newest first)
+        message_ids = self._client.zrevrange(key, 0, limit - 1)
+        records = []
+        if message_ids:
+            # Batch all HGETALL calls into one round-trip instead of one per
+            # message; no MULTI/EXEC needed for independent reads.
+            pipe = self._client.pipeline(transaction=False)
+            for msg_id in message_ids:
+                pipe.hgetall(self._key("msg", msg_id))
+            records = pipe.execute()
+        messages = []
+        for msg_id, msg_data in zip(message_ids, records):
+            # Skip IDs whose hash is gone and soft-deleted messages.
+            if not msg_data or msg_data.get("deleted") == "true":
+                continue
+            messages.append({
+                # Fall back to the sorted-set member when the hash has no id.
+                "message_id": msg_data.get("id", msg_id),
+                "thread_id": msg_data.get("roomId", room_id),
+                "user_name": msg_data.get("senderName", "Unknown"),
+                "content": msg_data.get("content", ""),
+                "timestamp": msg_data.get("timestamp", ""),
+            })
+        # Reverse to get chronological order (oldest first)
+        messages.reverse()
+        logger.info(f"Fetched {len(messages)} messages from Redis room: {room_id}")
+        return messages
+# Global singleton instance, shared by every importer of this module.
+# It is constructed as a side effect of importing the module.
+redis_client = RedisClient()

tools.py CHANGED Viewed

@@ -8,11 +8,9 @@ Team: Khôi (LangGraph Agent), Hoàng (Database)
 import json
 import time
-import sqlite3
 import logging
 from collections import defaultdict
 from typing import Any
-from pathlib import Path
 import httpx
 from langchain_openai import ChatOpenAI
@@ -20,7 +18,8 @@ from langchain_core.output_parsers import JsonOutputParser
 from langchain_core.prompts import ChatPromptTemplate
 from pydantic import BaseModel, Field
-from .config import QWEN_API_KEY, QWEN_BASE_URL, QWEN_MODEL, LOG_LEVEL, DB_PATH, DB_TYPE, CHROMA_DB_PATH, CHROMA_COLLECTION
 logger = logging.getLogger(__name__)
@@ -201,212 +200,58 @@ def _extract_token_usage(response_metadata: dict) -> dict:
     return default_usage
-def fetch_messages_from_db(db_path: str = None, limit: int = 100) -> list[dict]:
-    """
-    Tự động lấy 100 tin nhắn gần nhất từ SQLite database.
-    Args:
-        db_path: Đường dẫn đến file SQLite. Nếu None, dùng DB_PATH từ config.
-        limit: Số lượng tin nhắn tối đa cần lấy (default: 100).
-    Returns:
-        List of message dicts với 5 fields: message_id, thread_id, user_name, content, timestamp
-    """
-    if db_path is None:
-        db_path = DB_PATH
-    if not Path(db_path).exists():
-        raise FileNotFoundError(f"Database file not found: {db_path}")
-    try:
-        conn = sqlite3.connect(db_path)
-        conn.row_factory = sqlite3.Row
-        cursor = conn.cursor()
-        # Lấy tin nhắn gần nhất, sort theo timestamp DESC
-        query = """
-            SELECT
-                id as message_id,
-                thread_id,
-                user_name,
-                content,
-                timestamp
-            FROM messages
-            ORDER BY timestamp DESC
-            LIMIT ?
-        """
-        cursor.execute(query, (limit,))
-        rows = cursor.fetchall()
-        # Convert to list of dicts
-        messages = [
-            {
-                "message_id": str(row["message_id"]),
-                "thread_id": row["thread_id"] or "unknown",
-                "user_name": row["user_name"] or "Unknown",
-                "content": row["content"] or "",
-                "timestamp": row["timestamp"] or ""
-            }
-            for row in rows
-        ]
-        # Reverse để sort theo timestamp ASC (thứ tự thời gian đúng)
-        messages.reverse()
-        logger.info(f"Fetched {len(messages)} messages from database: {db_path}")
-        return messages
-    except sqlite3.Error as e:
-        raise ConnectionError(f"Database error: {e}")
-    finally:
-        if conn:
-            conn.close()
-def fetch_messages_from_chromadb(
-    chroma_path: str = None,
-    collection_name: str = None,
-    limit: int = 100
-) -> list[dict]:
-    """
-    Tự động lấy 100 tin nhắn gần nhất từ ChromaDB.
-    Args:
-        chroma_path: Đường dẫn đến ChromaDB folder. Nếu None, dùng config.
-        collection_name: Tên collection. Nếu None, dùng config.
-        limit: Số lượng tin nhắn tối đa (default: 100).
-    Returns:
-        List of message dicts với 5 fields: message_id, thread_id, user_name, content, timestamp
-    """
-    import chromadb
-    from chromadb.config import Settings
-    if chroma_path is None:
-        chroma_path = CHROMA_DB_PATH
-    if collection_name is None:
-        collection_name = CHROMA_COLLECTION
-    try:
-        # Connect to persistent ChromaDB
-        client = chromadb.PersistentClient(path=chroma_path)
-        collection = client.get_collection(name=collection_name)
-        # Get all documents with metadata (limit to recent ones)
-        # ChromaDB doesn't have timestamp ordering by default, so we get all and sort
-        results = collection.get(
-            include=["documents", "metadatas"],
-            limit=limit * 2  # Get extra in case we need to filter
-        )
-        messages = []
-        for i, (doc, metadata) in enumerate(zip(results["documents"], results["metadatas"])):
-            messages.append({
-                "message_id": metadata.get("message_id", str(i)),
-                "thread_id": metadata.get("thread_id", "unknown"),
-                "user_name": metadata.get("user_name", "Unknown"),
-                "content": doc,  # Document is the text content
-                "timestamp": metadata.get("timestamp", "")
-            })
-        # Sort by timestamp DESC and take top N
-        messages.sort(key=lambda m: m.get("timestamp", ""), reverse=True)
-        messages = messages[:limit]
-        # Reverse to get ASC order (chronological)
-        messages.reverse()
-        logger.info(f"Fetched {len(messages)} messages from ChromaDB: {chroma_path}")
-        return messages
-    except Exception as e:
-        raise ConnectionError(f"ChromaDB error: {e}")
-def fetch_messages(db_type: str = None, limit: int = 100, **kwargs) -> list[dict]:
-    """
-    Universal message fetcher — supports both SQLite and ChromaDB.
-    Args:
-        db_type: "sqlite" or "chromadb". Nếu None, dùng config.
-        limit: Số tin nhắn cần lấy.
-        **kwargs: Additional params (db_path, chroma_path, collection_name).
-    Returns:
-        List of message dicts.
-    """
-    if db_type is None:
-        db_type = DB_TYPE
-    if db_type == "chromadb":
-        return fetch_messages_from_chromadb(
-            chroma_path=kwargs.get("chroma_path"),
-            collection_name=kwargs.get("collection_name"),
-            limit=limit
-        )
-    else:
-        return fetch_messages_from_db(
-            db_path=kwargs.get("db_path"),
-            limit=limit
-        )
 def tool_summarize_chat(
     messages: list[dict] = None,
     limit: int = 100,
-    db_path: str = None,
-    db_type: str = None,
-    chroma_path: str = None,
-    collection_name: str = None
 ) -> dict:
     """
-    Hàm chính xử lý lệnh /tldr:
-    1. Nếu không truyền messages → tự động lấy từ DB (SQLite hoặc ChromaDB)
-    2. Tiền xử lý: gom nhóm theo thread_id
     3. Gửi vào Qwen API với prompt chống bịa đặt
-    4. Trả về JSON tóm tắt theo hợp đồng dữ liệu của nhóm
     Args:
-        messages: (Optional) List of message dicts. Nếu None → tự lấy từ DB.
-        limit: (Optional) Số tin nhắn cần lấy từ DB (default: 100).
-        db_path: (Optional) Đường dẫn đến file SQLite.
-        db_type: (Optional) "sqlite" hoặc "chromadb". Nếu None → dùng config.
-        chroma_path: (Optional) Đường dẫn đến ChromaDB folder.
-        collection_name: (Optional) Tên collection trong ChromaDB.
     Returns:
         {
             "status": "success" | "error",
-            "data": <JSON_Object_from_LLM>,
-            "metrics": {
-                "processing_time_sec": <float>,
-                "token_usage": {"input_tokens": int, "output_tokens": int, "total_tokens": int}
-            }
         }
     Examples:
-        # Tự động lấy 100 tin từ DB
-        result = tool_summarize_chat()
-        # Tự lấy 50 tin từ DB custom
-        result = tool_summarize_chat(limit=50, db_path="data/my_db.sqlite")
-        # Truyền messages trực tiếp (tương thích ngược)
         result = tool_summarize_chat(messages=my_messages)
     """
     start_time = time.time()
     try:
-        # Auto-fetch from DB if messages not provided
         if messages is None:
-            logger.info(f"No messages provided, fetching from {db_type or DB_TYPE} database...")
-            messages = fetch_messages(
-                db_type=db_type,
-                limit=limit,
-                db_path=db_path,
-                chroma_path=chroma_path,
-                collection_name=collection_name
-            )
         # Validate input
         if not messages:
@@ -510,54 +355,12 @@ Trả về JSON tóm tắt cho từng thread. Tuân thủ nghiêm ngặt các qu
         }
-# =============================================================================
-# SECTION E: BACKWARD COMPATIBILITY — Legacy tools
-# =============================================================================
-def search_web(query: str) -> str:
-    """Search for information on the web (placeholder)."""
-    return f"Search results for: {query}"
-def calculate(expression: str) -> str:
-    """Evaluate a math expression."""
-    try:
-        result = eval(expression, {"__builtins__": {}})
-        return str(result)
-    except Exception as e:
-        return f"Error: {e}"
-def fetch_url(url: str) -> str:
-    """Fetch content from a URL."""
-    try:
-        resp = httpx.get(url, timeout=10, follow_redirects=True)
-        return resp.text[:2000]
-    except Exception as e:
-        return f"Error: {e}"
-# Tool registry — includes both legacy tools and new /tldr pipeline
 TOOLS = {
-    "search_web": {
-        "fn": search_web,
-        "description": "Search for information on the web",
-        "parameters": {"query": "string"},
-    },
-    "calculate": {
-        "fn": calculate,
-        "description": "Evaluate a math expression",
-        "parameters": {"expression": "string"},
-    },
-    "fetch_url": {
-        "fn": fetch_url,
-        "description": "Fetch content from a URL",
-        "parameters": {"url": "string"},
-    },
     "summarize_chat": {
         "fn": tool_summarize_chat,
-        "description": "Summarize group chat messages by thread using AI. Use this for /tldr command.",
-        "parameters": {"messages": "array"},
     },
 }

 import json
 import time
 import logging
 from collections import defaultdict
 from typing import Any
 import httpx
 from langchain_openai import ChatOpenAI
 from langchain_core.prompts import ChatPromptTemplate
 from pydantic import BaseModel, Field
+from .config import QWEN_API_KEY, QWEN_BASE_URL, QWEN_MODEL, LOG_LEVEL
+from .redis_client import redis_client
 logger = logging.getLogger(__name__)
     return default_usage
 def tool_summarize_chat(
     messages: list[dict] = None,
     limit: int = 100,
+    room_id: str = None,
 ) -> dict:
     """
+    Tóm tắt tin nhắn chat từ Redis theo từng thread.
+    Luồng hoạt động:
+    1. Nếu không truyền messages → lấy từ Redis (cần room_id)
+    2. Gom nhóm tin nhắn theo thread_id
     3. Gửi vào Qwen API với prompt chống bịa đặt
+    4. Trả về JSON tóm tắt
     Args:
+        messages: (Optional) List of message dicts. Nếu None → lấy từ Redis.
+        limit: (Optional) Số tin nhắn tối đa (default: 100).
+        room_id: (Optional) Room ID trong Redis. Bắt buộc nếu không truyền messages.
     Returns:
         {
             "status": "success" | "error",
+            "data": {"summary": [{"thread_id", "main_discussion", "status", "conclusion"}]},
+            "metrics": {"processing_time_sec": float, "token_usage": {...}}
         }
     Examples:
+        # Lấy 100 tin từ Redis và tóm tắt
+        result = tool_summarize_chat(room_id="toan-cao-cap")
+        # Lấy 50 tin
+        result = tool_summarize_chat(room_id="toan-cao-cap", limit=50)
+        # Truyền messages trực tiếp
         result = tool_summarize_chat(messages=my_messages)
     """
     start_time = time.time()
     try:
+        # Auto-fetch from Redis if messages not provided
         if messages is None:
+            if not room_id:
+                return {
+                    "status": "error",
+                    "data": {"error": "room_id is required when messages is not provided"},
+                    "metrics": {
+                        "processing_time_sec": 0.0,
+                        "token_usage": {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+                    }
+                }
+            logger.info(f"Fetching {limit} messages from Redis room: {room_id}...")
+            messages = redis_client.get_room_messages(room_id, limit)
         # Validate input
         if not messages:
         }
+# Tool registry
 TOOLS = {
     "summarize_chat": {
         "fn": tool_summarize_chat,
+        "description": "Summarize group chat messages by thread using AI. Fetches from Redis if room_id is provided.",
+        "parameters": {"room_id": "string", "limit": "integer"},
     },
 }