Spaces:

anhkhoiphan
/

092_agent_api

Sleeping

App Files Files Community

anhkhoiphan committed 27 days ago

Commit

66ba458

1 Parent(s): 91fe7ce

Thêm tool vẽ chart

Browse files

Files changed (2) hide show

tools/chart.py +201 -0
tools/chat_tools.py +2 -0

tools/chart.py ADDED Viewed

	@@ -0,0 +1,201 @@

+"""
+Chart summarization tool — reads messages, counts unique user opinions, formats for charting.
+"""
+import time
+import logging
+from typing import List
+from pydantic import BaseModel, Field
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from .base import register_tool, get_llm
+from .utils import preprocess_messages
+try:
+    from ..redis_client import redis_client
+except (ImportError, ValueError):
+    from redis_client import redis_client
+logger = logging.getLogger(__name__)
+# Categories whose share of the total count falls below this fraction are folded into the "others" bucket.
+_OTHERS_THRESHOLD = 0.05
+# ── Pydantic schema: shape of the JSON the LLM is asked to return (documented with # comments so the generated schema stays unchanged) ──
+class ChartItem(BaseModel):  # one chart category together with its unique-user count
+    label: str = Field(description="Tên danh mục / nhãn")  # category name / label
+    count: int = Field(description="Số lượng unique users có ý kiến này")  # number of unique users holding this opinion
+class ChartDataResponse(BaseModel):  # top-level response schema; passed to JsonOutputParser as pydantic_object
+    items: List[ChartItem] = Field(description="Danh sách danh mục và số lượng unique users, sắp xếp theo count giảm dần")  # categories with unique-user counts, requested in descending count order
+# ── System prompt (runtime text, Vietnamese): tells the LLM to classify the queried data as categorical / numerical / binary, count each unique user once, and return the result sorted by count ──
+_SYSTEM_PROMPT = """
+Bạn là một nhà phân tích dữ liệu chuyên nghiệp.
+Nhiệm vụ: Đọc tin nhắn, xác định chủ đề từ query, thống kê ý kiến của các unique users.
+══ BƯỚC 1 — XÁC ĐỊNH LOẠI DỮ LIỆU ══
+Dựa vào query, phân loại dữ liệu cần thống kê:
+  PHÂN LOẠI (categorical): nghề nghiệp, môn học yêu thích, ngôn ngữ lập trình,
+      sở thích, hệ điều hành, stack công nghệ, v.v.
+      → Gom các giá trị tương đồng vào cùng nhãn.
+  SỐ (numerical): tuổi, năm kinh nghiệm, điểm GPA, mức lương, số giờ học/ngày.
+      → Binning thành khoảng giá trị thay vì giữ nguyên từng con số.
+  NHỊ PHÂN (binary): có/không, đồng ý/phản đối, nam/nữ.
+      → Giữ nguyên 2 nhãn, gom biến thể ("có", "yes", "ok" → "có").
+══ BƯỚC 2 — ĐẾM UNIQUE USERS ══
+- Mỗi user chỉ được đếm 1 lần cho mỗi danh mục.
+- Nếu user đề cập nhiều lần: lấy ý kiến RÕ RÀNG nhất (không phải đầu tiên).
+- Bỏ qua tin nhắn mơ hồ, không liên quan, hoặc chỉ là phản ứng (emoji, "ok", "oke").
+- Nhận diện user qua: tên hiển thị, username, hoặc sender_id — xử lý nhất quán.
+══ BƯỚC 3 — QUY TẮC THEO LOẠI ══
+▸ PHÂN LOẠI: Gom đồng nghĩa vào một nhãn chuẩn:
+    "SE", "software eng", "kỹ sư phần mềm" → "software engineer"
+    "ML", "machine learning eng" → "ml engineer"
+    "FE", "frontend" → "frontend developer"
+  Nhãn: viết thường, ngắn gọn, tiếng Anh nếu là thuật ngữ kỹ thuật.
+▸ SỐ (binning):
+  - Chọn kích thước bin phù hợp với độ phân tán:
+      Tuổi      → khoảng 3–5 tuổi  (VD: "18-22", "23-27", "28-32")
+      Kinh nghiệm → khoảng 1–2 năm (VD: "0-1 năm", "2-3 năm", "4+ năm")
+      GPA       → khoảng 0.5       (VD: "3.0-3.5", "3.5-4.0")
+  - Không tạo quá 8 bin; gom đuôi nếu cần (VD: "35+" thay vì nhiều bin lẻ).
+  - Nhãn bin viết dạng "min-max" hoặc "min+" nếu là đuôi hở.
+▸ NHỊ PHÂN: Chuẩn hóa về đúng 2 nhãn đối lập.
+══ BƯỚC 4 — CHUẨN HÓA OUTPUT ══
+- Sắp xếp theo count giảm dần.
+- Loại bỏ danh mục có count = 0.
+- Trả về đúng schema JSON yêu cầu.
+"""
+# ── Tool ─────────────────────────────────────────────────────────────────────
+@register_tool(
+    name="summarize_chart",
+    description=(
+        "Đọc tin nhắn nhóm, thống kê ý kiến của unique users theo chủ đề từ query, "
+        "xuất dữ liệu JSON để vẽ biểu đồ cột (column) hoặc tròn (pie). "
+        "Dùng khi người dùng muốn thống kê / vẽ biểu đồ từ dữ liệu trong chat."
+    ),
+    parameters=[
+        {"name": "query",           "type": "string",  "description": "Chủ đề/yêu cầu thống kê (VD: 'nghề nghiệp thành viên', 'độ tuổi').", "required": True},
+        {"name": "chart_type",      "type": "string",  "description": '"column" để vẽ biểu đồ cột, "pie" để vẽ biểu đồ tròn.', "required": True},
+        {"name": "conversation_id", "type": "string",  "description": "ID hội thoại (conversation_id trong dmmsg, hoặc room-{id} cho phòng nhóm).", "required": False},
+        {"name": "room_id",         "type": "string",  "description": "ID phòng chat nhóm (không có prefix room-).", "required": False},
+        {"name": "dm_id",           "type": "string",  "description": "ID cuộc hội thoại DM theo Sorted Set.", "required": False},
+        {"name": "limit",           "type": "integer", "description": "Số tin nhắn tối đa cần đọc (mặc định: 200).", "required": False},
+    ],
+)
+def tool_summarize_chart(
+    query: str,
+    chart_type: str,
+    messages: List[dict] = None,
+    conversation_id: str = None,
+    room_id: str = None,
+    dm_id: str = None,
+    limit: int = 200,
+) -> dict:
+    start_time = time.time()
+    chart_type = (chart_type or "column").strip().lower()
+    if chart_type not in ("column", "pie"):
+        chart_type = "column"
+    try:
+        # ── 1. Lấy tin nhắn ────────────────────────────────────────────────
+        if messages is None:
+            if conversation_id:
+                messages = redis_client.get_messages_by_conversation_id(conversation_id, limit)
+            elif room_id:
+                messages = redis_client.get_room_messages(room_id, limit)
+            elif dm_id:
+                messages = redis_client.get_dm_messages(dm_id, limit)
+            else:
+                return {"status": "error", "data": {"error": "Cần cung cấp conversation_id, room_id hoặc dm_id."}}
+        if not messages:
+            return {"status": "error", "data": {"error": "Không có tin nhắn để phân tích."}}
+        # ── 2. Gọi LLM thống kê ────────────────────────────────────────────
+        formatted = preprocess_messages(messages)
+        llm    = get_llm()
+        parser = JsonOutputParser(pydantic_object=ChartDataResponse)
+        prompt = ChatPromptTemplate.from_messages([
+            ("system", _SYSTEM_PROMPT),
+            ("human", (
+                "Query: {query}\n\n"
+                "NỘI DUNG TIN NHẮN:\n{messages}\n\n"
+                "{format_instructions}"
+            )),
+        ])
+        chain  = prompt | llm | parser
+        result = chain.invoke({
+            "query":               query,
+            "messages":            formatted,
+            "format_instructions": parser.get_format_instructions(),
+        })
+        raw_items = result.get("items", [])
+        if not raw_items:
+            return {"status": "error", "data": {"error": "Không tìm thấy dữ liệu phù hợp với query."}}
+        # ── 3. Format theo loại chart ──────────────────────────────────────
+        chart_data = _format_chart(raw_items, chart_type)
+        return {
+            "status":          "success",
+            "chart_type":      chart_type,
+            "chart_data":      chart_data,
+            "total_responses": sum(i.get("count", 0) for i in raw_items),
+            "metrics":         {"processing_time_sec": round(time.time() - start_time, 2)},
+        }
+    except Exception as e:
+        logger.error(f"Chart tool error: {e}")
+        return {"status": "error", "data": {"error": str(e)}}
+# ── Helpers ──────────────────────────────────────────────────────────────────
+def _format_chart(items: list[dict], chart_type: str) -> list[dict]:
+    total = sum(i.get("count", 0) for i in items)
+    if total == 0:
+        return []
+    main   = [i for i in items if i.get("count", 0) / total >= _OTHERS_THRESHOLD]
+    others = sum(i.get("count", 0) for i in items if i.get("count", 0) / total < _OTHERS_THRESHOLD)
+    if chart_type == "pie":
+        result = [
+            {"label": i["label"], "percentage": round(i["count"] / total * 100, 1)}
+            for i in main
+        ]
+        if others:
+            result.append({"label": "others", "percentage": round(others / total * 100, 1)})
+    else:  # column
+        result = [{"label": i["label"], "count": i["count"]} for i in main]
+        if others:
+            result.append({"label": "others", "count": others})
+    return result

tools/chat_tools.py CHANGED Viewed

@@ -7,12 +7,14 @@ Replaces the previous mock implementations.
 from . import memory as _memory_mod  # noqa: F401
 from . import scheduler as _scheduler_mod  # noqa: F401
 from . import summarizer as _summarizer_mod  # noqa: F401
 from .base import TOOLS as _REGISTRY, get_langchain_tools
 _ALLOWED = {
     # Facilitator
     "summarize_chat",
     # Scheduler
     "get_schedule", "add_event", "update_event", "delete_event",
     "add_reminder", "get_reminders",

 from . import memory as _memory_mod  # noqa: F401
 from . import scheduler as _scheduler_mod  # noqa: F401
 from . import summarizer as _summarizer_mod  # noqa: F401
+from . import chart as _chart_mod  # noqa: F401
 from .base import TOOLS as _REGISTRY, get_langchain_tools
 _ALLOWED = {
     # Facilitator
     "summarize_chat",
+    "summarize_chart",
     # Scheduler
     "get_schedule", "add_event", "update_event", "delete_event",
     "add_reminder", "get_reminders",