Final_Assignment_Template

Sleeping

App Files Files Community

trongld commited on Aug 14, 2025

Commit

189d69a

1 Parent(s): af4f5ea

Refactor LangGraphAgent system prompt and add new tools for reading Excel and text files

Browse files

Files changed (2) hide show

agent.py +8 -5
tools.py +183 -5

agent.py CHANGED Viewed

@@ -72,15 +72,16 @@ class LangGraphAgent:
         base_prompt = system_prompt or "You are a helpful assistant. Keep answers concise."
         self.system_prompt = (
             base_prompt
-            + "\n\nGuidelines:\n"
-              "- Use tools when they can verify facts or fetch fresh data.\n"
-              "- Think privately; do not reveal chain-of-thought.\n"
-              "- Provide the final user-facing result prefixed exactly with 'FINAL ANSWER:'."
         )
         # Choose an LLM if not provided
         if model is None:
-            model = ChatOpenRouter(model="openai/gpt-oss-20b:free")
             if model is None and ChatOpenAI is not None:
                 model = ChatOpenAI(
                     api_key=os.getenv("OPENROUTER_API_KEY"),
@@ -107,6 +108,8 @@ class LangGraphAgent:
             # If the last AI message includes tool calls, route to tools; else end.
             last = state["messages"][-1]
             if isinstance(last, AIMessage) and getattr(last, "tool_calls", None):
                 return "tools"
             return "end"

         base_prompt = system_prompt or "You are a helpful assistant. Keep answers concise."
         self.system_prompt = (
             base_prompt
+            # + "\n\nGuidelines:\n"
+            #   "- Use tools when they can verify facts or fetch fresh data.\n"
+            #   "- Think privately; do not reveal chain-of-thought.\n"
+            #   "- Provide the final user-facing result prefixed exactly with 'FINAL ANSWER:'."
         )
         # Choose an LLM if not provided
         if model is None:
+            model = ChatOpenRouter(
+                model="mistralai/mistral-small-3.2-24b-instruct:free")
             if model is None and ChatOpenAI is not None:
                 model = ChatOpenAI(
                     api_key=os.getenv("OPENROUTER_API_KEY"),
             # If the last AI message includes tool calls, route to tools; else end.
             last = state["messages"][-1]
             if isinstance(last, AIMessage) and getattr(last, "tool_calls", None):
+                print(
+                    f"Detected tool calls in last AI message: {last.tool_calls}")
                 return "tools"
             return "end"

tools.py CHANGED Viewed

@@ -7,6 +7,7 @@ from typing import Any, Dict, List, Optional
 import json
 import re
 from datetime import datetime, timedelta
 # Structured tools
 try:
@@ -40,6 +41,16 @@ try:
 except Exception:
     ZoneInfo = None  # type: ignore
 def _parse_video_id(url_or_id: str) -> Optional[str]:
     s = (url_or_id or "").strip()
@@ -126,6 +137,49 @@ def youtube_transcript(video: str, languages: Optional[List[str]] = None, max_ch
         return {"ok": False, "error": f"Transcript fetch failed: {e}"}
 @tool("date_today", return_direct=False)
 def date_today(tz: Optional[str] = None) -> Dict[str, Any]:
     """
@@ -219,16 +273,137 @@ def date_format(date_str: str, fmt: str = "%Y-%m-%d %H:%M:%S", tz: Optional[str]
         return {"ok": False, "error": f"Format failed: {e}"}
 def get_tools():
     """
     Returns a list of tools that can be used by the agent.
     """
     tools = [
-        Tool(
-            name="BraveSearch",
-            func=BraveSearch().run,
-            description="Search the web using Brave Search."
-        ),
         Tool(
             name="YouTubeSearch",
             func=YouTubeSearchTool().run,
@@ -269,5 +444,8 @@ def get_tools():
         date_diff,
         next_weekday,
         date_format,
     ])
     return tools

 import json
 import re
 from datetime import datetime, timedelta
+import io  # for BytesIO
 # Structured tools
 try:
 except Exception:
     ZoneInfo = None  # type: ignore
+try:
+    import pandas as pd
+except Exception:
+    pd = None  # type: ignore
+try:
+    import requests
+except Exception:
+    requests = None  # type: ignore
 def _parse_video_id(url_or_id: str) -> Optional[str]:
     s = (url_or_id or "").strip()
         return {"ok": False, "error": f"Transcript fetch failed: {e}"}
+@tool("youtube_transcript_srt", return_direct=False)
+def youtube_transcript_srt(video: str, languages: Optional[List[str]] = None, max_segments: Optional[int] = None) -> Dict[str, Any]:
+    """
+    Return the YouTube transcript as SRT captions.
+    Params:
+      - video: URL or 11-char video ID
+      - languages: preferred languages, e.g. ["vi","en"]
+      - max_segments: limit number of caption segments (optional)
+    """
+    try:
+        # Reuse the existing transcript tool to fetch segments
+        res = youtube_transcript(video=video, languages=languages, max_chars=0)
+        if not res.get("ok"):
+            return res
+        segs = (res.get("data") or {}).get("segments") or []
+        if max_segments is not None and max_segments > 0:
+            segs = segs[:max_segments]
+        def _srt_time(sec: float) -> str:
+            sec = max(0.0, float(sec or 0.0))
+            ms = int(round((sec - int(sec)) * 1000))
+            s = int(sec) % 60
+            m = (int(sec) // 60) % 60
+            h = int(sec) // 3600
+            return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
+        lines: List[str] = []
+        for i, seg in enumerate(segs, 1):
+            start = float(seg.get("start", 0.0))
+            end = start + float(seg.get("duration", 0.0))
+            text = str(seg.get("text", "")).strip()
+            lines.append(str(i))
+            lines.append(f"{_srt_time(start)} --> {_srt_time(end)}")
+            lines.append(text)
+            lines.append("")  # blank line between blocks
+        srt = "\n".join(lines).strip() + ("\n" if lines else "")
+        return {"ok": True, "data": {"srt": srt, "segments": len(segs)}}
+    except Exception as e:
+        return {"ok": False, "error": f"SRT generation failed: {e}"}
 @tool("date_today", return_direct=False)
 def date_today(tz: Optional[str] = None) -> Dict[str, Any]:
     """
         return {"ok": False, "error": f"Format failed: {e}"}
+@tool("read_excel", return_direct=False)
+def read_excel(path_or_url: str, sheet: Optional[str] = None, nrows: int = 100, usecols: Optional[str] = None, header: Optional[int] = 0) -> Dict[str, Any]:
+    """
+    Read a worksheet from an Excel file (.xlsx/.xls/.xlsm) from a local path or HTTP(S) URL.
+    Params:
+      - path_or_url: local file path or URL.
+      - sheet: sheet name or 0-based index (default: first sheet).
+      - nrows: max number of rows to return (default: 100).
+      - usecols: Excel-style column selection, e.g., 'A:D' or 'A,C:E'.
+      - header: row index to use as header (default: 0). Use None for no header.
+    """
+    if pd is None:
+        return {"ok": False, "error": "pandas not installed. pip install pandas openpyxl"}
+    src = (path_or_url or "").strip()
+    if not src:
+        return {"ok": False, "error": "Missing path_or_url"}
+    try:
+        data_src: Any
+        if re.match(r"^https?://", src, re.I):
+            if requests is None:
+                return {"ok": False, "error": "requests not installed for URL fetching. pip install requests"}
+            resp = requests.get(src, timeout=30)
+            resp.raise_for_status()
+            data_src = io.BytesIO(resp.content)
+        else:
+            if not os.path.exists(src):
+                return {"ok": False, "error": f"File not found: {src}"}
+            data_src = src
+        sheet_name = 0 if sheet is None else sheet
+        df = pd.read_excel(
+            data_src,
+            sheet_name=sheet_name,
+            nrows=None if (nrows is None or nrows <= 0) else nrows,
+            usecols=usecols,
+            header=header
+        )
+        if isinstance(df, dict):  # safety if engine returns multiple sheets
+            first_key = next(iter(df.keys()))
+            df = df[first_key]
+            sheet_used = first_key
+        else:
+            sheet_used = sheet_name
+        if nrows and nrows > 0:
+            df = df.head(nrows)
+        columns = [str(c) for c in df.columns.tolist()]
+        records = df.to_dict(orient="records")
+        return {
+            "ok": True,
+            "data": {
+                "sheet": sheet_used,
+                "columns": columns,
+                "records": records,
+                "info": {"rows": len(records), "cols": len(columns)}
+            }
+        }
+    except Exception as e:
+        return {"ok": False, "error": "Excel read failed: {}".format(e)}
+@tool("read_text", return_direct=False)
+def read_text(path_or_url: str, max_chars: int = 20000, encoding: Optional[str] = None) -> Dict[str, Any]:
+    """
+    Read a text file from a local path or HTTP(S) URL.
+    Params:
+      - path_or_url: local file path or URL.
+      - max_chars: maximum characters to return (default: 20000).
+      - encoding: optional text encoding override; if omitted, try to detect.
+    """
+    src = (path_or_url or "").strip()
+    if not src:
+        return {"ok": False, "error": "Missing path_or_url"}
+    try:
+        text: str = ""
+        used_encoding: str = "utf-8"
+        if re.match(r"^https?://", src, re.I):
+            if requests is None:
+                return {"ok": False, "error": "requests not installed for URL fetching. pip install requests"}
+            resp = requests.get(src, timeout=30)
+            resp.raise_for_status()
+            used_encoding = encoding or resp.encoding or getattr(
+                resp, "apparent_encoding", None) or "utf-8"
+            text = resp.content.decode(used_encoding, errors="replace")
+        else:
+            if not os.path.exists(src):
+                return {"ok": False, "error": f"File not found: {src}"}
+            enc_candidates = [encoding] if encoding else [
+                "utf-8", "utf-16", "utf-16-le", "utf-16-be", "latin-1"]
+            for enc_try in enc_candidates:
+                try:
+                    with open(src, "r", encoding=enc_try, errors="strict") as f:
+                        text = f.read()
+                        used_encoding = enc_try or "utf-8"
+                        break
+                except Exception:
+                    continue
+            else:
+                with open(src, "rb") as f:
+                    raw = f.read()
+                used_encoding = "latin-1"
+                text = raw.decode(used_encoding, errors="replace")
+        truncated = False
+        if max_chars and max_chars > 0 and len(text) > max_chars:
+            text = text[:max_chars] + " ...[truncated]..."
+            truncated = True
+        return {
+            "ok": True,
+            "data": {
+                "path": src,
+                "encoding": used_encoding,
+                "truncated": truncated,
+                "length": len(text),
+                "text": text,
+            },
+        }
+    except Exception as e:
+        return {"ok": False, "error": f"Text read failed: {e}"}
 def get_tools():
     """
     Returns a list of tools that can be used by the agent.
     """
     tools = [
         Tool(
             name="YouTubeSearch",
             func=YouTubeSearchTool().run,
         date_diff,
         next_weekday,
         date_format,
+        read_text,
+        read_excel,
+        youtube_transcript_srt,  # new
     ])
     return tools