trongld committed on
Commit
189d69a
·
1 Parent(s): af4f5ea

Refactor LangGraphAgent system prompt and add new tools for reading Excel and text files

Browse files
Files changed (2) hide show
  1. agent.py +8 -5
  2. tools.py +183 -5
agent.py CHANGED
@@ -72,15 +72,16 @@ class LangGraphAgent:
72
  base_prompt = system_prompt or "You are a helpful assistant. Keep answers concise."
73
  self.system_prompt = (
74
  base_prompt
75
- + "\n\nGuidelines:\n"
76
- "- Use tools when they can verify facts or fetch fresh data.\n"
77
- "- Think privately; do not reveal chain-of-thought.\n"
78
- "- Provide the final user-facing result prefixed exactly with 'FINAL ANSWER:'."
79
  )
80
 
81
  # Choose an LLM if not provided
82
  if model is None:
83
- model = ChatOpenRouter(model="openai/gpt-oss-20b:free")
 
84
  if model is None and ChatOpenAI is not None:
85
  model = ChatOpenAI(
86
  api_key=os.getenv("OPENROUTER_API_KEY"),
@@ -107,6 +108,8 @@ class LangGraphAgent:
107
  # If the last AI message includes tool calls, route to tools; else end.
108
  last = state["messages"][-1]
109
  if isinstance(last, AIMessage) and getattr(last, "tool_calls", None):
 
 
110
  return "tools"
111
  return "end"
112
 
 
72
  base_prompt = system_prompt or "You are a helpful assistant. Keep answers concise."
73
  self.system_prompt = (
74
  base_prompt
75
+ # + "\n\nGuidelines:\n"
76
+ # "- Use tools when they can verify facts or fetch fresh data.\n"
77
+ # "- Think privately; do not reveal chain-of-thought.\n"
78
+ # "- Provide the final user-facing result prefixed exactly with 'FINAL ANSWER:'."
79
  )
80
 
81
  # Choose an LLM if not provided
82
  if model is None:
83
+ model = ChatOpenRouter(
84
+ model="mistralai/mistral-small-3.2-24b-instruct:free")
85
  if model is None and ChatOpenAI is not None:
86
  model = ChatOpenAI(
87
  api_key=os.getenv("OPENROUTER_API_KEY"),
 
108
  # If the last AI message includes tool calls, route to tools; else end.
109
  last = state["messages"][-1]
110
  if isinstance(last, AIMessage) and getattr(last, "tool_calls", None):
111
+ print(
112
+ f"Detected tool calls in last AI message: {last.tool_calls}")
113
  return "tools"
114
  return "end"
115
 
tools.py CHANGED
@@ -7,6 +7,7 @@ from typing import Any, Dict, List, Optional
7
  import json
8
  import re
9
  from datetime import datetime, timedelta
 
10
 
11
  # Structured tools
12
  try:
@@ -40,6 +41,16 @@ try:
40
  except Exception:
41
  ZoneInfo = None # type: ignore
42
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  def _parse_video_id(url_or_id: str) -> Optional[str]:
45
  s = (url_or_id or "").strip()
@@ -126,6 +137,49 @@ def youtube_transcript(video: str, languages: Optional[List[str]] = None, max_ch
126
  return {"ok": False, "error": f"Transcript fetch failed: {e}"}
127
 
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  @tool("date_today", return_direct=False)
130
  def date_today(tz: Optional[str] = None) -> Dict[str, Any]:
131
  """
@@ -219,16 +273,137 @@ def date_format(date_str: str, fmt: str = "%Y-%m-%d %H:%M:%S", tz: Optional[str]
219
  return {"ok": False, "error": f"Format failed: {e}"}
220
 
221
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  def get_tools():
223
  """
224
  Returns a list of tools that can be used by the agent.
225
  """
226
  tools = [
227
- Tool(
228
- name="BraveSearch",
229
- func=BraveSearch().run,
230
- description="Search the web using Brave Search."
231
- ),
232
  Tool(
233
  name="YouTubeSearch",
234
  func=YouTubeSearchTool().run,
@@ -269,5 +444,8 @@ def get_tools():
269
  date_diff,
270
  next_weekday,
271
  date_format,
 
 
 
272
  ])
273
  return tools
 
7
  import json
8
  import re
9
  from datetime import datetime, timedelta
10
+ import io # for BytesIO
11
 
12
  # Structured tools
13
  try:
 
41
  except Exception:
42
  ZoneInfo = None # type: ignore
43
 
44
+ try:
45
+ import pandas as pd
46
+ except Exception:
47
+ pd = None # type: ignore
48
+
49
+ try:
50
+ import requests
51
+ except Exception:
52
+ requests = None # type: ignore
53
+
54
 
55
  def _parse_video_id(url_or_id: str) -> Optional[str]:
56
  s = (url_or_id or "").strip()
 
137
  return {"ok": False, "error": f"Transcript fetch failed: {e}"}
138
 
139
 
140
@tool("youtube_transcript_srt", return_direct=False)
def youtube_transcript_srt(video: str, languages: Optional[List[str]] = None, max_segments: Optional[int] = None) -> Dict[str, Any]:
    """
    Return the YouTube transcript as SRT captions.

    Params:
    - video: URL or 11-char video ID
    - languages: preferred languages, e.g. ["vi","en"]
    - max_segments: limit number of caption segments (optional); ignored
      when None or not positive

    Returns:
    - {"ok": True, "data": {"srt": <SRT text>, "segments": <count>}} on success
    - the failing youtube_transcript result unchanged when the fetch fails
    - {"ok": False, "error": "SRT generation failed: ..."} on any other error
    """
    def _srt_time(sec: float) -> str:
        # Work in whole milliseconds so that rounding carries into the
        # seconds field; rounding the fraction on its own can yield 1000 and
        # render an invalid timestamp such as "00:00:01,1000".
        total_ms = int(round(max(0.0, float(sec or 0.0)) * 1000))
        total_s, ms = divmod(total_ms, 1000)
        total_m, s = divmod(total_s, 60)
        h, m = divmod(total_m, 60)
        return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

    try:
        # Reuse the existing transcript tool to fetch segments.
        # NOTE(review): if youtube_transcript is wrapped by @tool (its
        # decorator is not visible from here), the wrapper object cannot be
        # called with keyword arguments; use the underlying function when the
        # wrapper exposes one via `.func` -- confirm against its definition.
        fetch = getattr(youtube_transcript, "func", None) or youtube_transcript
        res = fetch(video=video, languages=languages, max_chars=0)
        if not res.get("ok"):
            return res

        segs = (res.get("data") or {}).get("segments") or []
        if max_segments is not None and max_segments > 0:
            segs = segs[:max_segments]

        lines: List[str] = []
        for i, seg in enumerate(segs, 1):
            # `or` also covers keys that are present but set to None.
            start = float(seg.get("start") or 0.0)
            end = start + float(seg.get("duration") or 0.0)
            raw_text = str(seg.get("text") or "")
            # A blank line terminates an SRT block, so drop empty lines
            # inside a caption while keeping its real line breaks.
            text = "\n".join(
                ln.strip() for ln in raw_text.splitlines() if ln.strip())
            lines.append(str(i))
            lines.append(f"{_srt_time(start)} --> {_srt_time(end)}")
            lines.append(text)
            lines.append("")  # blank line between blocks

        srt = "\n".join(lines).strip() + ("\n" if lines else "")
        return {"ok": True, "data": {"srt": srt, "segments": len(segs)}}
    except Exception as e:
        return {"ok": False, "error": f"SRT generation failed: {e}"}
181
+
182
+
183
  @tool("date_today", return_direct=False)
184
  def date_today(tz: Optional[str] = None) -> Dict[str, Any]:
185
  """
 
273
  return {"ok": False, "error": f"Format failed: {e}"}
274
 
275
 
276
@tool("read_excel", return_direct=False)
def read_excel(path_or_url: str, sheet: Optional[str] = None, nrows: int = 100, usecols: Optional[str] = None, header: Optional[int] = 0) -> Dict[str, Any]:
    """
    Read a worksheet from an Excel file (.xlsx/.xls/.xlsm) from a local path or HTTP(S) URL.

    Params:
    - path_or_url: local file path or URL.
    - sheet: sheet name or 0-based index (default: first sheet). A value that
      is not an existing sheet name but consists only of digits (e.g. "1") is
      treated as an index; a blank value selects the first sheet.
    - nrows: max number of rows to return (default: 100); None or <= 0 reads
      all rows.
    - usecols: Excel-style column selection, e.g., 'A:D' or 'A,C:E'.
    - header: row index to use as header (default: 0). Use None for no header.

    Returns:
    - {"ok": True, "data": {"sheet", "columns", "records", "info"}} on
      success. Record keys are the same string labels listed in "columns",
      and empty cells are returned as None.
    - {"ok": False, "error": <message>} on any failure.
    """
    # Local import: `os` is not among the module-level imports visible for
    # this file, and a repeated import is harmless if it is already there.
    import os

    if pd is None:
        return {"ok": False, "error": "pandas not installed. pip install pandas openpyxl"}
    src = (path_or_url or "").strip()
    if not src:
        return {"ok": False, "error": "Missing path_or_url"}
    try:
        data_src: Any
        if re.match(r"^https?://", src, re.I):
            if requests is None:
                return {"ok": False, "error": "requests not installed for URL fetching. pip install requests"}
            resp = requests.get(src, timeout=30)
            resp.raise_for_status()
            data_src = io.BytesIO(resp.content)
        else:
            if not os.path.exists(src):
                return {"ok": False, "error": f"File not found: {src}"}
            data_src = src

        row_limit = nrows if (nrows is not None and nrows > 0) else None

        # Open the workbook once so the requested sheet can be checked
        # against the real sheet names before deciding whether a numeric
        # string is a name or an index.
        with pd.ExcelFile(data_src) as book:
            sheet_name: Any = 0 if sheet is None else sheet
            if isinstance(sheet_name, str) and sheet_name not in book.sheet_names:
                candidate = sheet_name.strip()
                if not candidate:
                    sheet_name = 0
                elif candidate in book.sheet_names:
                    sheet_name = candidate
                elif candidate.isdecimal():
                    sheet_name = int(candidate)
                # Anything else is passed through unchanged so pandas reports
                # the unknown sheet in its own words.
            df = pd.read_excel(
                book,
                sheet_name=sheet_name,
                nrows=row_limit,
                usecols=usecols,
                header=header,
            )

        if isinstance(df, dict):  # safety if engine returns multiple sheets
            first_key = next(iter(df))
            df = df[first_key]
            sheet_used = first_key
        else:
            sheet_used = sheet_name

        if row_limit is not None:
            df = df.head(row_limit)

        columns = [str(c) for c in df.columns.tolist()]
        # Cast to object first so missing cells (NaN/NaT) can become None
        # instead of being coerced back to NaN by a numeric dtype.
        cleaned = df.astype(object).where(df.notna(), None)
        # Key the records by the same string labels reported in "columns".
        cleaned.columns = columns
        records = cleaned.to_dict(orient="records")
        return {
            "ok": True,
            "data": {
                "sheet": sheet_used,
                "columns": columns,
                "records": records,
                "info": {"rows": len(records), "cols": len(columns)},
            },
        }
    except Exception as e:
        return {"ok": False, "error": f"Excel read failed: {e}"}
337
+
338
+
339
def _decode_text_bytes(raw: bytes, preferred: Optional[str] = None, guess: Any = None) -> tuple:
    """
    Decode raw bytes and return a (text, encoding_name) pair.

    Order of attempts: the caller's preferred encoding (strict), a byte-order
    mark, strict UTF-8, the encoding returned by the optional zero-argument
    `guess` callable, and finally latin-1, which accepts any byte sequence.
    """
    import codecs

    if preferred:
        try:
            return raw.decode(preferred), preferred
        except (LookupError, UnicodeDecodeError):
            pass  # unknown or wrong encoding: fall back to detection

    # UTF-32 marks come first because the UTF-32-LE mark begins with the
    # same two bytes as the UTF-16-LE mark.
    bom_table = (
        (codecs.BOM_UTF8, "utf-8-sig"),
        (codecs.BOM_UTF32_LE, "utf-32"),
        (codecs.BOM_UTF32_BE, "utf-32"),
        (codecs.BOM_UTF16_LE, "utf-16"),
        (codecs.BOM_UTF16_BE, "utf-16"),
    )
    for bom, name in bom_table:
        if raw.startswith(bom):
            try:
                return raw.decode(name), name
            except UnicodeDecodeError:
                break

    try:
        return raw.decode("utf-8"), "utf-8"
    except UnicodeDecodeError:
        pass

    # Only ask for a guess once the cheap checks have failed.
    guessed = guess() if callable(guess) else None
    if guessed:
        try:
            return raw.decode(guessed), guessed
        except (LookupError, UnicodeDecodeError):
            pass

    return raw.decode("latin-1"), "latin-1"


@tool("read_text", return_direct=False)
def read_text(path_or_url: str, max_chars: int = 20000, encoding: Optional[str] = None) -> Dict[str, Any]:
    """
    Read a text file from a local path or HTTP(S) URL.

    Params:
    - path_or_url: local file path or URL.
    - max_chars: maximum characters to return (default: 20000); None or <= 0
      disables truncation.
    - encoding: optional text encoding override; if omitted (or if it fails
      to decode the data), the encoding is detected.

    Returns:
    - {"ok": True, "data": {"path", "encoding", "truncated", "length", "text"}}
      on success; "length" is the length of the returned text.
    - {"ok": False, "error": <message>} on any failure.
    """
    # Local import: `os` is not among the module-level imports visible for
    # this file, and a repeated import is harmless if it is already there.
    import os

    src = (path_or_url or "").strip()
    if not src:
        return {"ok": False, "error": "Missing path_or_url"}

    try:
        if re.match(r"^https?://", src, re.I):
            if requests is None:
                return {"ok": False, "error": "requests not installed for URL fetching. pip install requests"}
            resp = requests.get(src, timeout=30)
            resp.raise_for_status()
            # requests reports ISO-8859-1 for text/* responses that declare no
            # charset, which garbles UTF-8 pages; trust resp.encoding only
            # when the server actually sent a charset.
            content_type = resp.headers.get("Content-Type") or ""
            declared = resp.encoding if "charset" in content_type.lower() else None
            text, used_encoding = _decode_text_bytes(
                resp.content,
                encoding or declared,
                guess=lambda: getattr(resp, "apparent_encoding", None),
            )
        else:
            if not os.path.exists(src):
                return {"ok": False, "error": f"File not found: {src}"}
            # Read the bytes once and detect the encoding from them. Trying
            # BOM-less UTF-16 codecs in turn "succeeds" on almost any
            # even-length file and silently returns garbage.
            with open(src, "rb") as f:
                raw = f.read()
            text, used_encoding = _decode_text_bytes(raw, encoding)
            # Match text-mode reading: normalise line endings to "\n".
            text = text.replace("\r\n", "\n").replace("\r", "\n")

        truncated = False
        if max_chars and max_chars > 0 and len(text) > max_chars:
            text = text[:max_chars] + " ...[truncated]..."
            truncated = True

        return {
            "ok": True,
            "data": {
                "path": src,
                "encoding": used_encoding,
                "truncated": truncated,
                "length": len(text),
                "text": text,
            },
        }
    except Exception as e:
        return {"ok": False, "error": f"Text read failed: {e}"}
400
+
401
+
402
  def get_tools():
403
  """
404
  Returns a list of tools that can be used by the agent.
405
  """
406
  tools = [
 
 
 
 
 
407
  Tool(
408
  name="YouTubeSearch",
409
  func=YouTubeSearchTool().run,
 
444
  date_diff,
445
  next_weekday,
446
  date_format,
447
+ read_text,
448
+ read_excel,
449
+ youtube_transcript_srt, # new
450
  ])
451
  return tools