manasajanj committed on
Commit
fef0990
·
verified ·
1 Parent(s): ff784fd

UPDATE TOOLS.PY

Browse files
Files changed (1) hide show
  1. tools.py +65 -1
tools.py CHANGED
@@ -7,7 +7,7 @@ from langchain_core.tools import tool
7
  from datetime import datetime
8
 
9
  wikipedia_tool = WikipediaQueryRun(
10
- api_wrapper=WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=4000)
11
  )
12
  ddg_search_tool = DuckDuckGoSearchRun(
13
  api_wrapper=DuckDuckGoSearchAPIWrapper(max_results=5)
@@ -19,5 +19,69 @@ arxiv_tool = ArxivQueryRun(
19
  def get_current_year() -> str:
20
  """returns the current year"""
21
  return str(datetime.now().year)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
 
 
 
 
 
 
23
 
 
7
  from datetime import datetime
8
 
9
  wikipedia_tool = WikipediaQueryRun(
10
+ api_wrapper=WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=4000,lang="en")
11
  )
12
  ddg_search_tool = DuckDuckGoSearchRun(
13
  api_wrapper=DuckDuckGoSearchAPIWrapper(max_results=5)
 
19
  def get_current_year() -> str:
20
  """returns the current year"""
21
  return str(datetime.now().year)
22
def _extract_youtube_video_id(url: str) -> str:
    """Return the video ID found in a YouTube URL, or "" if none is found."""
    from urllib.parse import parse_qs, urlparse

    cleaned = url.strip()
    # urlparse only fills in the host when a scheme is present.
    if "://" not in cleaned:
        cleaned = "https://" + cleaned
    parts = urlparse(cleaned)

    # Standard watch URLs carry the ID in the "v" query parameter.
    ids = parse_qs(parts.query).get("v")
    if ids and ids[0]:
        return ids[0]

    segments = [segment for segment in parts.path.split("/") if segment]
    # Short links: https://youtu.be/<id>
    if parts.netloc.lower().endswith("youtu.be") and segments:
        return segments[0]
    # Path-based forms: /shorts/<id>, /embed/<id>, /live/<id>
    if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live"):
        return segments[1]
    return ""


@tool
def get_youtube_transcript(url: str) -> str:
    """
    Get the full transcript/subtitles from a YouTube video.
    Use this tool whenever the question contains a YouTube URL (youtube.com or youtu.be).
    Extract the URL from the question and pass it as the argument.
    Example: if question says 'In the video https://www.youtube.com/watch?v=ABC123, what...'
    then call this tool with url='https://www.youtube.com/watch?v=ABC123'

    Args:
        url: The YouTube video URL.

    Returns:
        The transcript text joined with spaces and truncated to 5000
        characters, or a short message explaining why it is unavailable.
    """
    try:
        from youtube_transcript_api import YouTubeTranscriptApi

        video_id = _extract_youtube_video_id(url)
        if not video_id:
            return "Could not extract video ID from URL."

        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Older library releases: static call returning a list of dicts.
            segments = YouTubeTranscriptApi.get_transcript(video_id)
            texts = [segment["text"] for segment in segments]
        else:
            # Newer library releases: instance API returning snippet objects.
            texts = [snippet.text for snippet in YouTubeTranscriptApi().fetch(video_id)]

        # Cap the output so a long video cannot produce an oversized result.
        return " ".join(texts)[:5000]
    except Exception as e:
        # The tool reports failures as text instead of raising.
        return f"Transcript unavailable: {e}"
43
@tool
def fetch_url_content(url: str) -> str:
    """Fetch a web page and return its text content with HTML tags removed.

    Use this tool to read the content of a specific URL.

    Args:
        url: Address of the page to download.

    Returns:
        Up to 6000 characters of whitespace-normalised page text, or a
        message starting with "Could not fetch URL:" if anything fails.
    """
    import html
    import re

    try:
        # Imported locally so the tool does not rely on a module-level import.
        import requests

        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        text = response.text
        # Remove script/style bodies first; stripping only the tags would
        # leave their code in the returned text.
        text = re.sub(r'(?is)<(script|style)\b.*?</\1\s*>', ' ', text)
        text = re.sub(r'<[^>]+>', ' ', text)
        # Decode entities such as &amp; before collapsing whitespace.
        text = html.unescape(text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text[:6000]
    except Exception as e:
        # The tool reports failures as text instead of raising.
        return f"Could not fetch URL: {e}"
56
def _filename_from_content_disposition(header: str) -> str:
    """Return the filename parameter of a Content-Disposition header, or ""."""
    import re

    # Accept both quoted and bare values and stop at the next ';' so that
    # trailing parameters do not end up in the filename.
    match = re.search(
        r'filename\s*=\s*(?:"([^"]*)"|([^;]*))', header, flags=re.IGNORECASE
    )
    if match is None:
        return ""
    quoted, bare = match.group(1), match.group(2)
    return (quoted if quoted is not None else bare).strip()


@tool
def get_gaia_file(task_id: str) -> str:
    """Download and read a file attachment for a GAIA task. Use when question mentions an attached file.

    Args:
        task_id: Identifier of the GAIA task whose attachment is requested.

    Returns:
        The file content as text (spreadsheets and CSV files are rendered
        as a table), or a short message when the file cannot be fetched
        or read.
    """
    try:
        # Imported locally so the tool does not rely on a module-level import.
        import requests

        url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
        resp = requests.get(url, timeout=15)
        if resp.status_code != 200:
            return f"Could not fetch file for task {task_id}"

        content_type = resp.headers.get("content-type", "").lower()
        content_disposition = resp.headers.get("content-disposition", "")
        # Lower-cased so that extension checks also match names like "DATA.XLSX".
        filename = _filename_from_content_disposition(content_disposition).lower()

        if filename.endswith(".py") or "text/plain" in content_type:
            return resp.text

        if (
            filename.endswith((".xlsx", ".xls"))
            or "spreadsheet" in content_type
            or "excel" in content_type
        ):
            import io

            import pandas as pd

            df = pd.read_excel(io.BytesIO(resp.content))
            return df.to_string()

        if filename.endswith(".csv") or "csv" in content_type:
            import io

            import pandas as pd

            df = pd.read_csv(io.BytesIO(resp.content))
            return df.to_string()

        if filename.endswith(".mp3") or "audio" in content_type:
            return "This is an audio file. Audio transcription is not supported."

        # Unknown type: fall back to a bounded slice of the decoded body.
        try:
            return resp.text[:5000]
        except Exception:
            return "File could not be read."
    except Exception as e:
        # The tool reports failures as text instead of raising.
        return f"Error fetching file: {e}"
87