alisamak committed on
Commit
c9ed4d2
·
verified ·
1 Parent(s): 89525b1

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +47 -5
tools.py CHANGED
@@ -9,6 +9,7 @@ import fitz # PyMuPDF
9
  import pandas as pd
10
  from imdb import IMDb
11
  from youtube_transcript_api import YouTubeTranscriptApi
 
12
  import whisper
13
  from bs4 import BeautifulSoup
14
  import re
@@ -251,16 +252,57 @@ def run_python_code(code: str) -> str:
251
  @tool
252
  def search_youtube_transcript(video_id: str) -> str:
253
  """
254
- Fetch transcript text from a YouTube video using its ID.
255
- Use this for questions asking what was said in a specific video.
256
- Returns full transcript as plain text.
257
  """
258
  print(f"🛠️ search_youtube_transcript called with: {video_id}")
 
259
  try:
260
  transcript = YouTubeTranscriptApi.get_transcript(video_id)
261
- return " ".join([entry["text"] for entry in transcript])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
  except Exception as e:
263
- return f"Error fetching transcript: {e}"
264
 
265
  @tool
266
  def search_baseball_stats(player: str) -> str:
 
9
  import pandas as pd
10
  from imdb import IMDb
11
  from youtube_transcript_api import YouTubeTranscriptApi
12
+ import yt_dlp
13
  import whisper
14
  from bs4 import BeautifulSoup
15
  import re
 
252
  @tool
253
  def search_youtube_transcript(video_id: str) -> str:
254
  """
255
+ Try to fetch transcript from a YouTube video using its ID.
256
+ If transcript is not available, fall back to title/description metadata using yt-dlp.
 
257
  """
258
  print(f"🛠️ search_youtube_transcript called with: {video_id}")
259
+
260
  try:
261
  transcript = YouTubeTranscriptApi.get_transcript(video_id)
262
+ if not transcript:
263
+ return "Transcript is empty or unavailable for this video."
264
+
265
+ # Limit to first 1000 characters to prevent token overflow
266
+ text = " ".join([entry["text"] for entry in transcript])
267
+ return f"✅ Transcript found (preview):\n\n{text[:1000]}..."
268
+
269
+ except Exception as e:
270
+ error_msg = str(e)
271
+ print(f"⚠️ Transcript fetch failed: {error_msg}")
272
+
273
+ # Handle common errors
274
+ if "not available" in error_msg.lower() or "no transcript" in error_msg.lower():
275
+ fallback_info = fetch_video_metadata(video_id)
276
+ return f"⚠️ No transcript available.\n\n🔎 Here's some metadata instead:\n{fallback_info}"
277
+ elif "video unavailable" in error_msg.lower():
278
+ return "❌ This video is unavailable. Please check the link or video ID."
279
+ elif "404" in error_msg or "Not Found" in error_msg:
280
+ return "❌ The video could not be found. Please check the video ID."
281
+ else:
282
+ return f"❌ Error fetching transcript: {error_msg}"
283
+
284
+
285
def fetch_video_metadata(video_id: str) -> str:
    """
    Fallback to extract title and description if transcript is not available.

    Args:
        video_id: The YouTube video ID to look up.

    Returns:
        A string with the video title and up to 500 characters of its
        description, or an "Error fetching metadata: ..." message if the
        lookup fails. Lookup failures are reported in the string, not raised.
    """
    # Truncate the description so the result stays small for the LLM.
    max_description_chars = 500

    try:
        url = f"https://www.youtube.com/watch?v={video_id}"
        ydl_opts = {
            "quiet": True,
            "no_warnings": True,
            "extract_flat": True,
            # yt-dlp's option key is "noplaylist"; "no_playlist" is not a
            # recognised YoutubeDL parameter and would be silently ignored.
            "noplaylist": True,
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False, process=False)

        # Guard against a missing result and against keys that are present
        # but null; dict.get's default does not cover the latter.
        info = info or {}
        title = info.get("title") or "Unknown Title"
        description = info.get("description") or "No description found."

        snippet = description[:max_description_chars]
        # Only mark the description as truncated when something was cut.
        if len(description) > max_description_chars:
            snippet += "..."
        return f"📺 Title: {title}\n📝 Description: {snippet}"

    except Exception as e:
        return f"Error fetching metadata: {str(e)}"
306
 
307
  @tool
308
  def search_baseball_stats(player: str) -> str: