Spaces:
Runtime error
Runtime error
Update tools.py
Browse files
tools.py
CHANGED
|
@@ -9,6 +9,7 @@ import fitz # PyMuPDF
|
|
| 9 |
import pandas as pd
|
| 10 |
from imdb import IMDb
|
| 11 |
from youtube_transcript_api import YouTubeTranscriptApi
|
|
|
|
| 12 |
import whisper
|
| 13 |
from bs4 import BeautifulSoup
|
| 14 |
import re
|
|
@@ -251,16 +252,57 @@ def run_python_code(code: str) -> str:
|
|
| 251 |
@tool
|
| 252 |
def search_youtube_transcript(video_id: str) -> str:
|
| 253 |
"""
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
Returns full transcript as plain text.
|
| 257 |
"""
|
| 258 |
print(f"🛠️ search_youtube_transcript called with: {video_id}")
|
|
|
|
| 259 |
try:
|
| 260 |
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
except Exception as e:
|
| 263 |
-
return f"Error fetching
|
| 264 |
|
| 265 |
@tool
|
| 266 |
def search_baseball_stats(player: str) -> str:
|
|
|
|
| 9 |
import pandas as pd
|
| 10 |
from imdb import IMDb
|
| 11 |
from youtube_transcript_api import YouTubeTranscriptApi
|
| 12 |
+
import yt_dlp
|
| 13 |
import whisper
|
| 14 |
from bs4 import BeautifulSoup
|
| 15 |
import re
|
|
|
|
| 252 |
@tool
def search_youtube_transcript(video_id: str) -> str:
    """
    Fetch a preview of a YouTube video's transcript using its video ID.

    If no transcript can be retrieved for the video, fall back to
    title/description metadata obtained through fetch_video_metadata (yt-dlp).

    Args:
        video_id: The YouTube video ID to look up.

    Returns:
        A status-prefixed message: the first 1000 characters of the transcript
        on success, fallback metadata when no transcript exists, or a
        description of the error. Failures are reported in the returned text
        rather than raised.
    """
    # Limit the preview to prevent token overflow in the calling LLM.
    max_preview_chars = 1000

    print(f"🛠️ search_youtube_transcript called with: {video_id}")

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        if not transcript:
            return "Transcript is empty or unavailable for this video."

        text = " ".join(entry["text"] for entry in transcript)
        # Only mark the preview as cut off when something was actually dropped.
        suffix = "..." if len(text) > max_preview_chars else ""
        return f"✅ Transcript found (preview):\n\n{text[:max_preview_chars]}{suffix}"

    except Exception as e:  # tool boundary: every failure becomes a text reply
        error_msg = str(e)
        print(f"⚠️ Transcript fetch failed: {error_msg}")

        lowered = error_msg.lower()

        # Messages that mean "the video exists but has no usable transcript".
        # "disabled" covers the library's "Subtitles are disabled for this
        # video" wording, which previously skipped the metadata fallback.
        no_transcript_markers = ("not available", "no transcript", "disabled")
        if any(marker in lowered for marker in no_transcript_markers):
            fallback_info = fetch_video_metadata(video_id)
            return f"⚠️ No transcript available.\n\n🔎 Here's some metadata instead:\n{fallback_info}"

        # "no longer available" is how the library words a removed video.
        if "video unavailable" in lowered or "no longer available" in lowered:
            return "❌ This video is unavailable. Please check the link or video ID."

        if "404" in error_msg or "Not Found" in error_msg:
            return "❌ The video could not be found. Please check the video ID."

        return f"❌ Error fetching transcript: {error_msg}"
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def fetch_video_metadata(video_id: str) -> str:
    """
    Fallback to extract title and description if transcript is not available.

    Args:
        video_id: The YouTube video ID; it is inserted into a
            https://www.youtube.com/watch?v=... URL.

    Returns:
        A two-line summary with the title and at most 500 characters of the
        description, or an error message if the lookup fails. Failures are
        reported in the returned text rather than raised.
    """
    # Truncate the description so the result stays small for the LLM.
    max_description_chars = 500

    try:
        url = f"https://www.youtube.com/watch?v={video_id}"
        ydl_opts = {
            'quiet': True,
            'no_warnings': True,
            'extract_flat': True,
            # yt-dlp documents this option as 'noplaylist'; the previous
            # 'no_playlist' spelling is not an option name and had no effect.
            'noplaylist': True,
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # Guard against a None result so the .get() calls below are safe.
            info = ydl.extract_info(url, download=False, process=False) or {}

        # Use `or` rather than a .get() default: the key may be present with a
        # None/empty value, which would otherwise break the slice below.
        title = info.get("title") or "Unknown Title"
        description = info.get("description") or "No description found."

        # Only mark the description as cut off when it really was truncated.
        suffix = "..." if len(description) > max_description_chars else ""
        return f"📺 Title: {title}\n📝 Description: {description[:max_description_chars]}{suffix}"

    except Exception as e:  # tool boundary: every failure becomes a text reply
        return f"❌ Error fetching metadata: {str(e)}"
|
| 306 |
|
| 307 |
@tool
|
| 308 |
def search_baseball_stats(player: str) -> str:
|