GAIA_Execution / tools /youtube_transcript.py
Inigoalon's picture
Upload 17 files
4c9e4c6 verified
import re
from urllib.parse import parse_qs, urlparse

import requests
from langchain_core.tools.base import BaseTool, ToolException
from youtube_transcript_api import YouTubeTranscriptApi
class YoutubeTranscriptTool(BaseTool):
    """Tool that returns the transcript text of a YouTube video given its link."""

    name: str = "youtube_transcript_tool"
    description: str = "This tool can be used to retrieve the transcript of a youtube video given the FULL youtube link. You must pass the full youtube link!"

    @staticmethod
    def _extract_video_id(youtube_link: str) -> str:
        """
        Extract the video ID from a YouTube link.

        Recognised shapes:
          * ``.../watch?v=<id>`` with ``v`` at any position in the query string
          * ``youtu.be/<id>``
          * ``.../shorts/<id>``, ``.../embed/<id>``, ``.../live/<id>``,
            ``.../v/<id>``

        Args:
            youtube_link: The link to inspect.
        Returns:
            The video ID, without query parameters or URL fragment.
        Raises:
            ValueError: If no video ID can be located in the link.
        """
        link = youtube_link.strip()
        # urlparse only splits host from path when a scheme is present, so
        # scheme-less links such as "youtube.com/watch?v=..." get one added.
        if "://" not in link:
            link = "https://" + link.lstrip("/")
        parsed = urlparse(link)

        # Standard watch links: the ID is the "v" query parameter. Parsing the
        # query (rather than matching "watch?v=") accepts "v" in any position
        # and keeps a trailing "#fragment" out of the ID.
        for candidate in parse_qs(parsed.query).get("v", []):
            candidate = candidate.strip()
            if candidate:
                return candidate

        segments = [segment for segment in parsed.path.split("/") if segment]
        host = (parsed.hostname or "").lower()

        # Short links carry the ID as the first path segment.
        if segments and (host == "youtu.be" or host.endswith(".youtu.be")):
            return segments[0]

        # Path-style links carry the ID right after a known prefix segment.
        # Kept as a local (not a class attribute) so the tool's model fields
        # are left exactly as declared above.
        id_prefixes = ("shorts", "embed", "live", "v")
        if len(segments) >= 2 and segments[0] in id_prefixes:
            return segments[1]

        raise ValueError(f"Invalid YouTube URL: {youtube_link}")

    def _run(self, youtube_link: str) -> str:
        """
        Fetch transcript for a YouTube video URL.

        Args:
            youtube_link: The full URL of the YouTube video.
        Returns:
            The transcript as a single string, one snippet per line.
        Raises:
            ValueError: If no video ID can be extracted from ``youtube_link``.
            Any exception raised by the transcript API while fetching
            propagates unchanged.
        """
        # Get the video ID from the youtube URL
        video_id = self._extract_video_id(youtube_link)

        # Initialize the transcript API and retrieve the transcript for the video ID
        ytt_api = YouTubeTranscriptApi()
        fetched_transcript = ytt_api.fetch(video_id)

        # Each fetched snippet exposes its text via ``.text``; join them line by line.
        return "\n".join(snippet.text for snippet in fetched_transcript)