"""smolagents tool that returns the English subtitle track of a YouTube video."""

import re

import yt_dlp
from smolagents.tools import Tool


class YoutubeTranscriptions(Tool):
    """Tool that fetches the English subtitles of a YouTube video via yt-dlp.

    The subtitle file is returned exactly as served (no parsing or cleanup).
    Failures are reported as human-readable strings rather than raised, so
    the calling agent always receives a string.
    """

    name = "YoutubeTranscriptions"
    description = "Generates transcription for a YouTube video. If transcription cannot be generated, return 'No transcriptions available.'"
    inputs = {'URL': {'type': 'string', 'description': 'URL of the video to transcribe'}}
    output_type = "string"

    def __init__(self):
        # Let the Tool base class set up its own instance state; the original
        # skipped this call and only assigned the metadata attributes.
        super().__init__()

    @staticmethod
    def _find_english_subtitle_url(info):
        """Return the URL of the first usable English subtitle track, or None.

        Manually uploaded subtitles are preferred; automatic captions are the
        fallback. Within each group the exact ``'en'`` key wins over regional
        variants such as ``'en-US'``.
        """
        if not info:
            return None

        for field in ('subtitles', 'automatic_captions'):
            # The field may be missing or explicitly None.
            tracks_by_lang = info.get(field) or {}
            english_keys = sorted(
                (lang for lang in tracks_by_lang
                 if lang == 'en' or lang.startswith('en-')),
                key=lambda lang: lang != 'en',  # stable sort: exact 'en' first
            )
            for lang in english_keys:
                for track in tracks_by_lang[lang] or []:
                    track_url = track.get('url')
                    if track_url:
                        return track_url
        return None

    def forward(self, URL: str) -> str:
        """Return the raw English subtitle text for the video at ``URL``.

        Args:
            URL: Address of the YouTube video (``watch?v=``, ``youtu.be/``,
                ``/shorts/``, ``/embed/`` and ``/live/`` forms are accepted).

        Returns:
            The subtitle file contents on success; otherwise one of
            "Invalid YouTube URL. Please provide a valid URL.",
            "No transcriptions available." or
            "Error fetching transcription: <reason>".
        """
        # Superset of the original ``v=...`` check: also accept short links
        # and the shorts/embed/live path forms. The id itself is not needed,
        # the URL is handed to yt-dlp unchanged.
        video_id_pattern = r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)([a-zA-Z0-9_-]+)"
        if not isinstance(URL, str) or not re.search(video_id_pattern, URL):
            return "Invalid YouTube URL. Please provide a valid URL."

        # yt-dlp options: metadata only, no media download.
        ydl_opts = {
            'quiet': True,
            'skip_download': True,
            'writesubtitles': True,
            'subtitleslangs': ['en'],  # Adjust if you want other languages
            'format': 'bestaudio/best',
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(URL, download=False)

            subtitle_url = self._find_english_subtitle_url(info)
            if subtitle_url is None:
                return "No transcriptions available."

            # Imported lazily, as in the original, so the module loads even
            # when requests is not installed.
            import requests

            # A timeout keeps a stalled connection from hanging the tool.
            response = requests.get(subtitle_url, timeout=30)
            if response.status_code == 200:
                return response.text  # Raw subtitle file contents
            return "No transcriptions available."
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            return f"Error fetching transcription: {str(e)}"