# Source: First_agent_template/tools/transcription_tool.py
# Last commit 57db83d (verified) by SherwinDesouza: "Update tools/transcription_tool.py"
import yt_dlp
import re
from smolagents.tools import Tool
class YoutubeTranscriptions(Tool):
    """Tool that returns the English subtitle track of a YouTube video as text.

    The video itself is never downloaded: yt-dlp is used only to read the
    video's metadata, which lists the available subtitle tracks. Manually
    uploaded subtitles are preferred; automatically generated captions are
    used as a fallback. Every failure mode is reported as a plain string so
    the calling agent always receives a usable answer instead of an exception.
    """

    name = "YoutubeTranscriptions"
    description = "Generates transcription for a YouTube video. If transcription cannot be generated, return 'No transcriptions available.'"
    inputs = {'URL': {'type': 'string', 'description': 'URL of the video to transcribe'}}
    output_type = "string"

    def __init__(self):
        # Run the base-class initializer so the instance carries whatever
        # state Tool sets up for its own call machinery.
        super().__init__()

    @staticmethod
    def _english_tracks(tracks_by_lang):
        """Return the subtitle-format list for English, or [] if there is none.

        An exact 'en' key wins; otherwise the first regional variant
        (e.g. 'en-US', 'en-GB') that has any tracks is used.
        """
        if not tracks_by_lang:
            return []
        if tracks_by_lang.get('en'):
            return tracks_by_lang['en']
        for lang, tracks in tracks_by_lang.items():
            if tracks and lang.lower().startswith(('en-', 'en_')):
                return tracks
        return []

    @staticmethod
    def _pick_track(tracks):
        """Choose one subtitle format entry, preferring human-readable WebVTT.

        Entries without inline 'data' or a 'url' are unusable and skipped.
        Returns None when no usable entry exists.
        """
        usable = [t for t in tracks if t.get('data') or t.get('url')]
        for track in usable:
            if track.get('ext') == 'vtt':
                return track
        return usable[0] if usable else None

    def forward(self, URL: str) -> str:
        """Fetch the English transcription for the video at ``URL``.

        Args:
            URL: Link to a YouTube video (watch, youtu.be, shorts, embed or
                live form).

        Returns:
            The subtitle file contents on success, otherwise one of:
            "Invalid YouTube URL. Please provide a valid URL.",
            "No transcriptions available.", or
            "Error fetching transcription: <details>".
        """
        # Accept the common YouTube URL shapes, not only '...watch?v=<id>'.
        video_id_match = isinstance(URL, str) and re.search(
            r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)[a-zA-Z0-9_-]+", URL
        )
        if not video_id_match:
            return "Invalid YouTube URL. Please provide a valid URL."

        # yt-dlp options to get subtitles
        ydl_opts = {
            'quiet': True,
            'skip_download': True,
            'writesubtitles': True,
            'subtitleslangs': ['en'],  # Adjust if you want other languages
            'format': 'bestaudio/best',
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(URL, download=False) or {}

            # Manually uploaded subtitles first, auto-generated captions second.
            for tracks_by_lang in (info.get('subtitles'), info.get('automatic_captions')):
                track = self._pick_track(self._english_tracks(tracks_by_lang))
                if track is None:
                    continue

                # Some entries carry the subtitle text inline instead of a URL.
                if track.get('data'):
                    return track['data']

                import requests

                # A timeout keeps the agent from hanging on a stalled connection.
                response = requests.get(track['url'], timeout=30)
                if response.status_code == 200:
                    return response.text  # Returns the actual transcription

            return "No transcriptions available."
        except Exception as e:
            # Tool boundary: report the failure to the agent as text.
            return f"Error fetching transcription: {str(e)}"