File size: 1,719 Bytes
0054c4b
35e662a
a0b3e68
20417b0
0054c4b
 
 
57db83d
 
0054c4b
d70b722
35e662a
 
 
 
 
0054c4b
 
 
 
 
 
 
 
 
 
35e662a
 
0054c4b
 
 
 
 
 
 
 
 
 
 
 
 
 
35e662a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import yt_dlp
import re
from smolagents.tools import Tool
class YoutubeTranscriptions(Tool):
    """Agent tool that fetches the English subtitle track of a YouTube video.

    Video metadata is read with yt-dlp (nothing is downloaded), an English
    subtitle track is located, and its raw text is fetched over HTTP.
    Every failure is reported as a plain string so the calling agent always
    receives a usable answer instead of an exception.
    """

    def __init__(self):
        self.name = "YoutubeTranscriptions"
        self.description = "Generates transcription for a YouTube video. If transcription cannot be generated, return 'No transcriptions available.'"
        self.inputs = {'URL': {'type': 'string', 'description': 'URL of the video to transcribe'}}
        self.output_type = "string"
        # Run the base initializer so the Tool machinery's own state is set up.
        super().__init__()

    @staticmethod
    def _english_track_url(info):
        """Return the URL of an English subtitle track in *info*, or None.

        Manually uploaded subtitles are preferred; auto-generated captions
        are used as a fallback because many videos only have those.
        """
        for field in ('subtitles', 'automatic_captions'):
            tracks = info.get(field) or {}
            if 'en' in tracks:
                lang = 'en'
            else:
                # Accept regional variants such as 'en-US' or 'en-GB'.
                lang = next((code for code in tracks if code.startswith('en-')), None)
            formats = tracks.get(lang) if lang else None
            if not formats:
                continue
            # The first listed format is not guaranteed to be readable text,
            # so prefer WebVTT when it is offered.
            chosen = next((fmt for fmt in formats if fmt.get('ext') == 'vtt'), formats[0])
            track_url = chosen.get('url')
            if track_url:
                return track_url
        return None

    def forward(self, URL: str) -> str:
        """Return the English subtitle text for the video at *URL*.

        Args:
            URL: Address of the YouTube video (watch, youtu.be, shorts,
                embed and live links are accepted).

        Returns:
            The subtitle file contents, or a human-readable message when the
            URL is not recognised, no English track exists, or fetching fails.
        """
        # Cheap sanity check that the URL carries a video ID before going to
        # the network.
        if not re.search(r"(?:v=|youtu\.be/|/shorts/|/embed/|/live/)([a-zA-Z0-9_-]+)", URL):
            return "Invalid YouTube URL. Please provide a valid URL."

        # yt-dlp options to get subtitles
        ydl_opts = {
            'quiet': True,
            'skip_download': True,
            'writesubtitles': True,
            'subtitleslangs': ['en'],  # Adjust if you want other languages
            'format': 'bestaudio/best'
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(URL, download=False)

            subtitle_url = self._english_track_url(info or {})
            if subtitle_url is None:
                return "No transcriptions available."

            # Imported lazily inside the try so a missing dependency is
            # reported as an error string rather than raised.
            import requests
            # Bound the request so a stalled connection cannot hang the agent.
            response = requests.get(subtitle_url, timeout=30)
            if response.status_code == 200:
                return response.text  # Returns the actual transcription

            return "No transcriptions available."
        except Exception as e:
            # Tool boundary: surface any failure to the agent as text.
            return f"Error fetching transcription: {str(e)}"