SherwinDesouza committed on
Commit
0054c4b
·
verified ·
1 Parent(s): 0a4ca22

Update tools/transcription_tool.py

Browse files
Files changed (1) hide show
  1. tools/transcription_tool.py +30 -12
tools/transcription_tool.py CHANGED
@@ -1,25 +1,43 @@
1
- from youtube_transcript_api import YouTubeTranscriptApi
2
  import re
3
- from smolagents.tools import Tool
4
- class YoutubeTranscriptions(Tool):
5
- name = "YoutubeTranscriptions"
6
- description = "Generates transcription for a YouTube video and summarize them. if transcription cannot be generated, just say no transcriptions available. Do not call any other tools"
7
- inputs = {'URL': {'type': 'string', 'description': 'URL of the video to transcribe'}}
8
- output_type = "string"
9
 
 
 
 
 
 
10
  def forward(self, URL: str) -> str:
11
  # Extract video ID from URL
12
  video_id_match = re.search(r"v=([a-zA-Z0-9_-]+)", URL)
13
  if not video_id_match:
14
  return "Invalid YouTube URL. Please provide a valid URL."
15
 
16
- video_id = video_id_match.group(1)
 
 
 
 
 
 
 
 
 
17
 
18
  try:
19
- transcription = YouTubeTranscriptApi.get_transcript(video_id)
20
- if transcription:
21
- return transcription
 
 
 
 
 
 
 
 
 
 
 
22
  except Exception as e:
23
  return f"Error fetching transcription: {str(e)}"
24
 
25
- return "Sorry, the transcription was not available."
 
1
+ import yt_dlp
2
  import re
 
 
 
 
 
 
3
 
4
class YoutubeTranscriptions:
    """Fetch the English subtitle track of a YouTube video using yt-dlp.

    The tool never raises from ``forward``: every outcome (success, invalid
    input, missing subtitles, unexpected failure) is reported as a string so
    that an agent calling it always receives a usable answer.
    """

    # Markers that may precede the video ID.  ``v=`` is the original check
    # (kept unanchored for backward compatibility); the others cover the
    # short-link, Shorts and embed URL shapes.
    _VIDEO_ID_PATTERN = re.compile(
        r"(?:v=|youtu\.be/|/shorts/|/embed/)([a-zA-Z0-9_-]+)"
    )

    # Upper bound (seconds) for the subtitle download so a stalled server
    # cannot block the caller forever.
    _REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self):
        self.name = "YoutubeTranscriptions"
        self.description = "Generates transcription for a YouTube video. If transcription cannot be generated, return 'No transcriptions available.'"

    @staticmethod
    def _pick_subtitle_url(info, lang='en'):
        """Return the first usable subtitle URL for *lang*, or ``None``.

        Manually uploaded subtitles are preferred; automatically generated
        captions are used as a fallback.
        """
        for key in ('subtitles', 'automatic_captions'):
            # Either level may be missing or explicitly None.
            tracks = (info.get(key) or {}).get(lang) or []
            for track in tracks:
                track_url = track.get('url')
                if track_url:
                    return track_url
        return None

    def forward(self, URL: str) -> str:
        """Return the raw English subtitle text for the video at *URL*.

        Args:
            URL: Link to a YouTube video (``watch?v=``, ``youtu.be/``,
                ``/shorts/`` or ``/embed/`` form).

        Returns:
            The subtitle file contents as served by the subtitle URL, or one
            of the messages ``"Invalid YouTube URL. Please provide a valid
            URL."``, ``"No transcriptions available."`` or
            ``"Error fetching transcription: <reason>"``.
        """
        # Reject non-strings up front: re would raise TypeError on them,
        # outside of the try block below.
        if not isinstance(URL, str) or not self._VIDEO_ID_PATTERN.search(URL):
            return "Invalid YouTube URL. Please provide a valid URL."

        # yt-dlp options to get subtitles
        ydl_opts = {
            'quiet': True,
            'skip_download': True,
            'writesubtitles': True,
            'subtitleslangs': ['en'],  # Adjust if you want other languages
            # A watch URL carrying "&list=" must resolve to the single video,
            # not to a playlist entry list that has no subtitle keys.
            'noplaylist': True,
            'format': 'bestaudio/best',
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(URL, download=False) or {}

            subtitle_url = self._pick_subtitle_url(info)
            if subtitle_url is None:
                return "No transcriptions available."

            # Imported lazily, inside the try, so a missing ``requests``
            # installation is reported as an error string rather than raised.
            import requests
            response = requests.get(
                subtitle_url, timeout=self._REQUEST_TIMEOUT_SECONDS
            )
            if response.status_code == 200:
                return response.text  # Returns the actual transcription

            return "No transcriptions available."
        except Exception as e:
            # Top-level boundary of the tool: report instead of propagating.
            return f"Error fetching transcription: {str(e)}"
43