Spaces:
Runtime error
Runtime error
Update tools/transcription_tool.py
Browse files — tools/transcription_tool.py +30 -12
tools/transcription_tool.py
CHANGED
|
@@ -1,25 +1,43 @@
|
|
| 1 |
-
|
| 2 |
import re
|
| 3 |
-
from smolagents.tools import Tool
|
| 4 |
-
class YoutubeTranscriptions(Tool):
|
| 5 |
-
name = "YoutubeTranscriptions"
|
| 6 |
-
description = "Generates transcription for a YouTube video and summarize them. if transcription cannot be generated, just say no transcriptions available. Do not call any other tools"
|
| 7 |
-
inputs = {'URL': {'type': 'string', 'description': 'URL of the video to transcribe'}}
|
| 8 |
-
output_type = "string"
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
def forward(self, URL: str) -> str:
|
| 11 |
# Extract video ID from URL
|
| 12 |
video_id_match = re.search(r"v=([a-zA-Z0-9_-]+)", URL)
|
| 13 |
if not video_id_match:
|
| 14 |
return "Invalid YouTube URL. Please provide a valid URL."
|
| 15 |
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
try:
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
except Exception as e:
|
| 23 |
return f"Error fetching transcription: {str(e)}"
|
| 24 |
|
| 25 |
-
return "Sorry, the transcription was not available."
|
|
|
|
| 1 |
+
import yt_dlp
|
| 2 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
+
class YoutubeTranscriptions:
    """Fetch the English subtitle track of a YouTube video through yt-dlp.

    The instance exposes ``name`` and ``description`` attributes plus a
    ``forward`` method that takes a video URL and returns a string: the
    subtitle payload on success, or a human-readable message otherwise.

    NOTE(review): the previous revision of this file subclassed
    ``smolagents.tools.Tool`` and declared ``inputs`` / ``output_type``;
    this revision does not. Confirm the calling agent still accepts it.
    """

    # Seconds to wait on the subtitle download; without a timeout
    # ``requests.get`` can block indefinitely on a stalled connection.
    _REQUEST_TIMEOUT = 30

    _INVALID_URL_MESSAGE = "Invalid YouTube URL. Please provide a valid URL."
    _NO_TRANSCRIPT_MESSAGE = "No transcriptions available."

    def __init__(self):
        self.name = "YoutubeTranscriptions"
        self.description = "Generates transcription for a YouTube video. If transcription cannot be generated, return 'No transcriptions available.'"

    @staticmethod
    def _pick_subtitle_url(info):
        """Return the URL of the best English subtitle track, or ``None``.

        Manually uploaded subtitles are preferred over auto-generated
        captions. Within a track list, a WebVTT entry is preferred because it
        is plain text; otherwise the first entry carrying a URL is used.
        """
        for key in ("subtitles", "automatic_captions"):
            # Either level may be missing or None in the info dict.
            entries = (info.get(key) or {}).get("en") or []
            candidates = [entry for entry in entries if entry.get("url")]
            if not candidates:
                continue
            for entry in candidates:
                if entry.get("ext") == "vtt":
                    return entry["url"]
            return candidates[0]["url"]
        return None

    def forward(self, URL: str) -> str:
        """Return the English subtitles for the video at ``URL``.

        Args:
            URL: A YouTube link containing ``v=<id>`` or of the form
                ``youtu.be/<id>``.

        Returns:
            The subtitle file contents on success; otherwise one of
            "Invalid YouTube URL. Please provide a valid URL.",
            "No transcriptions available.", or
            "Error fetching transcription: <details>".
        """
        # A non-string input would make re.search raise before the try block.
        if not isinstance(URL, str):
            return self._INVALID_URL_MESSAGE

        # Extract video ID from URL
        video_id_match = re.search(r"(?:v=|youtu\.be/)([a-zA-Z0-9_-]+)", URL)
        if not video_id_match:
            return self._INVALID_URL_MESSAGE

        # Rebuild a canonical watch URL from the ID so that only YouTube is
        # contacted and extra query parameters (playlist, timestamp) are
        # dropped.
        video_url = f"https://www.youtube.com/watch?v={video_id_match.group(1)}"

        # yt-dlp options to get subtitles
        ydl_opts = {
            'quiet': True,
            'skip_download': True,
            'writesubtitles': True,
            'subtitleslangs': ['en'],  # Adjust if you want other languages
            'format': 'bestaudio/best',
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(video_url, download=False)

            subtitle_url = self._pick_subtitle_url(info or {})
            if subtitle_url is None:
                return self._NO_TRANSCRIPT_MESSAGE

            # Download and extract subtitles
            import requests
            response = requests.get(subtitle_url, timeout=self._REQUEST_TIMEOUT)
            if response.status_code == 200:
                return response.text  # Returns the actual transcription

            return self._NO_TRANSCRIPT_MESSAGE
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            return f"Error fetching transcription: {str(e)}"
|
| 43 |
|
|
|