# Francesco-A's picture
# Updated space
# 15a3001
from smolagents import tool
import tempfile
import os
@tool
def youtube_to_text(url: str) -> str:
    """
    Transcribe a YouTube video.

    First tries to retrieve official captions.
    Falls back to Whisper transcription of the downloaded audio track if
    captions are unavailable.

    Args:
        url: Full YouTube video URL

    Returns:
        Transcribed text. If both strategies fail, a message starting with
        "Error transcribing YouTube video:" is returned instead of raising.
    """
    # ---- Step 1: Try official YouTube transcripts ----
    try:
        from urllib.parse import parse_qs, urlparse

        from youtube_transcript_api import YouTubeTranscriptApi

        parsed = urlparse(url)
        video_id = parse_qs(parsed.query).get("v", [None])[0]
        if not video_id:
            # Short links and embed/shorts/live URLs carry the ID in the
            # path rather than in the "v" query parameter.
            segments = [part for part in parsed.path.split("/") if part]
            host = (parsed.hostname or "").lower()
            if host in ("youtu.be", "www.youtu.be") and segments:
                video_id = segments[0]
            elif len(segments) >= 2 and segments[0] in (
                "embed",
                "shorts",
                "live",
                "v",
            ):
                video_id = segments[1]

        if video_id:
            if hasattr(YouTubeTranscriptApi, "get_transcript"):
                # Older youtube-transcript-api releases: static helper.
                transcript = YouTubeTranscriptApi.get_transcript(video_id)
            else:
                # Newer releases dropped the static helper in favour of an
                # instance API; to_raw_data() yields the same list of dicts.
                transcript = (
                    YouTubeTranscriptApi().fetch(video_id).to_raw_data()
                )
            return " ".join(chunk["text"] for chunk in transcript)
    except Exception:
        # Deliberate best-effort: a missing package, disabled captions or a
        # network failure all mean "use the Whisper fallback below".
        pass

    # ---- Step 2: Fallback to Whisper transcription ----
    try:
        import whisper
        from pytubefix import YouTube

        audio_stream = YouTube(url).streams.get_audio_only()
        if audio_stream is None:
            return "Error transcribing YouTube video: no audio stream available"

        # Download into a private directory that is removed on exit, so the
        # audio file does not accumulate in the shared temp dir.
        with tempfile.TemporaryDirectory() as temp_dir:
            audio_path = audio_stream.download(output_path=temp_dir)
            model = whisper.load_model("base")
            result = model.transcribe(audio_path)
            return result["text"]
    except Exception as e:
        return f"Error transcribing YouTube video: {str(e)}"
@tool
def transcribe_audio(file_path: str) -> str:
    """
    Turn a local audio file into text using Whisper's "base" model.

    Supports multiple formats including .mp3, .wav, .m4a, .flac, and .ogg.
    Failures are reported through the returned string rather than raised.

    Args:
        file_path: The local path to the audio file to be transcribed.

    Returns:
        The transcribed text as a string, or an explanatory message when
        Whisper cannot be imported or the transcription fails.
    """
    not_installed_message = (
        "Whisper is not installed. "
        "Install it with `pip install openai-whisper` and ensure ffmpeg is available."
    )
    try:
        # Imported lazily so a missing package yields a message, not a crash.
        import whisper

        speech_model = whisper.load_model("base")
        return speech_model.transcribe(file_path)["text"]
    except ImportError:
        return not_installed_message
    except Exception as exc:
        return "Error transcribing audio file: " + str(exc)