|
|
from smolagents import tool |
|
|
import tempfile |
|
|
import os |
|
|
|
|
|
@tool
def youtube_to_text(url: str) -> str:
    """
    Transcribe a YouTube video.

    First tries to retrieve official captions.
    Falls back to Whisper transcription if captions are unavailable.

    Args:
        url: Full YouTube video URL (watch, youtu.be, shorts, embed or live form).

    Returns:
        Transcribed text, or a message starting with "Error transcribing YouTube video:" if transcription fails.
    """
    # --- Strategy 1: official captions -------------------------------------
    # Deliberately best-effort: any failure here (library missing, captions
    # disabled, malformed URL, network error) just means we fall through to
    # the Whisper strategy below instead of aborting.
    try:
        from youtube_transcript_api import YouTubeTranscriptApi
        from urllib.parse import urlparse, parse_qs

        parsed = urlparse(url)

        # Standard watch URLs carry the ID in the "v" query parameter.
        video_id = parse_qs(parsed.query).get("v", [None])[0]

        if not video_id:
            # Other URL shapes carry the ID in the path instead.
            host = (parsed.hostname or "").lower()
            segments = [part for part in parsed.path.split("/") if part]
            if host in ("youtu.be", "www.youtu.be") and segments:
                # https://youtu.be/<id>
                video_id = segments[0]
            elif len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
                # https://www.youtube.com/shorts/<id>, /embed/<id>, /live/<id>, /v/<id>
                video_id = segments[1]

        if video_id:
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            return " ".join(chunk["text"] for chunk in transcript)
    except Exception:
        # Captions unavailable for whatever reason; try Whisper instead.
        pass

    # --- Strategy 2: download the audio and transcribe it with Whisper ------
    try:
        import whisper
        from pytubefix import YouTube

        yt = YouTube(url)
        audio_stream = yt.streams.get_audio_only()
        if audio_stream is None:
            # Report this explicitly rather than failing on `None.download`.
            return "Error transcribing YouTube video: no audio stream available"

        # Download into a private temporary directory so the audio file is
        # removed once transcription finishes (or fails), instead of piling
        # up in the shared system temp directory.
        with tempfile.TemporaryDirectory() as temp_dir:
            audio_path = audio_stream.download(output_path=temp_dir)

            model = whisper.load_model("base")
            result = model.transcribe(audio_path)

        return result["text"]

    except Exception as e:
        # The function reports failures as text rather than raising.
        return f"Error transcribing YouTube video: {str(e)}"
|
|
|
|
|
@tool
def transcribe_audio(file_path: str) -> str:
    """
    Transcribes audio files into text using the Whisper model.
    Supports multiple formats including .mp3, .wav, .m4a, .flac, and .ogg.

    Args:
        file_path: The local path to the audio file to be transcribed.

    Returns:
        The transcribed text as a string.
    """
    # Message returned when an ImportError surfaces anywhere in the attempt.
    whisper_missing_message = (
        "Whisper is not installed. "
        "Install it with `pip install openai-whisper` and ensure ffmpeg is available."
    )

    try:
        # Imported lazily so a missing dependency becomes a returned message
        # rather than an import-time failure of the whole module.
        import whisper

        # Load the "base" checkpoint and run it on the file in one step.
        transcription = whisper.load_model("base").transcribe(file_path)
        return transcription["text"]
    except ImportError:
        return whisper_missing_message
    except Exception as exc:
        # Every other failure is reported as text instead of being raised.
        return f"Error transcribing audio file: {exc!s}"
|
|
|