# Source: yt / app.py
# Repository page header (not code): ruv's picture | Update app.py | 5356b01 verified
import os
import re
import sys
from io import StringIO
from urllib.parse import parse_qs, urlparse

import gradio as gr
import whisper
import yt_dlp
from moviepy.editor import VideoFileClip
from youtube_transcript_api import YouTubeTranscriptApi
class OutputLogger:
    """Minimal write-only text sink that accumulates everything written to it.

    Exposes the ``write``/``flush`` pair so an instance can stand in for a
    text stream; the captured text is available as the ``output`` attribute.
    """

    def __init__(self):
        # Running concatenation of every message passed to write().
        self.output = ""

    def write(self, msg):
        """Append ``msg`` to the captured text."""
        self.output = self.output + msg

    def flush(self):
        """Do nothing; there is no underlying buffer to flush."""
        return None
def download_video(url):
    """Download ``url`` with yt-dlp while capturing what is printed to stdout.

    Args:
        url: Address of the video to download.

    Returns:
        A 3-tuple ``(file_component, status_message, captured_log)``: a
        ``gr.File`` built from the filename yt-dlp reports, a fixed success
        message, and the text written to ``sys.stdout`` during the download.

    Raises:
        Any exception raised by ``yt_dlp`` propagates to the caller; stdout is
        restored before it does.
    """
    logger = OutputLogger()
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
        'outtmpl': '%(title)s.%(ext)s',
        'verbose': True,
    }

    # Remember the stream that is installed right now (it may already be a
    # redirect) and always put it back, even when the download fails.
    # Otherwise every later print in the process would vanish into `logger`.
    previous_stdout = sys.stdout
    sys.stdout = logger
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            # NOTE(review): when separate video and audio streams are merged,
            # the final file extension may differ from what prepare_filename()
            # reports -- confirm against the yt-dlp documentation.
            filename = ydl.prepare_filename(info)
    finally:
        sys.stdout = previous_stdout

    return gr.File(filename), "Video downloaded successfully! Click the download button to save the file.", logger.output
# Load the Whisper "base" model once, at import time; transcribe_video()
# reuses this module-level instance for every uploaded file.
model = whisper.load_model("base")
def extract_video_id(url):
    """Return the YouTube video ID contained in ``url``, or ``None``.

    Recognised forms (scheme optional, any ``*.youtube.com`` subdomain):

    * ``youtube.com/watch?v=ID`` -- the ``v`` query parameter
    * ``youtube.com/embed/ID``, ``/shorts/ID``, ``/live/ID``, ``/v/ID``
    * ``youtu.be/ID`` -- first path segment; query string and fragment ignored

    Args:
        url: The URL text to inspect. ``None``, empty, or non-string values
            are treated as "no ID".

    Returns:
        The video ID as a string, or ``None`` when the URL is not a
        recognised YouTube URL or carries no ID. Never raises for bad input.
    """
    if not url or not isinstance(url, str):
        return None

    candidate = url.strip()
    try:
        parsed = urlparse(candidate)
        if not parsed.netloc:
            # No scheme was given (e.g. "youtu.be/ID"); re-parse as a
            # network-path reference so the host is recognised as a host.
            parsed = urlparse("//" + candidate)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. unbalanced brackets).
        return None

    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    if host == "youtu.be" or host.endswith(".youtu.be"):
        return segments[0] if segments else None

    if host == "youtube.com" or host.endswith(".youtube.com"):
        ids = parse_qs(parsed.query).get("v")
        if ids:
            return ids[0]
        # Path-style URLs carry the ID right after a known prefix.
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

    return None
def extract_audio_from_video(video_path, audio_path="temp_audio.wav"):
    """Write the audio track of a video file to a 16-bit PCM WAV file.

    Args:
        video_path: Path of the video file to read.
        audio_path: Destination path for the WAV file. Defaults to the fixed
            name ``temp_audio.wav``, which is overwritten on every call.
            NOTE(review): a shared fixed name is not safe if several requests
            run at once -- pass a unique path in that case.

    Returns:
        ``audio_path``, the path of the WAV file that was written.

    Raises:
        ValueError: If the video has no audio track.
        Exception: Whatever moviepy raises while opening or writing; the clip
            is closed before the exception propagates.
    """
    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError("The video has no audio track.")
        # pcm_s16le = uncompressed signed 16-bit little-endian samples.
        clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
    finally:
        # Release the reader even when extraction fails.
        clip.close()
    return audio_path
def _fetch_youtube_transcript(video_id):
    """Return the transcript of ``video_id`` as one string, or an error message."""
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
    except Exception as exc:
        # Top-level UI boundary: report the failure to the user instead of
        # crashing the request (e.g. when no transcript is available).
        return f"Error fetching transcript: {exc}"
    return " ".join(entry['text'] for entry in transcript)


def _transcript_from_url(url):
    """Resolve ``url`` to a video ID and return its transcript or an error message."""
    try:
        video_id = extract_video_id(url)
    except (IndexError, TypeError):
        # Treat a URL the extractor cannot handle as invalid rather than
        # letting the exception escape to the UI.
        video_id = None
    if not video_id:
        return "Invalid YouTube URL."
    return _fetch_youtube_transcript(video_id)


def transcribe_video(video_source, example_choice, youtube_url, video_file):
    """Produce a transcription for the source selected in the UI.

    Args:
        video_source: One of "Example YouTube Presentations",
            "Custom YouTube URL" or "Upload Video File".
        example_choice: Key into ``example_videos``; used only for the
            example source.
        youtube_url: URL typed by the user; used only for the custom source.
        video_file: Path of the uploaded video; used only for the upload
            source.

    Returns:
        The transcription text, or a human-readable error message. YouTube
        sources use the YouTube transcript API; uploads are transcribed with
        the module-level Whisper ``model``. Failures are reported in the
        returned string, not raised.
    """
    if video_source == "Example YouTube Presentations":
        if example_choice not in example_videos:
            return "Invalid example presentation choice."
        return _transcript_from_url(example_videos[example_choice])

    if video_source == "Custom YouTube URL":
        return _transcript_from_url(youtube_url)

    if video_source == "Upload Video File":
        if not video_file:
            return "Error processing video file: no file was uploaded."
        audio_path = None
        try:
            # video_file is used directly as the path of the uploaded video.
            audio_path = extract_audio_from_video(video_file)
            result = model.transcribe(audio_path)
            return result["text"]
        except Exception as exc:
            return f"Error processing video file: {exc}"
        finally:
            # Best-effort removal of the intermediate WAV; a failure to
            # delete must not mask the transcription result.
            if audio_path and os.path.exists(audio_path):
                try:
                    os.remove(audio_path)
                except OSError:
                    pass

    return "Invalid video source choice."
# Built-in sample talks: display title (shown in the dropdown) -> YouTube URL.
example_videos = {
    "I Have a Dream - Martin Luther King Jr.":
        "https://www.youtube.com/watch?v=vP4iY1TtS3s",
    "We choose to go to the Moon - JFK":
        "https://www.youtube.com/watch?v=WZyRbnpGyzQ",
    "Steve Jobs' 2005 Stanford Commencement Address":
        "https://www.youtube.com/watch?v=UF8uR6Z6KLc",
    "Elon Musk's Starship Update Presentation":
        "https://www.youtube.com/watch?v=sOpMrVnjYeY",
    "The Future of Humanity - Michio Kaku":
        "https://www.youtube.com/watch?v=zTkf2sCjYrU",
}
# Build the Gradio UI: one input component per transcribe_video() parameter,
# listed in the same order as the function's signature.
video_source = gr.Radio(
    choices=[
        "Example YouTube Presentations",
        "Custom YouTube URL",
        "Upload Video File",
    ],
    label="Select Video Source",
)
example_choice = gr.Dropdown(
    choices=list(example_videos),
    label="Example YouTube Presentations",
)
youtube_url = gr.Textbox(label="Enter YouTube Video URL")
# No 'type' argument is passed; transcribe_video() uses the value it receives
# directly as the path of the uploaded file.
video_file = gr.Video(label="Upload Video File")
output_text = gr.Textbox(label="Transcription Output")

iface = gr.Interface(
    fn=transcribe_video,
    inputs=[video_source, example_choice, youtube_url, video_file],
    outputs=output_text,
    title="YouTube Video Transcription",
    description="Transcribe YouTube videos using Whisper or YouTube Transcript API.",
)

# Start serving the interface as soon as this module is executed.
iface.launch(debug=True, share=True)