# Spaces: ruv/yt | Runtime error | File size: 4,253 Bytes

import gradio as gr
import os
import yt_dlp
from io import StringIO
import sys
import whisper
import re
from moviepy.editor import VideoFileClip
from youtube_transcript_api import YouTubeTranscriptApi

class OutputLogger:
    """Minimal write-only text stream that accumulates everything written.

    Used as a stand-in for ``sys.stdout`` so that printed output can be
    captured and read back from the ``output`` attribute.
    """

    def __init__(self):
        # All text written so far, concatenated in write order.
        self.output = ""

    def write(self, msg):
        """Append *msg* to the captured output.

        Returns the number of characters written, as the text-stream
        ``write`` protocol expects.
        """
        self.output += msg
        return len(msg)

    def flush(self):
        """Do nothing; the text is held in memory, so there is nothing to flush."""
        pass

def download_video(url):
    """Download the video at *url* with yt-dlp while capturing stdout.

    Args:
        url: Address of the video to download.

    Returns:
        A 3-tuple ``(file_component, status_message, log_text)`` where
        ``file_component`` is a ``gr.File`` built from the filename yt-dlp
        prepared for the download, and ``log_text`` is everything written to
        stdout during the download.

    Raises:
        Whatever ``yt_dlp`` raises when extraction or download fails; stdout
        is restored before the exception propagates.
    """
    from contextlib import redirect_stdout

    logger = OutputLogger()

    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
        'outtmpl': '%(title)s.%(ext)s',
        'verbose': True,
    }

    # redirect_stdout puts back whichever stream was active before the call,
    # even when yt-dlp raises, so a failed download cannot leave sys.stdout
    # pointing at the in-memory logger.
    with redirect_stdout(logger):
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            filename = ydl.prepare_filename(info)

    return gr.File(filename), "Video downloaded successfully! Click the download button to save the file.", logger.output

# Load the Whisper "base" model once, at import time; transcribe_video reuses
# this module-level instance for every uploaded video file.
model = whisper.load_model("base")

# Define the function to extract video ID from URL
def extract_video_id(url):
    """Return the YouTube video ID contained in *url*, or ``None``.

    Recognised forms:

    * ``youtube.com/watch?v=<id>`` (any subdomain, extra query parameters allowed)
    * ``youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``
    * ``youtu.be/<id>`` (query string and trailing slash are ignored)

    A missing scheme is tolerated. Anything else, including ``None``, an
    empty string, or a malformed URL, yields ``None`` rather than raising.
    """
    from urllib.parse import parse_qs, urlparse

    if not isinstance(url, str) or not url.strip():
        return None

    candidate = url.strip()
    if "://" not in candidate:
        # urlparse only fills in the host when a scheme is present.
        candidate = "https://" + candidate

    try:
        parts = urlparse(candidate)
        host = parts.hostname or ""
    except ValueError:
        # Raised for malformed hosts such as an unterminated IPv6 literal.
        return None

    segments = [segment for segment in parts.path.split("/") if segment]

    if host == "youtu.be" or host.endswith(".youtu.be"):
        return segments[0] if segments else None

    if host == "youtube.com" or host.endswith(".youtube.com"):
        ids = parse_qs(parts.query).get("v")
        if ids:
            return ids[0]
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

    return None

# Function to extract audio from video and save as WAV file
def extract_audio_from_video(video_path, audio_path="temp_audio.wav"):
    """Write the audio track of *video_path* to a 16-bit PCM WAV file.

    Args:
        video_path: Path of the video file to read.
        audio_path: Destination of the WAV file. Defaults to
            ``"temp_audio.wav"`` in the current working directory.

    Returns:
        The path the audio was written to (``audio_path``).

    Raises:
        ValueError: If the video has no audio track.
    """
    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError("The video file has no audio track.")
        # pcm_s16le produces 16-bit PCM samples in the WAV container.
        clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
    finally:
        # Close the clip even when the export fails.
        clip.close()
    return audio_path

# Helper shared by both YouTube-based sources of transcribe_video.
def _transcribe_youtube_url(url):
    """Return the transcript of the YouTube video at *url*, or an error message."""
    try:
        video_id = extract_video_id(url)
    except (IndexError, TypeError):
        # The URL could not be parsed into a video ID at all.
        video_id = None
    if not video_id:
        return "Invalid YouTube URL."

    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
    except Exception as e:
        # UI boundary: report the failure as text, as the upload path does,
        # instead of letting the exception escape to the caller.
        return f"Error fetching transcript: {e}"
    return " ".join(segment['text'] for segment in segments)


# Define the transcription function
def transcribe_video(video_source, example_choice, youtube_url, video_file):
    """Transcribe a video chosen through one of three sources.

    Args:
        video_source: One of ``"Example YouTube Presentations"``,
            ``"Custom YouTube URL"`` or ``"Upload Video File"``.
        example_choice: Key into ``example_videos``; used only for the
            example source.
        youtube_url: URL of a YouTube video; used only for the custom-URL
            source.
        video_file: Path of an uploaded video; used only for the upload
            source.

    Returns:
        The transcription text, or a human-readable error message. Failures
        are reported as text rather than raised.
    """
    if video_source == "Example YouTube Presentations":
        video_url = example_videos.get(example_choice)
        if video_url is None:
            return "Invalid example presentation choice."
        return _transcribe_youtube_url(video_url)

    if video_source == "Custom YouTube URL":
        return _transcribe_youtube_url(youtube_url)

    if video_source == "Upload Video File":
        if not video_file:
            return "Error processing video file: no video file was uploaded."
        audio_path = None
        try:
            audio_path = extract_audio_from_video(video_file)  # video_file is used directly as the path
            result = model.transcribe(audio_path)  # Transcribe the extracted audio
            return result["text"]
        except Exception as e:
            return f"Error processing video file: {e}"
        finally:
            # The extracted WAV is only an intermediate; do not leave it on disk.
            if audio_path:
                try:
                    os.remove(audio_path)
                except OSError:
                    pass

    return "Invalid video source choice."

# Dictionary of example YouTube presentations: display title -> watch URL.
# The keys populate the example dropdown, and transcribe_video looks up the
# selected key here to obtain the URL to transcribe.
example_videos = {
    "I Have a Dream - Martin Luther King Jr.": "https://www.youtube.com/watch?v=vP4iY1TtS3s",
    "We choose to go to the Moon - JFK": "https://www.youtube.com/watch?v=WZyRbnpGyzQ",
    "Steve Jobs' 2005 Stanford Commencement Address": "https://www.youtube.com/watch?v=UF8uR6Z6KLc",
    "Elon Musk's Starship Update Presentation": "https://www.youtube.com/watch?v=sOpMrVnjYeY",
    "The Future of Humanity - Michio Kaku": "https://www.youtube.com/watch?v=zTkf2sCjYrU"
}

# Build the Gradio components: a source selector, one input per possible
# source, and a single text box for the result.
video_source = gr.Radio(
    choices=["Example YouTube Presentations", "Custom YouTube URL", "Upload Video File"],
    label="Select Video Source",
)
example_choice = gr.Dropdown(
    choices=list(example_videos),
    label="Example YouTube Presentations",
)
youtube_url = gr.Textbox(label="Enter YouTube Video URL")
video_file = gr.Video(label="Upload Video File")  # no 'type' argument is passed
output_text = gr.Textbox(label="Transcription Output")

# Wire the components to transcribe_video; the input order matches the
# function's parameter order.
iface = gr.Interface(
    fn=transcribe_video,
    inputs=[video_source, example_choice, youtube_url, video_file],
    outputs=output_text,
    title="YouTube Video Transcription",
    description="Transcribe YouTube videos using Whisper or YouTube Transcript API.",
)

# Start the app with debug output and a share link enabled.
iface.launch(debug=True, share=True)