"""Gradio app that transcribes YouTube videos or uploaded video files.

YouTube sources are transcribed by fetching the existing transcript through
``YouTubeTranscriptApi``; uploaded video files have their audio track
extracted with moviepy and transcribed locally with a Whisper model.
"""

import contextlib
import os
import re
import sys
import tempfile
from io import StringIO
from typing import Optional

import gradio as gr
import whisper
import yt_dlp
from moviepy.editor import VideoFileClip
from youtube_transcript_api import YouTubeTranscriptApi


class OutputLogger:
    """Minimal file-like object that accumulates everything written to it."""

    def __init__(self):
        self.output = ""

    def write(self, msg):
        """Append *msg* to the captured output."""
        self.output += msg

    def flush(self):
        """No-op; present so the object can stand in for a text stream."""
        pass


def download_video(url):
    """Download *url* with yt_dlp and capture what it prints to stdout.

    Args:
        url: Address of the video to download.

    Returns:
        A 3-tuple ``(gr.File, status_message, captured_stdout)`` where the
        ``gr.File`` wraps the filename yt_dlp prepared for the download.

    Raises:
        Whatever ``yt_dlp`` raises when extraction or download fails.
    """
    logger = OutputLogger()
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
        'outtmpl': '%(title)s.%(ext)s',
        'verbose': True,
    }

    # redirect_stdout restores the *previous* sys.stdout even when yt_dlp
    # raises, and does not assume the previous stream was sys.__stdout__
    # (it is not in notebooks or when a server has already redirected it).
    with contextlib.redirect_stdout(logger):
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
            filename = ydl.prepare_filename(info)

    return (
        gr.File(filename),
        "Video downloaded successfully! Click the download button to save the file.",
        logger.output,
    )


# Load Whisper model
model = whisper.load_model("base")

# ``v=<id>`` query parameter of a watch URL; stops at the next parameter or
# at a ``#fragment`` so neither leaks into the ID.
_WATCH_ID_RE = re.compile(r"[?&]v=([^&#]+)")
# Path-style youtube.com URLs that carry the ID as a path segment.
_PATH_ID_RE = re.compile(r"youtube\.com/(?:embed|shorts|live)/([^/?#&]+)")
# Short links: the ID is the first path segment after the host.
_SHORT_ID_RE = re.compile(r"youtu\.be/([^/?#&]+)")


def extract_video_id(url: Optional[str]) -> Optional[str]:
    """Return the YouTube video ID contained in *url*, or ``None``.

    Handles ``youtube.com`` watch URLs (``v=`` query parameter),
    ``youtube.com/embed|shorts|live/<id>`` paths and ``youtu.be/<id>`` short
    links. Query strings and fragments following the ID are not included.

    Args:
        url: The URL to inspect; ``None`` or an empty string yields ``None``.

    Returns:
        The video ID, or ``None`` when no ID can be found.
    """
    if not url:
        return None
    url = url.strip()

    if "youtube.com" in url:
        match = _WATCH_ID_RE.search(url) or _PATH_ID_RE.search(url)
    elif "youtu.be" in url:
        match = _SHORT_ID_RE.search(url)
    else:
        match = None
    return match.group(1) if match else None


def extract_audio_from_video(video_path):
    """Extract the audio track of a video into a temporary WAV file.

    Each call writes to its own uniquely named temporary file, so concurrent
    requests do not overwrite each other's audio.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the WAV file that was written. The caller is responsible for
        deleting it once it is no longer needed.

    Raises:
        ValueError: If the video has no audio track.
        Exception: Anything moviepy raises while reading or writing.
    """
    fd, audio_path = tempfile.mkstemp(prefix="temp_audio_", suffix=".wav")
    os.close(fd)  # moviepy reopens the path itself; only the name is needed
    try:
        clip = VideoFileClip(video_path)
        try:
            if clip.audio is None:
                raise ValueError("The video file has no audio track.")
            # 16-bit PCM WAV output
            clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
        finally:
            clip.close()
    except Exception:
        # Do not leave an empty/partial temp file behind on failure.
        with contextlib.suppress(OSError):
            os.remove(audio_path)
        raise
    return audio_path


def _fetch_youtube_transcript(video_id):
    """Return the transcript text for *video_id*, or an error message."""
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
    except Exception as e:  # UI boundary: report the failure as text
        return f"Error fetching YouTube transcript: {e}"
    return " ".join(entry['text'] for entry in transcript)


def _transcribe_uploaded_file(video_file):
    """Transcribe an uploaded video with Whisper, or return an error message."""
    if not video_file:
        return "No video file uploaded."

    audio_path = None
    try:
        audio_path = extract_audio_from_video(video_file)
        result = model.transcribe(audio_path)
        return result["text"]
    except Exception as e:  # UI boundary: report the failure as text
        return f"Error processing video file: {e}"
    finally:
        # The extracted audio is only needed for the transcription call.
        if audio_path is not None:
            with contextlib.suppress(OSError):
                os.remove(audio_path)


def transcribe_video(video_source, example_choice, youtube_url, video_file):
    """Produce a transcription for the source selected in the UI.

    Args:
        video_source: One of "Example YouTube Presentations",
            "Custom YouTube URL" or "Upload Video File".
        example_choice: Key into ``example_videos``; used only for the
            example source.
        youtube_url: URL typed by the user; used only for the custom source.
        video_file: Path of the uploaded video; used only for the upload
            source.

    Returns:
        The transcription text, or a human-readable error message. Failures
        are reported as text for every source rather than raised.
    """
    if video_source == "Example YouTube Presentations":
        video_url = example_videos.get(example_choice)
        if video_url is None:
            return "Invalid example presentation choice."
        return _fetch_youtube_transcript(extract_video_id(video_url))

    if video_source == "Custom YouTube URL":
        video_id = extract_video_id(youtube_url)
        if not video_id:
            return "Invalid YouTube URL."
        return _fetch_youtube_transcript(video_id)

    if video_source == "Upload Video File":
        return _transcribe_uploaded_file(video_file)

    return "Invalid video source choice."


# Dictionary of example YouTube presentations
example_videos = {
    "I Have a Dream - Martin Luther King Jr.": "https://www.youtube.com/watch?v=vP4iY1TtS3s",
    "We choose to go to the Moon - JFK": "https://www.youtube.com/watch?v=WZyRbnpGyzQ",
    "Steve Jobs' 2005 Stanford Commencement Address": "https://www.youtube.com/watch?v=UF8uR6Z6KLc",
    "Elon Musk's Starship Update Presentation": "https://www.youtube.com/watch?v=sOpMrVnjYeY",
    "The Future of Humanity - Michio Kaku": "https://www.youtube.com/watch?v=zTkf2sCjYrU",
}

# Create the Gradio interface
video_source = gr.Radio(
    ["Example YouTube Presentations", "Custom YouTube URL", "Upload Video File"],
    label="Select Video Source",
)
example_choice = gr.Dropdown(list(example_videos.keys()), label="Example YouTube Presentations")
youtube_url = gr.Textbox(label="Enter YouTube Video URL")
video_file = gr.Video(label="Upload Video File")  # no 'type' parameter
output_text = gr.Textbox(label="Transcription Output")

iface = gr.Interface(
    fn=transcribe_video,
    inputs=[video_source, example_choice, youtube_url, video_file],
    outputs=output_text,
    title="YouTube Video Transcription",
    description="Transcribe YouTube videos using Whisper or YouTube Transcript API.",
)

# Launch the interface only when run as a script, so importing this module
# (e.g. to reuse the helpers) does not start a public server.
if __name__ == "__main__":
    iface.launch(debug=True, share=True)