"""Streamlit app: download a YouTube video's audio, transcribe it, summarize it.

Pipeline: yt-dlp extracts an MP3, Whisper transcribes it in fixed-length
chunks, and a Hugging Face summarization pipeline condenses the transcript.
"""

import os
import tempfile
import textwrap

import streamlit as st
import torch
import whisper
import yt_dlp  # Updated import
from moviepy.editor import VideoFileClip  # NOTE(review): appears unused in this file
from transformers import pipeline

# NOTE(review): the models below are loaded by module-level code, i.e. every
# time this script is executed. If that turns out to be slow under Streamlit,
# consider a cached loader -- confirm against the installed Streamlit version.

# Load the Whisper model once with GPU support
device = "cuda" if torch.cuda.is_available() else "cpu"
model = whisper.load_model("base", device=device)  # Choose appropriate model size

# Load the summarization pipeline
summarizer = pipeline("summarization")

# Define chunk length in seconds
chunk_len_s = 10


def download_video(youtube_url, audio_file_path):
    """Download a YouTube video's audio track and save it as an MP3 file.

    Args:
        youtube_url: URL of the video to download.
        audio_file_path: Desired output path. A trailing ``.mp3`` is optional;
            the resulting file always ends in ``.mp3``.

    Returns:
        The path of the MP3 file, or ``None`` if the download failed.
    """
    try:
        # Strip only a trailing ".mp3"; str.replace would also remove the
        # substring from directory names or the middle of the file name.
        if audio_file_path.endswith('.mp3'):
            base_path = audio_file_path[:-len('.mp3')]
        else:
            base_path = audio_file_path

        ydl_opts = {
            'format': 'bestaudio/best',
            # "%" is the output-template field marker, so a literal percent
            # sign in the path has to be doubled. The extension is filled in
            # by yt-dlp and replaced by the post-processor below.
            'outtmpl': base_path.replace('%', '%%') + '.%(ext)s',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([youtube_url])

        final_audio_file_path = base_path + '.mp3'
        print(f"Audio downloaded and saved as {final_audio_file_path}")
        return final_audio_file_path
    except Exception as e:
        # Top-level boundary for this step: report and signal failure.
        print(f"Error downloading video: {e}")
        return None


def transcribe_audio_in_chunks(audio_file_path, chunk_len_s):
    """Transcribe an audio file chunk by chunk with the loaded Whisper model.

    Args:
        audio_file_path: Path of the audio file to transcribe.
        chunk_len_s: Length of each chunk in seconds; must be positive.

    Returns:
        The transcript with chunk texts joined by single spaces, or ``None``
        if the file is missing or transcription failed.
    """
    try:
        if not os.path.exists(audio_file_path):
            print(f"Error: MP3 file {audio_file_path} not found")
            return None

        samples_per_chunk = int(chunk_len_s * whisper.audio.SAMPLE_RATE)
        if samples_per_chunk <= 0:
            raise ValueError(f"chunk_len_s must be positive, got {chunk_len_s!r}")

        # Load and preprocess the audio file
        audio = whisper.load_audio(audio_file_path)

        # Step through the waveform by sample index so that the final partial
        # chunk (including a sub-second tail) is transcribed too.
        pieces = []
        for start in range(0, len(audio), samples_per_chunk):
            chunk = audio[start:start + samples_per_chunk]
            # The chunk is passed to transcribe() as-is: pad_or_trim() would
            # cut any chunk longer than 30 seconds down to 30 seconds.
            result = model.transcribe(chunk)
            text = result['text'].strip()
            if text:
                pieces.append(text)

        return " ".join(pieces)
    except Exception as e:
        print(f"Error transcribing audio: {e}")
        return None


def _split_into_chunks(text, max_chars):
    """Split *text* into pieces of at most *max_chars* characters at whitespace."""
    return textwrap.wrap(text, width=max_chars)


def summarize_text(text):
    """Summarize text, splitting it into smaller chunks when necessary.

    Args:
        text: The text to summarize.

    Returns:
        The per-chunk summaries joined by spaces (an empty string for
        empty input), or ``None`` if summarization failed.
    """
    try:
        # Chunks are bounded by characters (not tokens) and split on word
        # boundaries so that no word is cut in half.
        max_chunk_size = 1024
        text_chunks = _split_into_chunks(text, max_chunk_size)

        # Summarize each chunk and combine the summaries. truncation=True
        # guards against a chunk that tokenizes to more than the model's
        # maximum input length.
        summaries = [
            summarizer(
                chunk,
                max_length=150,
                min_length=40,
                do_sample=False,
                truncation=True,
            )[0]['summary_text']
            for chunk in text_chunks
        ]

        # Combine all summaries into one
        return " ".join(summaries)
    except Exception as e:
        print(f"Error summarizing text: {e}")
        return None


def main(youtube_url):
    """Run the full workflow for one video: download, transcribe, summarize.

    Args:
        youtube_url: URL of the YouTube video to process.

    Returns:
        A ``(transcription, summary)`` tuple. Both are ``None`` when the
        download or transcription failed; only ``summary`` is ``None`` when
        the transcription succeeded but summarization did not.
    """
    # A private temporary directory keeps runs from overwriting each other's
    # audio file and removes the download once transcription is done.
    with tempfile.TemporaryDirectory() as work_dir:
        audio_file_path = os.path.join(work_dir, "audio.mp3")

        # Download video and extract audio
        downloaded_audio_path = download_video(youtube_url, audio_file_path)
        if not downloaded_audio_path:
            return None, None

        # Transcribe the MP3 file in chunks
        transcription = transcribe_audio_in_chunks(downloaded_audio_path, chunk_len_s)

    if not transcription:
        return None, None
    print("Transcription:", transcription)

    # Summarize the transcription; keep the transcript even if this fails.
    summary = summarize_text(transcription)
    return transcription, summary or None


# Streamlit interface
st.title("YouTube Video Transcription and Summarization")

youtube_url = st.text_input(
    "Enter YouTube Video URL",
    "https://www.youtube.com/watch?v=your_video_id",
)

if st.button("Submit"):
    transcription, summary = main(youtube_url)

    if transcription:
        st.subheader("Transcription")
        st.text_area("Transcription", transcription, height=300)
    else:
        st.error("Transcription failed.")

    if summary:
        st.subheader("Summary")
        st.text_area("Summary", summary, height=150)
    else:
        st.error("Summary failed.")