# leninjr's picture
# Create app.py
# 22d27ba verified
import streamlit as st
import whisper
from moviepy.editor import VideoFileClip
import torch
import os
import yt_dlp # Updated import
from transformers import pipeline
# Streamlit re-executes this script from the top on every widget interaction,
# so the heavyweight models are built inside cached factories: each one is
# created once per server process and reused on later reruns.
# NOTE(review): cached resources are shared between sessions — confirm that
# concurrent use of the same model objects is acceptable for this deployment.

# Run Whisper on the GPU when one is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"


@st.cache_resource
def _load_whisper_model(target_device):
    """Load the Whisper "base" checkpoint onto ``target_device`` (cached)."""
    return whisper.load_model("base", device=target_device)  # Choose appropriate model size


@st.cache_resource
def _load_summarizer():
    """Build the default Hugging Face summarization pipeline (cached)."""
    return pipeline("summarization")


# Module-level model handles read by the transcription and summarization code.
model = _load_whisper_model(device)
summarizer = _load_summarizer()

# Define chunk length in seconds
chunk_len_s = 10
def download_video(youtube_url, audio_file_path):
    """
    Downloads the audio of a YouTube video and saves it as an MP3 file.

    Args:
        youtube_url: URL passed to yt-dlp.
        audio_file_path: Destination path; ".mp3" is appended when it is missing.

    Returns:
        The path of the MP3 file, or None when the download fails or the
        expected file is not present afterwards.
    """
    try:
        # Strip only a trailing ".mp3". str.replace would also remove ".mp3"
        # from directory names or from the middle of the file name.
        if audio_file_path.endswith('.mp3'):
            base_path = audio_file_path[:-len('.mp3')]
        else:
            base_path = audio_file_path
        final_audio_file_path = base_path + '.mp3'

        ydl_opts = {
            'format': 'bestaudio/best',
            # "%" introduces a field in the output template, so literal ones
            # in the path are doubled; yt-dlp fills in the real extension.
            'outtmpl': base_path.replace('%', '%%') + '.%(ext)s',
            # A watch URL that also carries a playlist id should yield one file.
            'noplaylist': True,
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([youtube_url])

        # Report a missing result here instead of returning a path to nothing.
        if not os.path.exists(final_audio_file_path):
            print(f"Error downloading video: {final_audio_file_path} was not created")
            return None

        print(f"Audio downloaded and saved as {final_audio_file_path}")
        return final_audio_file_path
    except Exception as e:
        # Best-effort boundary: any failure is reported and signalled as None.
        print(f"Error downloading video: {e}")
        return None
def transcribe_audio_in_chunks(audio_file_path, chunk_len_s):
    """
    Transcribes a provided audio file in chunks using the loaded Whisper model.

    Args:
        audio_file_path: Path of the audio file to transcribe.
        chunk_len_s: Length of each chunk in seconds.

    Returns:
        The chunk transcripts joined by single spaces and stripped, or None
        when the file does not exist or any step raises.
    """
    try:
        if not os.path.exists(audio_file_path):
            print(f"Error: MP3 file {audio_file_path} not found")
            return None

        # Load the audio file as a sample array
        audio = whisper.load_audio(audio_file_path)

        # Step through the array in samples rather than whole seconds so the
        # fractional final second (or a clip shorter than one second) is kept.
        samples_per_chunk = int(chunk_len_s * whisper.audio.SAMPLE_RATE)

        pieces = []
        for start in range(0, len(audio), samples_per_chunk):
            chunk = audio[start:start + samples_per_chunk]
            # The chunk is passed as-is: transcribe() handles short input,
            # whereas pad_or_trim would cut any chunk longer than 30 seconds.
            result = model.transcribe(chunk)
            pieces.append(result['text'])
        return " ".join(pieces).strip()
    except Exception as e:
        # Best-effort boundary: any failure is reported and signalled as None.
        print(f"Error transcribing audio: {e}")
        return None
def summarize_text(text, max_chunk_size=1024):
    """
    Summarizes the provided text by splitting it into smaller chunks if necessary.

    Args:
        text: Text to summarize.
        max_chunk_size: Maximum length of one chunk, in characters (not tokens).

    Returns:
        The per-chunk summaries joined by single spaces ("" when the text
        contains no words), or None when any step raises.
    """
    try:
        import textwrap  # local import: only this function needs it

        # Break on whitespace so that no word is cut in half at a chunk
        # boundary, which a fixed-offset slice would do.
        text_chunks = textwrap.wrap(text, width=max_chunk_size, break_on_hyphens=False)

        # Summarize each chunk independently
        summaries = [
            summarizer(chunk, max_length=150, min_length=40, do_sample=False)[0]['summary_text']
            for chunk in text_chunks
        ]

        # Combine all summaries into one
        return " ".join(summaries)
    except Exception as e:
        # Best-effort boundary: any failure is reported and signalled as None.
        print(f"Error summarizing text: {e}")
        return None
def main(youtube_url):
    """
    Main workflow: Downloads audio from YouTube video, transcribes it in chunks, and summarizes the transcription.

    Args:
        youtube_url: URL of the video to process.

    Returns:
        A (transcription, summary) tuple. Both are None when the download or
        the transcription fails; only summary is None when summarization fails.
    """
    audio_file_path = "audio.mp3"

    # Download video and extract audio
    downloaded_audio_path = download_video(youtube_url, audio_file_path)
    if not downloaded_audio_path:
        return None, None

    # Transcribe the MP3 file in chunks
    transcription = transcribe_audio_in_chunks(downloaded_audio_path, chunk_len_s)
    if not transcription:
        return None, None
    print("Transcription:", transcription)

    # Summarize the transcription. A failed or empty summary must not discard
    # a transcript that was produced successfully, so it is returned either way.
    summary = summarize_text(transcription)
    return transcription, summary or None
# Streamlit interface: page title, URL field, and a submit button that runs
# the whole pipeline and renders both results.
st.title("YouTube Video Transcription and Summarization")
youtube_url = st.text_input(
    "Enter YouTube Video URL",
    "https://www.youtube.com/watch?v=your_video_id",
)


def _show_result(heading, content, box_height, failure_message):
    """Render ``content`` under ``heading``, or ``failure_message`` if it is empty."""
    if not content:
        st.error(failure_message)
        return
    st.subheader(heading)
    st.text_area(heading, content, height=box_height)


if st.button("Submit"):
    transcription, summary = main(youtube_url)
    _show_result("Transcription", transcription, 300, "Transcription failed.")
    _show_result("Summary", summary, 150, "Summary failed.")