Spaces:

divython
/

yt-video-summariser

Sleeping

App Files Files Community

divython committed on Jun 30, 2025

Commit

a509a5a

verified ·

1 Parent(s): 3009fd5

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -82

app.py CHANGED Viewed

@@ -6,40 +6,80 @@ import subprocess
 from transformers import pipeline
 # --- Configuration ---
-# Choose a smaller Whisper model for Hugging Face Spaces to avoid out-of-memory errors.
-# 'base' or 'small' are good starting points. 'medium' or 'large' might require more resources.
 WHISPER_MODEL_SIZE = "base"
 # Choose a summarization model. 'sshleifer/distilbart-cnn-12-6' is a good balance
 # of performance and size for summarization.
 SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"
-# Define the path to the cookies file. Make sure you have uploaded 'cookies.txt'
-# to the root directory of your Hugging Face Space.
 COOKIES_FILE_PATH = "cookies.txt"
-# --- Load Models (once at startup) ---
-# Load the Whisper ASR model
-print(f"Loading Whisper model: {WHISPER_MODEL_SIZE}...")
-whisper_model = whisper.load_model(WHISPER_MODEL_SIZE)
-print("Whisper model loaded.")
-# Load the summarization pipeline
-print(f"Loading summarization model: {SUMMARIZATION_MODEL}...")
-summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)
-print("Summarization model loaded.")
-# --- Core Functions ---
-def download_and_extract_audio(youtube_url):
     """
-    Downloads a YouTube video and extracts its audio.
-    Returns a tuple: (path to extracted audio file or None, error message or None).
     """
-    video_id = youtube_url.split("v=")[-1].split("&")[0] # Extract video ID
     audio_path = f"/tmp/{video_id}.mp3"
-    # yt-dlp options to download best audio only
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
@@ -51,46 +91,45 @@ def download_and_extract_audio(youtube_url):
         'noplaylist': True,
         'quiet': True,
         'no_warnings': True,
     }
-    # Add cookies if the file exists
     if os.path.exists(COOKIES_FILE_PATH):
         ydl_opts['cookiefile'] = COOKIES_FILE_PATH
-        print(f"Using cookies from {COOKIES_FILE_PATH}")
     else:
-        print(f"Cookies file not found at {COOKIES_FILE_PATH}. Proceeding without cookies.")
     try:
-        print(f"Downloading audio for {youtube_url} to {audio_path}...")
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([youtube_url])
-        print("Audio download and extraction complete.")
-        return audio_path, None
     except yt_dlp.utils.DownloadError as e:
-        error_message = f"Download Error: {e.exc_info[1].msg if e.exc_info else str(e)}"
         print(error_message)
         return None, error_message
     except Exception as e:
-        error_message = f"An unexpected error occurred during download: {str(e)}"
         print(error_message)
         return None, error_message
-def transcribe_audio(audio_file_path):
-    """
-    Transcribes the given audio file using the loaded Whisper model.
-    Returns the transcribed text.
-    """
-    print(f"Transcribing audio from {audio_file_path} using Whisper...")
-    try:
-        # Transcribe using the loaded Whisper model
-        result = whisper_model.transcribe(audio_file_path, fp16=False) # fp16=False for CPU inference
-        transcript = result["text"]
-        print("Transcription complete.")
-        return transcript
-    except Exception as e:
-        print(f"Error during transcription: {e}")
-        return "Transcription failed."
 def summarize_text(text):
     """
@@ -99,64 +138,75 @@ def summarize_text(text):
     """
     print("Summarizing text...")
     try:
-        # The summarizer pipeline can handle long texts by chunking them internally,
-        # but for very long videos, it might still struggle or be slow.
-        # min_length and max_length control the summary length.
-        summary = summarizer(text, max_length=500, min_length=50, do_sample=False)[0]['summary_text']
         print("Summarization complete.")
         return summary
     except Exception as e:
         print(f"Error during summarization: {e}")
         return "Summarization failed."
 def process_youtube_video(youtube_url):
     """
-    Main function to process the YouTube video: download, transcribe, and summarize.
     """
-    # 1. Download and Extract Audio
-    audio_file_path, download_error = download_and_extract_audio(youtube_url)
-    if download_error:
-        return f"Failed to download or extract audio: {download_error}", "N/A"
-    if not audio_file_path or not os.path.exists(audio_file_path):
-        return "Failed to download or extract audio due to an unknown reason.", "N/A"
-    # 2. Transcribe Audio
-    transcript = transcribe_audio(audio_file_path)
-    # 3. Summarize Transcript
-    summary = summarize_text(transcript)
-    # 4. Clean up temporary audio file
-    if os.path.exists(audio_file_path):
-        os.remove(audio_file_path)
-        print(f"Cleaned up {audio_file_path}")
-    return transcript, summary
 # --- Gradio Interface ---
 iface = gr.Interface(
     fn=process_youtube_video,
     inputs=gr.Textbox(label="Enter YouTube Video URL (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)"),
     outputs=[
-        gr.Textbox(label="Full Transcript", lines=15),
-        gr.Textbox(label="Summary/Notes", lines=10)
     ],
-    title="Mini NotebookLM: YouTube Video Summarizer",
     description=(
-        "Enter a YouTube video URL, and this tool will download its audio, "
-        "transcribe it using OpenAI Whisper, and then generate a summary/notes."
-        "<br><b>Note:</b> If you encounter download issues (e.g., 'Sign in to confirm you’re not a bot'), "
-        "ensure you have uploaded a `cookies.txt` file (exported from your browser) to the root of this Hugging Face Space. "
-        "You can typically export cookies using browser extensions like 'Get cookies.txt' or similar."
-        "<br><b>Performance:</b> Analyzing long videos (e.g., 1 hour+) can take a significant amount of time "
-        "and consume considerable resources, especially on CPU-only Hugging Face Spaces. "
-        "For faster processing of long videos, consider upgrading your Hugging Face Space to a GPU instance."
     ),
-    allow_flagging="auto", # Allows users to flag results for review
     examples=[
-        ["https://www.youtube.com/watch?v=jNQXAC9IVRw"], # Updated example: A short educational video
     ]
 )
-# Launch the Gradio app
-iface.launch()

 from transformers import pipeline
 # --- Configuration ---
+# Using 'base' Whisper model for significantly reduced resource usage.
+# This is ideal for free Colab tiers or Hugging Face Spaces with limited CPU/GPU.
 WHISPER_MODEL_SIZE = "base"
 # Choose a summarization model. 'sshleifer/distilbart-cnn-12-6' is a good balance
 # of performance and size for summarization.
 SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"
+# Path to your downloaded cookies.txt file.
+# IMPORTANT: You MUST upload 'cookies.txt' (exported from your browser after logging into YouTube)
+# to the root directory of your Colab notebook or Hugging Face Space for this to work.
 COOKIES_FILE_PATH = "cookies.txt"
+# --- Global Variables for Models (loaded once) ---
+whisper_model = None
+summarizer_pipeline = None
+# --- Setup Function to Install Libraries and Load Models ---
+def setup_environment():
+    """Install the required libraries and load the AI models.
+
+    Populates the module-level ``whisper_model`` and ``summarizer_pipeline``
+    globals. A model that is already loaded is left untouched, so calling
+    this function again only repeats the (quiet) pip step.
+
+    A failed pip run is reported and otherwise ignored, since the packages
+    may already be installed in the environment.
+    """
+    # Local imports keep this function self-contained.
+    import subprocess
+    import sys
+
+    global whisper_model, summarizer_pipeline
+    print("Installing required libraries...")
+    # `!pip install ...` is IPython/Colab shell syntax and a SyntaxError in a
+    # plain .py module, so pip is invoked for the interpreter that is running
+    # this script instead.
+    try:
+        subprocess.check_call([
+            sys.executable, "-m", "pip", "install", "-q",
+            "gradio", "yt-dlp", "openai-whisper", "transformers", "ffmpeg-python",
+        ])
+    except (subprocess.CalledProcessError, OSError) as e:
+        print(f"WARNING: pip install failed: {e}. Continuing with the installed packages.")
+    if whisper_model is None:
+        print(f"Loading Whisper model: {WHISPER_MODEL_SIZE}...")
+        try:
+            # Check for GPU and set device
+            import torch
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            print(f"Using device: {device}")
+            whisper_model = whisper.load_model(WHISPER_MODEL_SIZE, device=device)
+            print("Whisper model loaded.")
+        except Exception as e:
+            # Covers both a missing torch install and a failed load on the
+            # selected device; retry once on CPU.
+            print(f"Error loading Whisper model: {e}. Falling back to CPU.")
+            whisper_model = whisper.load_model(WHISPER_MODEL_SIZE, device="cpu")
+            print("Whisper model loaded on CPU.")
+    if summarizer_pipeline is None:
+        print(f"Loading summarization model: {SUMMARIZATION_MODEL}...")
+        summarizer_pipeline = pipeline("summarization", model=SUMMARIZATION_MODEL)
+        print("Summarization model loaded.")
+# Call setup function once at the start of the Colab session
+setup_environment()
+# --- Audio Download and Transcription ---
+def download_and_transcribe_audio(youtube_url):
     """
+    Downloads audio from YouTube and transcribes it using Whisper.
+    Returns transcript or error message.
     """
+    video_id = None
+    try:
+        from urllib.parse import urlparse, parse_qs
+        parsed_url = urlparse(youtube_url)
+        if parsed_url.hostname in ['www.youtube.com', 'youtube.com', 'm.youtube.com']:
+            video_id = parse_qs(parsed_url.query).get('v')
+            if video_id:
+                video_id = video_id[0]
+        elif parsed_url.hostname == 'youtu.be':
+            video_id = parsed_url.path[1:]
+        if not video_id:
+            return None, "Invalid YouTube URL provided. Please check the format."
+    except Exception as e:
+        return None, f"Error parsing YouTube URL: {e}"
     audio_path = f"/tmp/{video_id}.mp3"
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
         'noplaylist': True,
         'quiet': True,
         'no_warnings': True,
+        'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36',
     }
     if os.path.exists(COOKIES_FILE_PATH):
         ydl_opts['cookiefile'] = COOKIES_FILE_PATH
+        print(f"Using cookies from {COOKIES_FILE_PATH} for yt-dlp download.")
     else:
+        print(f"WARNING: {COOKIES_FILE_PATH} not found. Proceeding without cookies. "
+              "Downloads may fail due to bot detection. Please upload a valid cookies.txt.")
     try:
+        print(f"Downloading audio for {youtube_url} to {audio_path} using yt-dlp...")
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             ydl.download([youtube_url])
+        print("Audio download complete.")
+        print(f"Transcribing audio from {audio_path} using Whisper ({WHISPER_MODEL_SIZE})...")
+        if whisper_model is None:
+            setup_environment()
+        result = whisper_model.transcribe(audio_path, fp16=False)
+        transcript = result["text"]
+        print("Transcription complete.")
+        return transcript, None
     except yt_dlp.utils.DownloadError as e:
+        error_message = f"Download Error (yt-dlp): {e.exc_info[1].msg if e.exc_info else str(e)}"
         print(error_message)
         return None, error_message
     except Exception as e:
+        error_message = f"An unexpected error occurred during audio processing: {str(e)}"
         print(error_message)
         return None, error_message
+    finally:
+        if os.path.exists(audio_path):
+            os.remove(audio_path)
+            print(f"Cleaned up {audio_path}")
+# --- Text Summarization ---
 def summarize_text(text):
     """
     """
     print("Summarizing text...")
     try:
+        if summarizer_pipeline is None:
+            setup_environment()
+        summary = summarizer_pipeline(text, max_length=500, min_length=50, do_sample=False)[0]['summary_text']
         print("Summarization complete.")
         return summary
     except Exception as e:
         print(f"Error during summarization: {e}")
         return "Summarization failed."
+# --- Main Processing Function ---
 def process_youtube_video(youtube_url):
     """
+    Main function to process the YouTube video: download audio, transcribe, and summarize.
+
+    Returns a (full_transcript, summary_notes) tuple of strings. When the
+    models cannot be loaded or no transcript is obtained, the first element
+    carries the error text and the second is "N/A".
     """
+    if whisper_model is None or summarizer_pipeline is None:
+        setup_environment()
+        if whisper_model is None or summarizer_pipeline is None:
+            return "Error: Failed to load AI models. Please check Colab environment.", "N/A"
+    transcribed_text, audio_error = download_and_transcribe_audio(youtube_url)
+    # Decide on the explicit error value rather than on the transcript's
+    # truthiness, so an empty transcription is not reported as
+    # "Failed to get transcript: None".
+    if audio_error is not None:
+        return f"Failed to get transcript: {audio_error}", "N/A"
+    if not transcribed_text or not transcribed_text.strip():
+        return "Failed to get transcript: no speech was detected in the audio.", "N/A"
+    return transcribed_text, summarize_text(transcribed_text)
 # --- Gradio Interface ---
 iface = gr.Interface(
     fn=process_youtube_video,
     inputs=gr.Textbox(label="Enter YouTube Video URL (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ)"),
     outputs=[
+        gr.Textbox(label="Full Transcript", lines=15, interactive=False),
+        gr.Textbox(label="Summary/Notes", lines=10, interactive=False)
     ],
+    title="Mini-Mini NotebookLM: YouTube Video Summarizer (Colab/Hugging Face)",
     description=(
+        "This is a smaller, more resource-efficient version of NotebookLM. "
+        "Enter a YouTube video URL. This tool will download its audio using `yt-dlp`, "
+        "transcribe it using OpenAI Whisper (using the smaller 'base' model), "
+        "and then generate a summary/notes."
+        "<br><br><b>Important Setup Steps (One-Time in Colab/Hugging Face Spaces):</b>"
+        "<ol>"
+        "<li><b>Export `cookies.txt` from your browser:</b> Use a browser extension like 'Get cookies.txt' (for Chrome/Firefox) "
+        "after logging into YouTube. This file contains your session cookies, which `yt-dlp` needs to bypass YouTube's bot detection.</li>"
+        "<li><b>Upload `cookies.txt` to the root directory of your Colab notebook or Hugging Face Space.</b></li>"
+        "</ol>"
+        "<b>Performance Note:</b> While this version is optimized, analyzing long videos (e.g., 1 hour+) can still take a significant amount of time "
+        "and consume considerable resources, especially on free tiers. For faster results, try shorter videos."
+        "<br><b>Troubleshooting Downloads:</b> If downloads still fail with 'Sign in to confirm you’re not a bot', "
+        "your `cookies.txt` might be invalid or expired, or YouTube's detection has become more aggressive. "
+        "There are no other direct, free, and reliable methods to bypass YouTube's restrictions without using their official APIs."
     ),
+    allow_flagging="auto",
     examples=[
+        ["https://www.youtube.com/watch?v=jNQXAC9IVRw"], # Short educational video
+        ["https://www.youtube.com/watch?v=kfS7W0-JtQo"] # Another example
     ]
 )
+iface.launch(debug=True)