# sbby's picture
# Update app.py
# 7b9dbaf verified
import re
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
import torch
import gradio as gr
from transformers import pipeline
# Choose the inference device: GPU index 0 when torch reports CUDA,
# otherwise -1 (the value the transformers pipeline treats as CPU).
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Instantiate the summarization pipeline once at import time so that
# every request reuses the same loaded model.
text_summary = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    device=device,
)
def chunk_text(text, max_chunk_length=800):
    """
    Split text into chunks of at most ``max_chunk_length`` characters.

    Each chunk ends at the last period inside the current window when one
    exists. When the window holds no period (common for auto-generated
    captions), the cut falls back to the last whitespace so words are not
    split; only a single unbroken run longer than the window is cut hard.
    Text that already fits in one window is returned as a single chunk.

    Args:
        text: The text to split.
        max_chunk_length: Maximum number of characters per chunk; must be
            a positive integer.

    Returns:
        A list of non-empty, whitespace-stripped chunks (empty list for
        empty or whitespace-only input).

    Raises:
        ValueError: If ``max_chunk_length`` is not positive (a
            non-positive window can never consume the text).
    """
    if max_chunk_length <= 0:
        raise ValueError("max_chunk_length must be a positive integer")

    chunks = []
    remaining = text.strip()
    while remaining:
        # The tail fits in one window: keep it whole instead of splitting
        # it again at its last period.
        if len(remaining) <= max_chunk_length:
            chunks.append(remaining)
            break

        window = remaining[:max_chunk_length]
        cut = window.rfind(".") + 1  # 0 when the window has no period
        if cut == 0:
            # No sentence boundary: prefer a word boundary over a mid-word cut.
            cut = max(window.rfind(" "), window.rfind("\n"), window.rfind("\t"))
            if cut <= 0:
                cut = len(window)

        piece = remaining[:cut].strip()
        if piece:
            chunks.append(piece)
        remaining = remaining[cut:].strip()
    return chunks
def summary(input_text):
    """
    Summarize text of any length piece by piece.

    The input is split with ``chunk_text``, every piece is passed through
    the ``text_summary`` pipeline, and the per-piece summaries are joined
    with single spaces.
    """
    partial_summaries = []
    for piece in chunk_text(input_text):
        result = text_summary(piece)
        partial_summaries.append(result[0]['summary_text'])
    return " ".join(partial_summaries)
# Compiled once at import time. The trailing ``(?:shorts|live)\/``
# alternative is listed last so every URL the earlier alternatives
# already matched keeps resolving to the same ID.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v="
    r"|(?:shorts|live)\/)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
)


def extract_video_id(url):
    """
    Extract the 11-character YouTube video ID from a URL.

    Recognised forms include ``watch?v=<id>``, ``/v/<id>``, ``/e/<id>``,
    ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``youtu.be/<id>``.

    Args:
        url: The URL (or any text containing one) to inspect.

    Returns:
        The video ID string, or None when ``url`` is not a string or no
        ID can be found.
    """
    # Non-string input (e.g. None from an empty form field) would make
    # the regex search raise TypeError; treat it as "no ID found".
    if not isinstance(url, str):
        return None
    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None
def get_youtube_transcript(video_url):
    """
    Return a summary of the transcript of the given YouTube video.

    The video ID is pulled out of ``video_url``, the transcript is fetched
    through ``YouTubeTranscriptApi``, flattened to plain text with
    ``TextFormatter`` and passed to ``summary``. Failures are reported as
    a message string instead of being raised, so the caller always gets
    text back.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return "Video ID could not be extracted."

    try:
        # Instance-based fetch() call of youtube_transcript_api.
        fetched = YouTubeTranscriptApi().fetch(video_id)
        plain_text = TextFormatter().format_transcript(fetched)
        return summary(plain_text)
    except Exception as e:
        # Any failure while fetching, formatting or summarizing is turned
        # into the returned message.
        return f"An error occurred: {e}"
# Assemble the Gradio UI: one single-line URL textbox in, one six-line
# summary textbox out, wired to get_youtube_transcript.
demo = gr.Interface(
    title="SBBY Project 2: YouTube Script Summarizer",
    description="Summarize any YouTube video's transcript into a concise version.",
    fn=get_youtube_transcript,
    inputs=[gr.Textbox(lines=1, label="Input YouTube URL to summarize")],
    outputs=[gr.Textbox(lines=6, label="Summarized text")],
)

# Start the app; share=True requests a public share link from Gradio.
demo.launch(share=True)