import re from youtube_transcript_api import YouTubeTranscriptApi from youtube_transcript_api.formatters import TextFormatter import torch import gradio as gr from transformers import pipeline # Auto-select device device = 0 if torch.cuda.is_available() else -1 # Load summarization model text_summary = pipeline( "summarization", model="sshleifer/distilbart-cnn-12-6", device=device ) def chunk_text(text, max_chunk_length=800): """ Splits text into chunks without breaking sentences. """ chunks = [] while len(text) > 0: part = text[:max_chunk_length] last_period = part.rfind(".") if last_period != -1: part = text[:last_period + 1] chunks.append(part.strip()) text = text[len(part):].strip() return chunks def summary(input_text): """ Summarizes long text by breaking into chunks and summarizing each. """ chunks = chunk_text(input_text) summaries = [text_summary(chunk)[0]['summary_text'] for chunk in chunks] return " ".join(summaries) def extract_video_id(url): """ Extract YouTube video ID from various formats of YouTube URLs. """ regex = r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})" match = re.search(regex, url) return match.group(1) if match else None def get_youtube_transcript(video_url): """ Fetch transcript from YouTube and summarize it. """ video_id = extract_video_id(video_url) if not video_id: return "Video ID could not be extracted." try: api = YouTubeTranscriptApi() transcript = api.fetch(video_id) # FIXED for new API formatter = TextFormatter() text_transcript = formatter.format_transcript(transcript) return summary(text_transcript) except Exception as e: return f"An error occurred: {e}" # Build Gradio Interface demo = gr.Interface( fn=get_youtube_transcript, inputs=[gr.Textbox(label="Input YouTube URL to summarize", lines=1)], outputs=[gr.Textbox(label="Summarized text", lines=6)], title="SBBY Project 2: YouTube Script Summarizer", description="Summarize any YouTube video's transcript into a concise version." ) # Launch with public link enabled demo.launch(share=True)