Spaces:
Sleeping
Sleeping
| import re | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from youtube_transcript_api.formatters import TextFormatter | |
| import torch | |
| import gradio as gr | |
| from transformers import pipeline | |
# Pick the pipeline device index: 0 when CUDA is available, otherwise -1
# (presumably "first GPU" vs. "CPU" per the transformers convention).
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Build the summarization pipeline once at import time so every request
# reuses the same loaded model.
text_summary = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    device=device,
)
def chunk_text(text, max_chunk_length=800):
    """
    Split text into chunks of at most ``max_chunk_length`` characters.

    Each chunk ends, in order of preference, at the last period inside the
    window (so sentences stay whole), at the last whitespace character (so
    words stay whole), or - only when the window contains neither - at the
    hard character limit.

    Args:
        text: The text to split. ``None`` or an empty / whitespace-only
            string yields an empty list.
        max_chunk_length: Maximum number of characters per chunk. Must be
            positive.

    Returns:
        A list of non-empty, whitespace-stripped chunks in original order.

    Raises:
        ValueError: If ``max_chunk_length`` is not positive (a zero-width
            window could never consume any text).
    """
    if max_chunk_length <= 0:
        raise ValueError("max_chunk_length must be a positive integer")
    if not text:
        return []

    chunks = []
    # Invariant: ``remaining`` is always stripped, so its first character is
    # never whitespace and every cut below yields a non-empty chunk.
    remaining = text.strip()
    while remaining:
        # A remainder that already fits needs no further splitting; cutting
        # it at a period would only produce a tiny trailing fragment.
        if len(remaining) <= max_chunk_length:
            chunks.append(remaining)
            break

        window = remaining[:max_chunk_length]
        # Prefer to end the chunk right after the last period in the window.
        cut = window.rfind(".") + 1
        if cut == 0:
            # No period (e.g. unpunctuated captions): break between words.
            last_space = max(window.rfind(sep) for sep in (" ", "\n", "\t"))
            # No whitespace either: fall back to a hard cut at the limit.
            cut = last_space if last_space > 0 else max_chunk_length

        chunks.append(remaining[:cut].strip())
        remaining = remaining[cut:].strip()
    return chunks
def summary(input_text):
    """
    Summarize long text by splitting it into chunks and summarizing each.

    Args:
        input_text: The text to condense.

    Returns:
        The per-chunk summaries joined with single spaces, in chunk order.
        An empty string when there is nothing to summarize.
    """
    # Skip blank chunks so the model is never invoked on empty input.
    chunks = [chunk for chunk in chunk_text(input_text) if chunk.strip()]
    # Chunks are sized in characters, but the model's input limit is counted
    # in tokens; truncation=True asks the pipeline to clip an over-long chunk
    # instead of failing on it (see the transformers pipeline docs).
    summaries = [
        text_summary(chunk, truncation=True)[0]['summary_text']
        for chunk in chunks
    ]
    return " ".join(summaries)
def extract_video_id(url):
    """
    Extract the 11-character YouTube video ID from a URL.

    Recognized forms include ``youtube.com/watch?v=<id>``,
    ``youtube.com/embed/<id>``, ``youtube.com/v/<id>``,
    ``youtube.com/shorts/<id>``, ``youtube.com/live/<id>`` and
    ``youtu.be/<id>``.

    Args:
        url: The URL (or any text containing one) to inspect.

    Returns:
        The video ID string, or ``None`` when ``url`` is not a string or no
        ID can be found in it.
    """
    # Guard against None / non-string input, which re.search would reject
    # with a TypeError.
    if not isinstance(url, str):
        return None

    regex = (
        r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/"
        r"|(?:v|e(?:mbed)?|shorts|live)\/"
        r"|\S*?[?&]v=)"
        r"|youtu\.be\/)([a-zA-Z0-9_-]{11})"
    )
    match = re.search(regex, url)
    return match.group(1) if match else None
def get_youtube_transcript(video_url):
    """
    Download a YouTube video's transcript and return its summary.

    Returns a human-readable message instead of raising when the video ID
    cannot be parsed from the URL or when fetching, formatting, or
    summarizing fails.
    """
    vid = extract_video_id(video_url)
    if vid is None or not vid:
        return "Video ID could not be extracted."

    # Everything below may fail (network, missing captions, model errors);
    # report the failure as text so the UI can display it.
    try:
        fetched = YouTubeTranscriptApi().fetch(vid)
        plain_text = TextFormatter().format_transcript(fetched)
        result = summary(plain_text)
    except Exception as e:
        return f"An error occurred: {e}"
    return result
# Assemble the Gradio UI: one single-line URL field in, one multi-line
# summary field out, wired to get_youtube_transcript.
_url_input = gr.Textbox(label="Input YouTube URL to summarize", lines=1)
_summary_output = gr.Textbox(label="Summarized text", lines=6)

demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[_url_input],
    outputs=[_summary_output],
    title="SBBY Project 2: YouTube Script Summarizer",
    description="Summarize any YouTube video's transcript into a concise version.",
)

# Start the app, requesting a public share link.
demo.launch(share=True)