Spaces:
Sleeping
Sleeping
# app.py
# A feature-rich web app that summarizes YouTube videos using multiple models,
# chunking for long transcripts, and user-controlled output length.
import time
from urllib.parse import parse_qs, urlparse

import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
# Model and Splitter Setup
# Maps the display name offered in the UI dropdown to the Hugging Face model
# identifier that is handed to the summarization pipeline.
MODELS = {
    "DistilBART (Default, Fast)": "sshleifer/distilbart-cnn-12-6",
    "BART (Large, More Accurate)": "facebook/bart-large-cnn",
    "Pegasus (Best for News/Articles)": "google/pegasus-xsum",
}

# Model cache: display name -> already-constructed pipeline.
loaded_models = {}
def get_model(model_name):
    """Return the summarization pipeline for *model_name*, loading it on first use.

    Constructed pipelines are stored in the module-level ``loaded_models``
    dict under their display name, so a repeat request for the same model
    returns the stored pipeline instead of building it again.

    Args:
        model_name: Display name of the model; must be a key of ``MODELS``.

    Returns:
        The object produced by ``pipeline("summarization", model=...)`` for
        the model identifier that ``MODELS`` maps ``model_name`` to.

    Raises:
        ValueError: If ``model_name`` is not a key of ``MODELS``.
    """
    if model_name not in MODELS:
        raise ValueError(f"Unknown Model: {model_name}")

    if model_name in loaded_models:
        print(f"Returning cached model: {model_name}")
        return loaded_models[model_name]

    print(f"Loading new model: {model_name} (this may take a minute)...")
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # wall-clock adjustments that happen while the model is loading.
    started = time.perf_counter()
    summarizer = pipeline("summarization", model=MODELS[model_name])
    loaded_models[model_name] = summarizer
    elapsed = time.perf_counter() - started
    print(f"Model '{model_name}' loaded in {elapsed:.2f} seconds.")
    return summarizer
# Instantiate the Text Splitter
# Shared splitter used to cut a transcript into pieces of chunk_size=1000 with
# chunk_overlap=100 between neighbouring pieces before summarization.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
# Core Summarization Function
# Hosts whose URLs are recognised as YouTube video links (besides youtu.be).
_YOUTUBE_HOSTS = frozenset(
    {"youtube.com", "www.youtube.com", "m.youtube.com", "music.youtube.com"}
)
# Path prefixes of the form /<prefix>/<video_id>.
_PATH_ID_PREFIXES = ("shorts", "embed", "live")


def _extract_video_id(youtube_url):
    """Return the video ID contained in *youtube_url*, or ``None`` if none is found."""
    if not isinstance(youtube_url, str) or not youtube_url.strip():
        return None
    url = youtube_url.strip()

    video_id = ""
    try:
        # urlparse only populates the host when a scheme is present, so add
        # one for inputs such as "youtube.com/watch?v=...".
        parsed = urlparse(url if "://" in url else f"https://{url}")
        host = (parsed.hostname or "").lower()
    except ValueError:
        # Malformed netloc (e.g. an unbalanced IPv6 bracket).
        parsed, host = None, ""

    if parsed is not None:
        segments = [part for part in parsed.path.split("/") if part]
        if host in ("youtu.be", "www.youtu.be"):
            video_id = segments[0] if segments else ""
        elif host in _YOUTUBE_HOSTS:
            if segments == ["watch"]:
                # parse_qs finds "v" wherever it sits in the query string.
                video_id = parse_qs(parsed.query).get("v", [""])[0]
            elif len(segments) >= 2 and segments[0] in _PATH_ID_PREFIXES:
                video_id = segments[1]

    if not video_id and "youtube.com/watch?v=" in url:
        # Backward-compatible fallback: keep accepting any text that embeds a
        # watch URL, parsed the way this function always parsed it.
        video_id = url.split("v=")[1].split("&")[0]

    return video_id or None


def youtube_summarizer(model_name, youtube_url, min_len, max_len):
    """Summarize the transcript of a YouTube video.

    The transcript is fetched, split with the module-level ``text_splitter``,
    each chunk is summarized on its own, and the joined chunk summaries are
    summarized once more to produce the final text.

    Args:
        model_name: Display name of the model, passed to ``get_model``.
        youtube_url: Link to the video. Accepts ``watch?v=`` URLs (with ``v``
            anywhere in the query), ``youtu.be`` short links and
            ``/shorts/``, ``/embed/`` and ``/live/`` URLs.
        min_len: ``min_length`` for the final summarization pass.
        max_len: ``max_length`` for the final summarization pass.

    Returns:
        The final summary text, or a human-readable error message. This
        function reports every failure through its return value so the UI
        can display it; it does not raise.
    """
    # Validate the cheap inputs first so a bad request never pays for a
    # (potentially minute-long) model load.
    video_id = _extract_video_id(youtube_url)
    if video_id is None:
        return "Please enter a valid YouTube video URL."

    try:
        # Numeric UI widgets may hand over floats; generation lengths must be ints.
        min_len, max_len = int(min_len), int(max_len)
    except (TypeError, ValueError, OverflowError):
        return "Summary lengths must be whole numbers."
    if min_len > max_len:
        # The two sliders overlap, so this contradictory request is reachable.
        return "Minimum summary length cannot exceed the maximum summary length."

    try:
        summarizer = get_model(model_name)
    except Exception as e:
        return f"Error loading model: {e}"

    try:
        print(f"Fetching transcript for video ID: {video_id}")
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        transcript = " ".join(d['text'] for d in transcript_list)
        print("Transcript fetched successfully.")

        print("Splitting transcript into chunks...")
        chunks = text_splitter.split_text(transcript)
        if not chunks:
            # Nothing to summarize; do not send an empty string to the model.
            return "Error: The transcript for this video is empty."

        print("Summarizing individual chunks...")
        # truncation=True asks the pipeline to clip over-long inputs to the
        # model's input limit rather than fail on them.
        initial_summaries = [
            summarizer(
                chunk,
                max_length=150,
                min_length=30,
                do_sample=False,
                truncation=True,
            )[0]['summary_text']
            for chunk in chunks
        ]

        print("Creating final summary...")
        combined_summary_text = " ".join(initial_summaries)
        # For long videos the joined chunk summaries can exceed the model's
        # input limit, so truncation matters most on this pass.
        final_summary = summarizer(
            combined_summary_text,
            max_length=max_len,
            min_length=min_len,
            do_sample=False,
            truncation=True,
        )
        return final_summary[0]['summary_text']
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video."
    except NoTranscriptFound:
        return "Error: No English transcript found for this video. The API may not support auto-generated captions."
    except Exception as e:
        # UI boundary: surface anything unexpected as text instead of crashing.
        return f"An unexpected error occurred: {e}"
# Gradio Interface
print("Creating Gradio interface...")

# Input widgets, in the positional order youtube_summarizer expects them:
# model name, URL, minimum length, maximum length.
model_dropdown = gr.Dropdown(
    choices=list(MODELS.keys()),
    value="DistilBART (Default, Fast)",
    label="Select Summarization Model",
)
url_textbox = gr.Textbox(
    label="YouTube URL",
    placeholder="e.g., https://www.youtube.com/watch?v=...",
)
min_length_slider = gr.Slider(
    minimum=30, maximum=200, value=70, step=10,
    label="Minimum Final Summary Length",
)
max_length_slider = gr.Slider(
    minimum=100, maximum=500, value=350, step=10,
    label="Maximum Final Summary Length",
)

# Each example row supplies one value per input widget, in the same order.
example_rows = [
    ["DistilBART (Default, Fast)", "https://www.youtube.com/watch?v=jaYN-iwgw2g", 50, 150],
    ["BART (Large, More Accurate)", "https://www.youtube.com/watch?v=Yo22h_i_5kY", 100, 300],
]

demo = gr.Interface(
    fn=youtube_summarizer,
    inputs=[model_dropdown, url_textbox, min_length_slider, max_length_slider],
    outputs=gr.Textbox(label="Video Summary", lines=10),
    title="Advanced YouTube Video Summarizer",
    description="""
**Summarize any YouTube video with your choice of AI model!**
1. Select a summarization model from the dropdown.
2. Paste a YouTube video URL.
3. Adjust the sliders to control the length of the final summary.
*Note: Loading a new model for the first time may take a minute or two. Subsequent uses will be much faster.*
""",
    allow_flagging="never",
    examples=example_rows,
)

print("Launching Gradio app...")
demo.launch()