Spaces:
Sleeping
Sleeping
| from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled | |
| import re | |
| import torch | |
| import gradio as gr | |
| from transformers import pipeline | |
| #test | |
| # Load the summarization model | |
# Build the summarization pipeline once, at import time, so that every
# request handled by the app reuses the same loaded model.
text_summary = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.float32,
)
def summary(input_text, max_input_length=1024):
    """Summarize *input_text* with the module-level ``text_summary`` pipeline.

    Args:
        input_text: The text to summarize.
        max_input_length: Maximum number of characters of ``input_text``
            handed to the model; anything beyond it is dropped.

    Returns:
        The generated summary, ``"No text to summarize."`` when the input is
        empty or whitespace-only, or ``"Error summarizing text."`` when the
        pipeline raises ``IndexError``.
    """
    if not input_text or not input_text.strip():
        # Nothing meaningful to feed the model; skip the expensive call.
        return "No text to summarize."

    clipped_text = input_text[:max_input_length]
    try:
        # A character cap is not a token cap: some characters expand to
        # several tokens, so also let the tokenizer truncate to the model's
        # own maximum input length instead of overflowing it.
        output = text_summary(clipped_text, truncation=True)
        return output[0]['summary_text']
    except IndexError as e:
        print(f"Error summarizing text: {e}")
        return "Error summarizing text."
| # Function to extract video ID from YouTube URL | |
def get_video_id(url):
    """Extract the video ID from a YouTube URL.

    Recognizes ``...?v=<id>`` watch URLs, ``youtu.be/<id>`` short links and
    ``/shorts/<id>``, ``/embed/<id>`` and ``/live/<id>`` paths.

    Args:
        url: The URL text to inspect; may be empty or ``None``.

    Returns:
        The video ID string, or ``None`` when no ID can be found.
    """
    if not url:
        return None

    # The ID stops at the first query/fragment delimiter, path separator or
    # whitespace, so share-link suffixes such as "?si=..." or "?t=10" and a
    # trailing newline from the input box never leak into the result.
    id_chars = r"[^&#?/\s]+"

    # Markers are tried in priority order; "v=" and "be/" come first, as in
    # the original lookup.
    for marker in ("v=", "be/", "/shorts/", "/embed/", "/live/"):
        match = re.search(rf"(?<={re.escape(marker)}){id_chars}", url)
        if match:
            return match.group(0)
    return None
| # Function to fetch transcript with error handling | |
def fetch_transcript(url):
    """Fetch the transcript of a YouTube video and return a summary of it.

    Args:
        url: The YouTube URL entered by the user; surrounding whitespace is
            ignored.

    Returns:
        The summary produced by ``summary()``, or a human-readable message
        when the URL is invalid, transcripts are disabled, the transcript
        cannot be fetched, or summarization fails.
    """
    video_id = get_video_id((url or "").strip())
    if not video_id:
        return "Invalid YouTube URL"

    # Fetching and summarizing are guarded separately so the message shown to
    # the user names the step that actually failed.
    try:
        # NOTE(review): newer youtube-transcript-api releases replace the
        # static get_transcript() with an instance fetch() -- confirm the
        # version pinned for this app.
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
    except TranscriptsDisabled:
        return "Transcripts are disabled for this video."
    except Exception as e:
        print(f"Error: {e}")
        return "An error occurred while fetching the transcript."

    try:
        transcript_text = "\n".join(entry['text'] for entry in transcript)
        return summary(transcript_text)
    except Exception as e:
        print(f"Error: {e}")
        return "Error summarizing text."
| # Close any existing Gradio instances | |
# Shut down any Gradio servers that are still running before starting anew.
gr.close_all()

# One textbox in (the video URL), one textbox out (the summary).
_url_box = gr.Textbox(lines=2, label="Input YouTube URL to summarize")
_summary_box = gr.Textbox(lines=7, label="Summarized Text")

demo = gr.Interface(
    fn=fetch_transcript,
    inputs=[_url_box],
    outputs=[_summary_box],
    title="YouTube Script Summarization",
    description="Summarize any YouTube video in seconds! ",
    theme="soft",
)

# Start serving the app.
demo.launch()