Spaces:
Running
Running
| import re | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from youtube_transcript_api.formatters import TextFormatter | |
| import torch | |
| import gradio as gr | |
| from transformers import pipeline | |
# Load the summarization model once at import time so every request
# reuses the same pipeline object instead of reloading the weights.
text_summary = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)
# Function to summarize text
def summary(input_text):
    """Summarize *input_text* with the module-level ``text_summary`` pipeline.

    Args:
        input_text: The text to condense (in this app, a video transcript).

    Returns:
        The ``summary_text`` field of the first pipeline result, or an
        empty string when there is nothing to summarize.
    """
    # Nothing to summarize: skip the model call entirely.
    if not input_text or not input_text.strip():
        return ""
    # Transcripts are routinely longer than the model's maximum input
    # length; truncation=True makes the tokenizer clip the input instead
    # of letting the forward pass fail on an over-long sequence.
    output = text_summary(input_text, truncation=True)
    return output[0]['summary_text']
# Matches the 11-character video ID in the common YouTube URL shapes:
# watch?v=<id>, /v/<id>, /e/<id>, /embed/<id>, /shorts/<id>, /live/<id>,
# nested paths such as /user/<name>/<x>/<id>, and youtu.be/<id>.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?|shorts|live)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
)


# Function to extract YouTube video ID from URL
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*.

    Args:
        url: A YouTube URL (watch, embed, shorts, live or youtu.be form).

    Returns:
        The video ID as a string, or ``None`` when *url* is not a string
        or contains no recognizable YouTube video ID.
    """
    # A non-string (e.g. None from an empty form field) cannot hold a URL;
    # report "no ID" rather than letting the regex engine raise TypeError.
    if not isinstance(url, str):
        return None
    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None
# Function to get YouTube transcript and summary
def get_youtube_transcript(video_url, summarize=True):
    """Fetch a video's transcript and return it, optionally summarized.

    Args:
        video_url: URL of the YouTube video.
        summarize: Either a boolean, or the label coming from the UI radio
            button. As a string, only ``"Summary"`` (case-insensitive,
            surrounding whitespace ignored) requests a summary; any other
            string, e.g. ``"Full Transcript"``, returns the raw transcript.

    Returns:
        The summary or the full transcript text. On failure a
        human-readable message is returned instead of raising, because the
        result is displayed directly in the UI.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return "Video ID could not be extracted."

    # The radio button delivers its selected label, and every non-empty
    # label is truthy, so a plain truth test would always summarize.
    if isinstance(summarize, str):
        wants_summary = summarize.strip().casefold() == "summary"
    else:
        wants_summary = bool(summarize)

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        text_transcript = TextFormatter().format_transcript(transcript)
        if wants_summary:
            return summary(text_transcript)
        return text_transcript
    except Exception as e:
        # UI boundary: surface the failure as text rather than a traceback.
        return f"An error occurred: {e}"
# Define the Gradio interface with customization
def create_gradio_interface():
    """Build the Gradio Blocks UI for the transcript summarizer.

    The layout is a URL textbox, a radio button choosing between a summary
    and the full transcript, a result textbox and a "Generate" button.

    Returns:
        The assembled ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """

    def _on_generate(video_url, output_choice):
        # The radio passes its selected label (a string). Both labels are
        # truthy, so convert to a real boolean before handing it to the
        # `summarize` flag.
        return get_youtube_transcript(
            video_url, summarize=(output_choice == "Summary")
        )

    with gr.Blocks() as demo:
        # NOTE(review): the emoji and the dash below were restored from
        # mis-decoded (mojibake) characters — confirm the intended glyphs.
        gr.Markdown("# YouTube Script Summarizer 🤖")
        gr.Markdown(
            "\n"
            "Enter the video URL, and choose whether you'd like the full Script or just the summary.\n"
            "### Credits:\n"
            "Created by **Taizun** — Providing a simple solution for video summarization!\n"
        )

        # Input for YouTube URL
        video_url_input = gr.Textbox(label="Input YouTube URL", lines=1)

        # Radio button for choosing output type (summary or full transcript)
        output_type = gr.Radio(
            choices=["Summary", "Full Transcript"],
            label="Choose Output Type",
            value="Summary",
        )

        # Output for summarized or full transcript text
        output_text = gr.Textbox(label="Result", lines=6)

        # Submit button
        submit_button = gr.Button("Generate", variant="primary")

        # Define the action for the button press
        submit_button.click(
            fn=_on_generate,
            inputs=[video_url_input, output_type],
            outputs=[output_text],
        )
    return demo
# Build the interface once, keep it under the module-level name `demo`,
# then start serving it with a share link requested.
demo = create_gradio_interface()
demo.launch(share=True)