# humbleakh's picture
# Update app.py
# 7411260 verified
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
import re
import torch
import gradio as gr
from transformers import pipeline
#test
# Build the summarization pipeline at module load; summary() reuses this object.
text_summary = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.float32,
)
def summary(input_text, max_input_length=1024):
    """Summarize ``input_text`` with the module-level ``text_summary`` pipeline.

    Args:
        input_text: Text to condense. Only its first ``max_input_length``
            characters are handed to the model.
        max_input_length: Character cap applied before summarizing. Defaults
            to 1024, the value that used to be hard-coded here.

    Returns:
        The generated summary string, or ``"Error summarizing text."`` when
        there is nothing to summarize or the pipeline raises ``IndexError``.
    """
    # A blank prompt gives the model nothing to work with; skip the call.
    if not input_text or not input_text.strip():
        print("Error summarizing text: empty input")
        return "Error summarizing text."

    # Slicing is a no-op when the text is already short enough.
    input_text = input_text[:max_input_length]
    try:
        # The cap above counts characters, not tokens. truncation=True asks
        # the tokenizer to clip anything that still exceeds the model's
        # maximum input length instead of failing on it.
        output = text_summary(input_text, truncation=True)
        return output[0]['summary_text']
    except IndexError as e:
        print(f"Error summarizing text: {e}")
        return "Error summarizing text."
# Characters that can never be part of a video ID: query/fragment separators,
# path separators and whitespace (e.g. a trailing newline from a textbox).
_VIDEO_ID_CHARS = r"([^&#?/\s]+)"

# Tried in order; the first two mirror the original "v=" and "be/" lookups,
# the third adds the /shorts/, /embed/ and /live/ URL layouts.
_VIDEO_ID_PATTERNS = tuple(
    re.compile(prefix + _VIDEO_ID_CHARS)
    for prefix in (r"v=", r"be/", r"/(?:shorts|embed|live)/")
)


# Function to extract video ID from YouTube URL
def get_video_id(url):
    """Extract the video ID from a YouTube URL.

    Recognises ``...watch?v=ID``, ``youtu.be/ID``, ``/shorts/ID``,
    ``/embed/ID`` and ``/live/ID`` forms. The ID stops at the first
    ``&``, ``#``, ``?``, ``/`` or whitespace character, so share links such
    as ``youtu.be/ID?si=...`` no longer leak their query string into the ID.

    Args:
        url: The URL text to inspect.

    Returns:
        The video ID string, or ``None`` when ``url`` is not a string or no
        known pattern matches.
    """
    if not isinstance(url, str):
        return None
    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)
    return None
# Function to fetch transcript with error handling
def fetch_transcript(url):
    """Fetch the transcript of the YouTube video at ``url`` and summarize it.

    Args:
        url: YouTube URL as entered by the user.

    Returns:
        The summary produced by ``summary()``, or one of the fixed messages
        ``"Invalid YouTube URL"``, ``"Transcripts are disabled for this
        video."`` or ``"An error occurred while fetching the transcript."``.
    """
    # Anything that is not a string cannot hold a URL; treat it as invalid
    # instead of letting the regex lookup raise.
    video_id = get_video_id(url) if isinstance(url, str) else None
    if not video_id:
        return "Invalid YouTube URL"
    try:
        # Fetch the transcript. Older youtube-transcript-api releases expose a
        # static get_transcript(); newer ones replace it with an instance
        # fetch() whose result converts back to the same list of dicts.
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            transcript = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        transcript_text = "\n".join(entry['text'] for entry in transcript)
        return summary(transcript_text)
    except TranscriptsDisabled:
        return "Transcripts are disabled for this video."
    except Exception as e:
        # Top-level boundary for the UI: log the cause and show a fixed
        # message rather than a traceback.
        print(f"Error: {e}")
        return "An error occurred while fetching the transcript."
# Shut down any Gradio instances that are already running
gr.close_all()

# Wire fetch_transcript into a one-textbox-in, one-textbox-out interface
demo = gr.Interface(
    title="YouTube Script Summarization",
    description="Summarize any YouTube video in seconds! ",
    theme="soft",
    fn=fetch_transcript,
    inputs=[gr.Textbox(lines=2, label="Input YouTube URL to summarize")],
    outputs=[gr.Textbox(lines=7, label="Summarized Text")],
)

# Start the app
demo.launch()