# Page header captured with the file: author "shil51", commit message
# "Create app.py", commit 4b63428 (verified).
import torch
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled, VideoUnavailable, NoTranscriptFound
from transformers import pipeline
# Alternative (disabled): load the checkpoint from a local snapshot directory
# and run it in float32.
# model_path = "../models/models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff"
# text_summary = pipeline("summarization", model=model_path, torch_dtype=torch.float32)
#
# Build the summarization pipeline once at import time, by model id, with
# bfloat16 requested as the torch dtype. summary() calls this object for
# every chunk of text.
text_summary = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", torch_dtype=torch.bfloat16)
def split_text(text, max_tokens=1024):
    """Lazily yield consecutive pieces of *text*, each at most *max_tokens* words.

    Despite the parameter name, the unit is a whitespace-separated word
    (``str.split()``), not a model token. Runs of whitespace, including
    newlines, collapse to single spaces in the yielded pieces. Text that is
    empty or whitespace-only yields nothing.
    """
    word_list = text.split()
    word_count = len(word_list)
    for start in range(0, word_count, max_tokens):
        stop = start + max_tokens
        piece = word_list[start:stop]
        yield " ".join(piece)
def summary(input_text):
    """Summarize text of any length by summarizing it chunk by chunk.

    The text is cut into pieces with ``split_text`` (default chunk size),
    each piece is passed to the module-level ``text_summary`` pipeline, and
    the per-chunk summaries are joined with single spaces.

    Args:
        input_text: The full text to summarize.

    Returns:
        The concatenated chunk summaries; an empty string when the input
        contains no words.
    """
    # split_text sizes chunks in whitespace-separated words, not model
    # tokens, so a chunk may tokenize to more than the model's input limit.
    # truncation=True makes the pipeline clip such a chunk instead of
    # failing on an over-long sequence.
    chunk_summaries = [
        text_summary(chunk, truncation=True)[0]['summary_text']
        for chunk in split_text(input_text)
    ]
    return " ".join(chunk_summaries)
def extract_video_id(url):
    """Return the video id contained in a YouTube URL.

    Supported forms (scheme optional, any ``*.youtube.com`` subdomain):

    * ``youtu.be/<id>``
    * ``youtube.com/watch?v=<id>`` (``v`` may appear anywhere in the query)
    * ``youtube.com/shorts/<id>``, ``/embed/<id>``, ``/live/<id>``

    Query strings, fragments, trailing slashes and surrounding whitespace
    are ignored.

    Args:
        url: The URL as typed or pasted by the user.

    Returns:
        The video id string.

    Raises:
        ValueError: If the URL is not a recognised YouTube URL or carries
            no video id.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import parse_qs, urlparse

    cleaned = url.strip()
    # urlparse only fills in the host when a scheme is present.
    if "://" not in cleaned:
        cleaned = "https://" + cleaned

    parts = urlparse(cleaned)
    host = (parts.hostname or "").lower()
    segments = [segment for segment in parts.path.split("/") if segment]

    video_id = ""
    if host == "youtu.be" or host.endswith(".youtu.be"):
        # Short links carry the id as the first path segment; share
        # parameters such as "?si=..." live in the query and are dropped.
        video_id = segments[0] if segments else ""
    elif host == "youtube.com" or host.endswith(".youtube.com"):
        if segments[:1] == ["watch"]:
            # Read "v" from the parsed query so parameter order is irrelevant.
            video_id = parse_qs(parts.query).get("v", [""])[0]
        elif len(segments) >= 2 and segments[0] in ("shorts", "embed", "live"):
            video_id = segments[1]

    if not video_id:
        raise ValueError("Invalid YouTube URL format")
    return video_id
def get_youtube_transcript(video_url):
    """Fetch a YouTube video's transcript and return a summary of it.

    This is the callback wired into the Gradio interface, so every outcome,
    including failures, is returned as a display string rather than raised.

    Args:
        video_url: URL of the YouTube video, as entered by the user.

    Returns:
        The summary text, or a short human-readable message when the URL is
        invalid, the transcript is empty, or the transcript cannot be
        retrieved.
    """
    # Base class of the library's transcript-retrieval errors, used as the
    # fallback handler below. Imported here so the block is self-contained.
    from youtube_transcript_api._errors import CouldNotRetrieveTranscript

    try:
        video_id = extract_video_id(video_url)
        # NOTE(review): get_transcript is called as a static helper; confirm
        # it is still provided by the installed youtube-transcript-api version.
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        text_transcript = "\n".join(entry['text'] for entry in transcript)
        if not text_transcript.strip():
            return "The transcript is empty or unavailable."
        return summary(text_transcript)
    except ValueError as e:
        # Raised by extract_video_id for unrecognised URLs.
        return f"Error: {e}"
    except TranscriptsDisabled:
        return "Transcript is disabled for this video."
    except VideoUnavailable:
        return "Video is unavailable."
    except NoTranscriptFound:
        return "No transcript found for this video."
    except CouldNotRetrieveTranscript:
        # Any other retrieval failure reported by the library; without this
        # handler it would propagate out of the UI callback.
        return "Could not retrieve the transcript for this video."
# if __name__ == "__main__":
# youtube_url = input("Enter YouTube URL: ").strip()
# transcript_summary = get_youtube_transcript(youtube_url)
# print("\n=== Summary ===\n")
# print(transcript_summary)
# Shut down any Gradio apps still running in this process before starting.
gr.close_all()

# One text box in (the video URL), one text box out (the summary);
# get_youtube_transcript does the work in between.
demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[
        gr.Textbox(label="Input Youtube video url to summarize", lines=2),
    ],
    outputs=[
        gr.Textbox(label="Summarized text", lines=6),
    ],
    title="GenAI Project 2: Video to Text Summarizer",
    description="This application is use to summarized the text from youtube video",
)

# Start the web server for the interface.
demo.launch()