# Uploaded by: pavan-genai
# Last commit: "Update app.py"
# Revision: 00cac89 (verified)
# Code Generated by Sidekick is for learning and experimentation purposes only.
from youtube_transcript_api import YouTubeTranscriptApi
from urllib.parse import urlparse, parse_qs
import torch
from transformers import pipeline
import gradio as gr
# Summarization pipeline used by summary(); it is constructed once, when the
# module is loaded. The checkpoint is referenced by its hub id. The
# commented-out line below is an alternative local snapshot path.
# model_path = "models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff"
text_summary = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)
# Code Generated by Sidekick is for learning and experimentation purposes only.
def _split_into_chunks(text, max_chunk_length):
    """Cut *text* into non-blank pieces of at most *max_chunk_length* characters."""
    chunks = []
    remaining = text.strip()
    while len(remaining) > max_chunk_length:
        window = remaining[:max_chunk_length]
        # Prefer to end the chunk right after the last complete sentence.
        cut = window.rfind('.') + 1
        if cut == 0:
            # No sentence end inside the window (e.g. unpunctuated captions):
            # fall back to the last space so words are not cut in half.
            cut = window.rfind(' ')
        if cut <= 0:
            # One unbroken run of characters: hard split exactly at the limit.
            cut = max_chunk_length
        chunks.append(remaining[:cut].strip())
        # Dropping leading whitespace guarantees the next chunk starts with
        # real content, so no chunk can ever be blank.
        remaining = remaining[cut:].lstrip()
    if remaining:
        chunks.append(remaining)
    return chunks


def summary(input_text, max_chunk_length=800):
    """Summarize text of any length by summarizing it chunk by chunk.

    The text is cut into pieces of at most ``max_chunk_length`` characters
    (at a sentence end when possible, otherwise at a space), each piece is
    passed to the module-level ``text_summary`` pipeline, and the per-chunk
    summaries are joined with single spaces.

    Args:
        input_text: Text to summarize. ``None`` or blank text yields ``""``.
        max_chunk_length: Upper bound, in characters, for each chunk.

    Returns:
        The joined chunk summaries, or ``""`` if nothing could be summarized.
        A chunk whose summarization fails is reported on stdout and skipped.

    Raises:
        ValueError: If ``max_chunk_length`` is smaller than 1.
    """
    if max_chunk_length < 1:
        # A non-positive limit cannot be honoured; negative values previously
        # made the splitting loop run forever on empty input.
        raise ValueError("max_chunk_length must be a positive integer")

    summaries = []
    chunks = _split_into_chunks(input_text or "", max_chunk_length)
    for i, chunk in enumerate(chunks):
        # Best effort: one failing chunk must not discard the other summaries.
        try:
            output = text_summary(chunk)
            if output and 'summary_text' in output[0]:
                summaries.append(output[0]['summary_text'])
            else:
                print(f"Warning: No summary returned for chunk {i}.")
        except Exception as e:
            print(f"Error summarizing chunk {i}: {e}")
    return " ".join(summaries)
def get_video_id(youtube_url):
    """Extract the video id from a YouTube URL.

    Recognized forms (the scheme is optional):
        youtu.be/<id>
        youtube.com/watch?v=<id>
        youtube.com/embed/<id>, /v/<id>, /shorts/<id>, /live/<id>
    with the ``www.``, ``m.`` and ``music.`` host variants of youtube.com.

    Args:
        youtube_url: URL text, typically as typed or pasted by a user.

    Returns:
        The video id as a string, or ``None`` when the input is not a
        recognized YouTube URL or does not carry an id.
    """
    if not isinstance(youtube_url, str):
        return None
    url = youtube_url.strip()
    if not url:
        return None

    try:
        parsed = urlparse(url)
        if not parsed.netloc:
            # Pasted without a scheme ("youtube.com/watch?v=..."): urlparse
            # then files the host under the path, so parse again with one.
            parsed = urlparse('https://' + url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. a broken IPv6 host).
        return None

    host = parsed.hostname or ''
    segments = [part for part in parsed.path.split('/') if part]

    if host in ('youtu.be', 'www.youtu.be'):
        return segments[0] if segments else None

    youtube_hosts = (
        'youtube.com',
        'www.youtube.com',
        'm.youtube.com',
        'music.youtube.com',
    )
    if host in youtube_hosts:
        if segments[:1] == ['watch']:
            # .get() instead of ['v']: a watch URL without the parameter
            # must produce None, not a KeyError.
            values = parse_qs(parsed.query).get('v')
            return values[0] if values else None
        if len(segments) >= 2 and segments[0] in ('embed', 'v', 'shorts', 'live'):
            return segments[1]
    return None
def get_transcript(youtube_url):
    """Fetch a YouTube video's transcript and return a summary of it.

    Args:
        youtube_url: URL of the video to summarize.

    Returns:
        The summary text on success. On failure, a short human-readable
        message describing the problem, so the UI output box shows what went
        wrong instead of staying blank. The same message is printed to stdout.
    """
    video_id = get_video_id(youtube_url)
    if not video_id:
        message = "Invalid YouTube URL."
        print(message)
        return message
    try:
        if hasattr(YouTubeTranscriptApi, 'fetch'):
            # Newer youtube-transcript-api releases: instance API whose
            # result iterates over snippet objects with a `.text` attribute.
            snippets = YouTubeTranscriptApi().fetch(video_id)
            texts = [snippet.text for snippet in snippets]
        else:
            # Older releases: static call returning a list of dicts.
            entries = YouTubeTranscriptApi.get_transcript(video_id)
            texts = [entry['text'] for entry in entries]
        full_transcript = " ".join(texts)
        return summary(full_transcript)
    except Exception as e:
        # Top-level boundary for the UI callback: report instead of crashing.
        message = f"Could not retrieve transcript: {e}"
        print(message)
        return message
# Close any Gradio interfaces that are already running before starting this one.
gr.close_all()

# One textbox in (the video URL), one textbox out (the summary).
url_input = gr.Textbox(label="Input Youtube URL to summarize", lines=1)
summary_output = gr.Textbox(label="Summarized text", lines=4)

demo = gr.Interface(
    fn=get_transcript,
    inputs=[url_input],
    outputs=[summary_output],
    title="@pavan-genai Project 2: Youtube Script Summarizer",
    description="THIS APPLICATION WILL BE USED TO SUMMARIZE THE YOUTUBE VIDEO",
)
demo.launch()