# Code Generated by Sidekick is for learning and experimentation purposes only.
"""Summarize a YouTube video's transcript and serve the result through Gradio."""

from typing import List, Optional
from urllib.parse import parse_qs, urlparse

import gradio as gr
import torch
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi

# Local snapshot alternative to the hub id used below:
# model_path = "models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff"
text_summary = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)

# Hostnames that serve regular YouTube pages (urlparse lower-cases hostnames).
_YOUTUBE_HOSTS = frozenset(
    {"www.youtube.com", "youtube.com", "m.youtube.com", "music.youtube.com"}
)
# First path segment of URLs shaped like /<prefix>/<video_id>.
_ID_PATH_PREFIXES = frozenset({"embed", "v", "shorts", "live"})


def _split_into_chunks(text: Optional[str], max_chunk_length: int) -> List[str]:
    """Split *text* into pieces of at most *max_chunk_length* characters.

    Each cut is made after the last period inside the window; when the window
    has no period the cut falls back to the last space, and only then to a
    hard cut at the length limit.
    """
    if not text:
        return []
    # A non-positive limit could never shrink the remaining text.
    limit = max(1, max_chunk_length)

    chunks = []
    remaining = text.strip()
    while len(remaining) > limit:
        window = remaining[:limit]
        cut = window.rfind(".") + 1  # keep the period with its sentence
        if cut == 0:
            # Transcripts without punctuation: break between words instead.
            cut = window.rfind(" ") + 1
        if cut == 0:
            cut = limit
        piece = remaining[:cut].strip()
        if piece:
            chunks.append(piece)
        remaining = remaining[cut:].lstrip()
    if remaining:
        chunks.append(remaining)
    return chunks


def summary(input_text, max_chunk_length=800):
    """Summarize *input_text* chunk by chunk and join the partial summaries.

    Args:
        input_text: Text to summarize.
        max_chunk_length: Maximum number of characters passed to the
            summarization pipeline per call.

    Returns:
        The chunk summaries joined with single spaces; an empty string when
        no chunk produced a summary.
    """
    summaries = []
    for i, chunk in enumerate(_split_into_chunks(input_text, max_chunk_length)):
        # Best effort: a failing chunk is reported and skipped so the
        # remaining chunks still contribute to the result.
        try:
            output = text_summary(chunk)
            if output and 'summary_text' in output[0]:
                summaries.append(output[0]['summary_text'])
            else:
                print(f"Warning: No summary returned for chunk {i}.")
        except Exception as e:
            print(f"Error summarizing chunk {i}: {e}")
    return " ".join(summaries)


def get_video_id(youtube_url):
    """Extract the video id from a YouTube URL.

    Recognizes ``youtu.be/<id>``, ``/watch?v=<id>`` and
    ``/embed|v|shorts|live/<id>`` on the known YouTube hostnames, with or
    without a URL scheme.

    Returns:
        The video id, or ``None`` when the URL is not recognized or carries
        no id.
    """
    if not youtube_url:
        return None
    url = youtube_url.strip()
    try:
        parsed = urlparse(url)
        if not parsed.netloc:
            # Scheme-less input such as "youtube.com/watch?v=..." parses with
            # an empty netloc; retry with an explicit scheme.
            parsed = urlparse("https://" + url)
        host = parsed.hostname
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. a broken IPv6 literal).
        return None

    segments = [segment for segment in parsed.path.split("/") if segment]

    if host in ("youtu.be", "www.youtu.be"):
        return segments[0] if segments else None

    if host in _YOUTUBE_HOSTS:
        if segments == ["watch"]:
            # .get avoids a KeyError when the "v" parameter is missing.
            ids = parse_qs(parsed.query).get("v")
            return ids[0] if ids else None
        if len(segments) >= 2 and segments[0] in _ID_PATH_PREFIXES:
            return segments[1]
    return None


def get_transcript(youtube_url):
    """Fetch the transcript of a YouTube video and return its summary.

    Args:
        youtube_url: URL of the video to summarize.

    Returns:
        The summary text, or a human-readable error message when the URL is
        invalid, the transcript cannot be retrieved, or nothing could be
        summarized. A string is always returned so the Gradio output box is
        never left blank.
    """
    video_id = get_video_id(youtube_url)
    if not video_id:
        print("Invalid YouTube URL.")
        return "Invalid YouTube URL."

    # NOTE(review): get_transcript is the static API of older
    # youtube_transcript_api releases; confirm it exists in the pinned version.
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        full_transcript = " ".join(entry['text'] for entry in transcript)
    except Exception as e:
        message = f"Could not retrieve transcript: {e}"
        print(message)
        return message

    return summary(full_transcript) or "No summary could be generated for this video."


# Alternative wiring that summarizes raw text instead of a URL:
# demo = gr.Interface(fn=summary, inputs="text", outputs="text")
demo = gr.Interface(
    fn=get_transcript,
    inputs=[gr.Textbox(label="Input Youtube URL to summarize", lines=1)],
    outputs=[gr.Textbox(label="Summarized text", lines=4)],
    title="@pavan-genai Project 2: Youtube Script Summarizer",
    description="THIS APPLICATION WILL BE USED TO SUMMARIZE THE YOUTUBE VIDEO",
)

if __name__ == "__main__":
    # Shut down any Gradio servers left over from earlier runs, then serve.
    gr.close_all()
    demo.launch()