Spaces:
Sleeping
Sleeping
| # Code Generated by Sidekick is for learning and experimentation purposes only. | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from urllib.parse import urlparse, parse_qs | |
| import torch | |
| from transformers import pipeline | |
| import gradio as gr | |
# Summarization pipeline, built once at import time and reused by every call
# to summary(). The weights are loaded in bfloat16.
# NOTE(review): the commented-out path below looks like a local snapshot of
# the same checkpoint that was used in place of the hub id - confirm before
# re-enabling it.
# model_path = "models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff"
text_summary = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)
| # Code Generated by Sidekick is for learning and experimentation purposes only. | |
def _split_into_chunks(text, max_chunk_length):
    """Split *text* into non-empty pieces of at most *max_chunk_length* characters."""
    chunks = []
    remaining = text.strip()
    while len(remaining) > max_chunk_length:
        window = remaining[:max_chunk_length]
        # Prefer to end the chunk on the last sentence terminator in the window.
        cut = window.rfind('.') + 1
        if cut == 0:
            # No period in the window (common for auto-generated captions):
            # break before the last word so words are not cut in half.  When
            # the window holds no whitespace either, rsplit returns the window
            # itself and the chunk is cut at exactly max_chunk_length.
            cut = len(window.rsplit(None, 1)[0])
        chunks.append(remaining[:cut])
        # `remaining` always starts with a non-space character, which keeps
        # `cut` >= 1 above and guarantees the loop makes progress.
        remaining = remaining[cut:].lstrip()
    if remaining:
        chunks.append(remaining)
    return chunks


def summary(input_text, max_chunk_length=800):
    """Summarize *input_text* chunk by chunk and join the partial summaries.

    The text is split into chunks of at most ``max_chunk_length`` characters
    (ending on a period when possible, otherwise on a word boundary), each
    chunk is passed to the module-level ``text_summary`` pipeline, and the
    resulting summaries are joined with single spaces.

    Summarization is best-effort: a chunk that fails or yields no
    ``summary_text`` is reported on stdout and skipped.

    Args:
        input_text: The text to summarize.  Empty or whitespace-only input
            yields an empty string.
        max_chunk_length: Maximum number of characters per chunk.

    Returns:
        The concatenated chunk summaries, or ``""`` when nothing was produced.

    Raises:
        ValueError: If ``max_chunk_length`` is smaller than 1.
    """
    if max_chunk_length < 1:
        # A non-positive limit can never be satisfied and would otherwise
        # make the chunking loop spin forever.
        raise ValueError("max_chunk_length must be a positive integer")
    if not input_text:
        return ""

    summaries = []
    for i, chunk in enumerate(_split_into_chunks(input_text, max_chunk_length)):
        try:
            output = text_summary(chunk)
            if output and 'summary_text' in output[0]:
                summaries.append(output[0]['summary_text'])
            else:
                print(f"Warning: No summary returned for chunk {i}.")
        except Exception as e:
            # Deliberate best-effort: one bad chunk must not lose the rest.
            print(f"Error summarizing chunk {i}: {e}")
    return " ".join(summaries)
def get_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Recognized forms (the scheme is optional):

    * ``youtu.be/<id>``
    * ``youtube.com/watch?v=<id>``
    * ``youtube.com/embed/<id>``, ``/v/<id>``, ``/shorts/<id>``, ``/live/<id>``

    for the hosts ``youtube.com``, ``www.youtube.com``, ``m.youtube.com`` and
    ``music.youtube.com``.

    Args:
        youtube_url: The URL entered by the user.

    Returns:
        The video ID as a string, or ``None`` when the input is not a string,
        is not a recognized YouTube URL, or carries no (or an empty) ID.
    """
    if not isinstance(youtube_url, str):
        return None
    url = youtube_url.strip()
    if not url:
        return None

    try:
        parsed = urlparse(url)
        if not parsed.netloc:
            # Users often paste "youtube.com/watch?v=..." without a scheme;
            # urlparse then files the host under `path`, so retry with one.
            parsed = urlparse("https://" + url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. unbalanced brackets).
        return None

    host = parsed.hostname or ""
    segments = [part for part in parsed.path.split("/") if part]

    video_id = None
    if host in ("youtu.be", "www.youtu.be"):
        if segments:
            video_id = segments[0]
    elif host in ("www.youtube.com", "youtube.com",
                  "m.youtube.com", "music.youtube.com"):
        if segments == ["watch"]:
            # .get() instead of indexing: a /watch URL without `v` must
            # yield None rather than raise KeyError.
            video_id = parse_qs(parsed.query).get("v", [None])[0]
        elif len(segments) >= 2 and segments[0] in ("embed", "v", "shorts", "live"):
            video_id = segments[1]

    # Normalize an empty ID to None so callers get a single "no ID" value.
    return video_id or None
def get_transcript(youtube_url):
    """Fetch a YouTube video's transcript and return its summary.

    This is the handler behind the Gradio interface, so failures are returned
    as a readable message (and also printed for the server log) instead of
    leaving the output box empty.

    Args:
        youtube_url: URL of the video to summarize.

    Returns:
        The summary produced by ``summary()`` on success; otherwise the
        message "Invalid YouTube URL." or "Could not retrieve transcript: ...".
    """
    video_id = get_video_id(youtube_url)
    if not video_id:
        message = "Invalid YouTube URL."
        print(message)
        return message

    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            # Newer youtube-transcript-api releases replace the static helper
            # with an instance API; to_raw_data() yields the same list of
            # {'text': ...} dicts the join below expects.
            transcript = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        full_transcript = " ".join(entry['text'] for entry in transcript)
    except Exception as e:
        # Boundary handler: the library raises many distinct errors (disabled
        # captions, unavailable video, network failures); report them all.
        message = f"Could not retrieve transcript: {e}"
        print(message)
        return message

    return summary(full_transcript)
# Close any Gradio interfaces that are still running before building a new one.
gr.close_all()

# Single-line input for the video URL and a four-line box for the result.
url_box = gr.Textbox(label="Input Youtube URL to summarize", lines=1)
result_box = gr.Textbox(label="Summarized text", lines=4)

# To expose the plain-text summarizer instead of the URL workflow, use:
# demo = gr.Interface(fn=summary, inputs="text", outputs="text")
demo = gr.Interface(
    fn=get_transcript,
    inputs=[url_box],
    outputs=[result_box],
    title="@pavan-genai Project 2: Youtube Script Summarizer",
    description="THIS APPLICATION WILL BE USED TO SUMMARIZE THE YOUTUBE VIDEO",
)
demo.launch()