Spaces:
Build error
Build error
| import torch | |
| import gradio as gr | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from youtube_transcript_api._errors import TranscriptsDisabled, VideoUnavailable, NoTranscriptFound | |
| from transformers import pipeline | |
# Summarization pipeline shared by the whole module, loaded by Hub repo id.
# An earlier local setup pointed `model` at a downloaded snapshot directory,
#   "../models/models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff"
# and used torch_dtype=torch.float32 instead of bfloat16.
text_summary = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)
def split_text(text, max_tokens=1024):
    """Yield consecutive pieces of *text* holding at most *max_tokens* words.

    The text is split on whitespace, so despite the parameter name the limit
    counts whitespace-separated words, not model tokens. The words of each
    piece are re-joined with single spaces; the final piece may be shorter.
    Empty or whitespace-only text yields nothing.
    """
    pieces = text.split()
    offsets = range(0, len(pieces), max_tokens)
    yield from (" ".join(pieces[start:start + max_tokens]) for start in offsets)
def summary(input_text):
    """Summarize *input_text* piece by piece and return the joined summaries.

    The text is cut into word-based chunks with ``split_text``; each chunk is
    run through the module-level ``text_summary`` pipeline and the resulting
    ``summary_text`` values are joined with single spaces. Empty input
    produces an empty string.
    """
    # The model's input limit is counted in tokens (1024 per its published
    # config), and a word usually maps to more than one token, so chunk on
    # noticeably fewer words than that limit.
    words_per_chunk = 700

    summarized_chunks = []
    for chunk in split_text(input_text, max_tokens=words_per_chunk):
        # truncation=True is a safety net: a chunk that still tokenizes past
        # the model limit is clipped instead of crashing inside the model.
        output = text_summary(chunk, truncation=True)
        summarized_chunks.append(output[0]["summary_text"])
    return " ".join(summarized_chunks)
def extract_video_id(url):
    """Return the video id contained in a YouTube URL.

    Recognized forms (scheme optional, any ``*.youtube.com`` subdomain):

    * ``youtu.be/<id>``
    * ``youtube.com/watch?v=<id>`` (``v`` may appear anywhere in the query)
    * ``youtube.com/shorts/<id>``, ``/embed/<id>`` and ``/live/<id>``

    Surrounding whitespace, extra query parameters, fragments and trailing
    slashes are ignored.

    Raises:
        ValueError: if *url* is not a string or no video id can be found.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import parse_qs, urlparse

    if not isinstance(url, str):
        raise ValueError("Invalid YouTube URL format")

    cleaned = url.strip()
    # urlparse only fills in the host when a scheme is present, so add one
    # for inputs pasted as "youtu.be/<id>" or "www.youtube.com/...".
    if "://" not in cleaned:
        cleaned = "https://" + cleaned

    try:
        parts = urlparse(cleaned)
    except ValueError:
        raise ValueError("Invalid YouTube URL format") from None

    host = (parts.hostname or "").lower()
    segments = [segment for segment in parts.path.split("/") if segment]

    video_id = ""
    if host == "youtu.be":
        if segments:
            video_id = segments[0]
    elif host == "youtube.com" or host.endswith(".youtube.com"):
        if segments[:1] == ["watch"]:
            video_id = parse_qs(parts.query).get("v", [""])[0]
        elif len(segments) >= 2 and segments[0] in ("shorts", "embed", "live"):
            video_id = segments[1]

    if not video_id:
        raise ValueError("Invalid YouTube URL format")
    return video_id
def _fetch_transcript_entries(video_id):
    """Return the transcript of *video_id* as a list of dicts with a 'text' key."""
    # Newer youtube-transcript-api releases replaced the static
    # get_transcript() with an instance-level fetch(); prefer the new call and
    # fall back to the legacy one so either installed version works.
    if hasattr(YouTubeTranscriptApi, "fetch"):
        return YouTubeTranscriptApi().fetch(video_id).to_raw_data()
    return YouTubeTranscriptApi.get_transcript(video_id)


def get_youtube_transcript(video_url):
    """Fetch the transcript of a YouTube video and return its summary.

    This is the Gradio handler: it always returns a string. On success that is
    the output of ``summary``; for the failures handled below it is a
    human-readable message instead.

    Args:
        video_url: URL of the video, in any form ``extract_video_id`` accepts.

    Returns:
        The summarized transcript, or a message when the URL is invalid, the
        transcript is empty, transcripts are disabled, the video is
        unavailable, or no transcript exists.
    """
    try:
        video_id = extract_video_id(video_url)
        transcript = _fetch_transcript_entries(video_id)
        text_transcript = "\n".join(entry["text"] for entry in transcript)
        if not text_transcript.strip():
            return "The transcript is empty or unavailable."
        return summary(text_transcript)
    except ValueError as e:
        return f"Error: {e}"
    except TranscriptsDisabled:
        return "Transcript is disabled for this video."
    except VideoUnavailable:
        return "Video is unavailable."
    except NoTranscriptFound:
        return "No transcript found for this video."
| # if __name__ == "__main__": | |
| # youtube_url = input("Enter YouTube URL: ").strip() | |
| # transcript_summary = get_youtube_transcript(youtube_url) | |
| # print("\n=== Summary ===\n") | |
| # print(transcript_summary) | |
# Close any Gradio apps that are still running before building a new one.
gr.close_all()

# Single-textbox UI: a video URL goes in, the summarized transcript comes out.
demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[gr.Textbox(label="Input Youtube video url to summarize", lines=2)],
    outputs=[gr.Textbox(label="Summarized text", lines=6)],
    title="GenAI Project 2: Video to Text Summarizer",
    description="This application is use to summarized the text from youtube video",
)
demo.launch()