Spaces:
Build error
Build error
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import gradio as gr
|
| 3 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
| 4 |
+
from youtube_transcript_api._errors import TranscriptsDisabled, VideoUnavailable, NoTranscriptFound
|
| 5 |
+
from transformers import pipeline
|
| 6 |
+
|
| 7 |
+
# model_path = "../models/models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff"
|
| 8 |
+
|
| 9 |
+
# Load the summarization model
|
| 10 |
+
# text_summary = pipeline("summarization", model=model_path, torch_dtype=torch.float32)
|
| 11 |
+
# Build the summarization pipeline once at import time; summary() reuses this
# single instance for every chunk it processes.
text_summary = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)
|
| 12 |
+
|
| 13 |
+
def split_text(text, max_tokens=1024):
    """Yield consecutive chunks of *text*, each holding at most *max_tokens* words.

    Despite the parameter name, the limit is counted in whitespace-separated
    words (``str.split()``), not in model tokens. Runs of whitespace,
    including newlines, are collapsed to single spaces in the yielded chunks.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk; must be at least 1.

    Yields:
        Chunks of the text as strings, in order. Nothing is yielded when
        *text* contains no words.

    Raises:
        ValueError: If *max_tokens* is smaller than 1. The error is raised
            when iteration starts, because this is a generator.
    """
    # A negative step would make range() empty and silently drop the whole
    # text; reject every non-positive size explicitly instead.
    if max_tokens < 1:
        raise ValueError("max_tokens must be a positive integer")

    words = text.split()
    for start in range(0, len(words), max_tokens):
        yield " ".join(words[start:start + max_tokens])
|
| 17 |
+
|
| 18 |
+
def summary(input_text):
    """Summarize text of any length by summarizing it chunk by chunk.

    The text is cut into chunks with ``split_text``; each chunk is passed to
    the module-level ``text_summary`` pipeline, and the per-chunk summaries
    are joined with single spaces.

    Args:
        input_text: The text to summarize.

    Returns:
        The concatenated chunk summaries, or an empty string when
        *input_text* contains no words.
    """
    # split_text sizes chunks in words, and a word can map to several model
    # tokens, so a chunk may exceed the model's input length.
    # truncation=True asks the tokenizer to clip such a chunk instead of
    # letting the model fail on an over-long sequence.
    return " ".join(
        text_summary(chunk, truncation=True)[0]['summary_text']
        for chunk in split_text(input_text)
    )
|
| 24 |
+
|
| 25 |
+
def extract_video_id(url):
    """Return the video id contained in a YouTube URL.

    Supported forms (with or without scheme, ``www.``/``m.`` style
    subdomains, extra query parameters, fragments or a trailing slash):

    * ``youtu.be/<id>``
    * ``youtube.com/watch?v=<id>`` (``v`` may appear anywhere in the query)
    * ``youtube.com/shorts/<id>``, ``youtube.com/embed/<id>``,
      ``youtube.com/live/<id>``

    Args:
        url: The URL entered by the user; surrounding whitespace is ignored.

    Returns:
        The video id as a string.

    Raises:
        ValueError: If the URL is not one of the supported forms or carries
            no video id.
    """
    from urllib.parse import parse_qs, urlparse

    cleaned = url.strip()
    # urlparse only fills in the host when a scheme is present, so add one
    # for inputs such as "youtu.be/<id>".
    if not cleaned.lower().startswith(("http://", "https://")):
        cleaned = "https://" + cleaned

    parsed = urlparse(cleaned)
    host = (parsed.hostname or "").lower()
    # Dropping empty segments makes a trailing slash harmless.
    segments = [part for part in parsed.path.split("/") if part]

    video_id = ""
    if host == "youtu.be" or host.endswith(".youtu.be"):
        if segments:
            video_id = segments[0]
    elif host == "youtube.com" or host.endswith(".youtube.com"):
        if segments == ["watch"]:
            # parse_qs finds "v" regardless of its position in the query.
            video_id = parse_qs(parsed.query).get("v", [""])[0]
        elif len(segments) >= 2 and segments[0] in ("shorts", "embed", "live"):
            video_id = segments[1]

    if not video_id:
        raise ValueError("Invalid YouTube URL format")
    return video_id
|
| 34 |
+
|
| 35 |
+
def get_youtube_transcript(video_url):
    """Fetch the transcript of a YouTube video and return its summary.

    Args:
        video_url: URL of the video, as accepted by ``extract_video_id``.

    Returns:
        The summary produced by ``summary`` on success. Otherwise a
        human-readable message string: the transcript text is blank, a
        ``ValueError`` was raised (reported as ``"Error: <message>"``), or the
        transcript API raised ``TranscriptsDisabled``, ``VideoUnavailable``
        or ``NoTranscriptFound``.
    """
    try:
        segments = YouTubeTranscriptApi.get_transcript(extract_video_id(video_url))
        # Each transcript entry is indexed by its 'text' key; one entry per line.
        full_text = "\n".join(segment['text'] for segment in segments)

        if full_text.strip():
            return summary(full_text)
        return "The transcript is empty or unavailable."

    except ValueError as err:
        return f"Error: {err}"
    except TranscriptsDisabled:
        return "Transcript is disabled for this video."
    except VideoUnavailable:
        return "Video is unavailable."
    except NoTranscriptFound:
        return "No transcript found for this video."
|
| 54 |
+
|
| 55 |
+
# if __name__ == "__main__":
|
| 56 |
+
# youtube_url = input("Enter YouTube URL: ").strip()
|
| 57 |
+
# transcript_summary = get_youtube_transcript(youtube_url)
|
| 58 |
+
# print("\n=== Summary ===\n")
|
| 59 |
+
# print(transcript_summary)
|
| 60 |
+
|
| 61 |
+
# Close any Gradio interfaces that are still running before building a new one.
gr.close_all()

# One textbox in (the video URL), one textbox out (the generated summary).
url_box = gr.Textbox(label="Input Youtube video url to summarize", lines=2)
summary_box = gr.Textbox(label="Summarized text", lines=6)

demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[url_box],
    outputs=[summary_box],
    title="GenAI Project 2: Video to Text Summarizer",
    description="This application is use to summarized the text from youtube video",
)

demo.launch()
|