shil51 commited on
Commit
4b63428
·
verified ·
1 Parent(s): 3d8bed6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ from youtube_transcript_api import YouTubeTranscriptApi
4
+ from youtube_transcript_api._errors import TranscriptsDisabled, VideoUnavailable, NoTranscriptFound
5
+ from transformers import pipeline
6
+
7
# Summarization pipeline, built once at import time and reused by every request.
# To run offline, point SUMMARIZER_MODEL at a local snapshot directory of the
# same checkpoint instead of the hub id.
SUMMARIZER_MODEL = "sshleifer/distilbart-cnn-12-6"

text_summary = pipeline(
    "summarization",
    model=SUMMARIZER_MODEL,
    torch_dtype=torch.bfloat16,
)
12
+
13
def split_text(text, max_tokens=1024):
    """Yield consecutive chunks of *text* with at most *max_tokens* words each.

    Despite the parameter name, the limit counts whitespace-separated words,
    not model tokens. Runs of whitespace (including newlines) are collapsed
    to single spaces in the yielded chunks.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk; must be >= 1.

    Yields:
        Chunks of *text*, in order. Nothing is yielded for empty or
        whitespace-only input.

    Raises:
        ValueError: If *max_tokens* is smaller than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop the whole
    # text; a zero step fails with an unrelated-looking range() error.
    if max_tokens < 1:
        raise ValueError("max_tokens must be a positive integer")

    words = text.split()
    for start in range(0, len(words), max_tokens):
        yield " ".join(words[start:start + max_tokens])
17
+
18
def summary(input_text):
    """Summarize *input_text* chunk by chunk and return the joined summaries.

    The text is split into word-based chunks with ``split_text``; each chunk
    is summarized independently by the module-level ``text_summary`` pipeline
    and the partial summaries are joined with single spaces.

    Args:
        input_text: The full text to summarize.

    Returns:
        The concatenated chunk summaries, or an empty string when the input
        contains no words.
    """
    # The checkpoint's input window is 1024 *tokens*, and a word usually maps
    # to more than one token, so chunk well below 1024 words. The value is a
    # heuristic, not an exact token budget.
    words_per_chunk = 700

    summarized_chunks = []
    for chunk in split_text(input_text, words_per_chunk):
        # truncation=True is a safety net: a chunk that still tokenizes past
        # the model limit is clipped instead of overflowing the model input.
        output = text_summary(chunk, truncation=True)
        summarized_chunks.append(output[0]['summary_text'])
    return " ".join(summarized_chunks)
24
+
25
def extract_video_id(url):
    """Return the video ID embedded in a YouTube URL.

    Supported forms (scheme optional, any ``youtube.com`` subdomain):
      * ``youtu.be/<id>``
      * ``youtube.com/watch?v=<id>`` (``v`` may appear anywhere in the query)
      * ``youtube.com/shorts/<id>``, ``/embed/<id>`` and ``/live/<id>``

    Query strings, fragments, trailing slashes and surrounding whitespace are
    ignored, so share links such as ``https://youtu.be/<id>?si=...`` work.

    Args:
        url: The URL as entered by the user.

    Returns:
        The video ID string.

    Raises:
        ValueError: If *url* is not a string or is not a recognised YouTube
            video URL.
    """
    from urllib.parse import parse_qs, urlparse

    if not isinstance(url, str):
        raise ValueError("Invalid YouTube URL format")

    cleaned = url.strip()
    # urlparse only fills in the host when a scheme is present.
    if "://" not in cleaned:
        cleaned = "https://" + cleaned

    parts = urlparse(cleaned)
    host = (parts.hostname or "").lower()
    segments = [segment for segment in parts.path.split("/") if segment]

    video_id = ""
    if host == "youtu.be" or host.endswith(".youtu.be"):
        if segments:
            video_id = segments[0]
    elif host == "youtube.com" or host.endswith(".youtube.com"):
        if segments == ["watch"]:
            # parse_qs finds "v" wherever it sits among the query parameters.
            video_id = parse_qs(parts.query).get("v", [""])[0]
        elif len(segments) >= 2 and segments[0] in ("shorts", "embed", "live"):
            video_id = segments[1]

    if not video_id:
        raise ValueError("Invalid YouTube URL format")
    return video_id
34
+
35
def get_youtube_transcript(video_url):
    """Fetch the transcript of a YouTube video and return its summary.

    This is the Gradio callback: it never raises for the handled failure
    modes and returns a human-readable message instead.

    Args:
        video_url: URL of the video as typed by the user.

    Returns:
        The summary of the transcript, or an explanatory message when the URL
        is invalid, the transcript is empty, transcripts are disabled, the
        video is unavailable, or no transcript exists. Any other exception
        (for example a network failure) propagates to the caller.
    """
    try:
        # The input textbox is multi-line, so stray whitespace or a trailing
        # newline can end up in the value; strip it before parsing.
        video_id = extract_video_id((video_url or "").strip())

        if hasattr(YouTubeTranscriptApi, "fetch"):
            # youtube-transcript-api >= 1.0: instance API whose result
            # iterates over snippet objects exposing a ``text`` attribute.
            fetched = YouTubeTranscriptApi().fetch(video_id)
            lines = [snippet.text for snippet in fetched]
        else:
            # Older releases: static API returning a list of dicts.
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            lines = [entry['text'] for entry in transcript]
        text_transcript = "\n".join(lines)

        if not text_transcript.strip():
            return "The transcript is empty or unavailable."

        return summary(text_transcript)

    except ValueError as e:
        return f"Error: {e}"
    except TranscriptsDisabled:
        return "Transcript is disabled for this video."
    except VideoUnavailable:
        return "Video is unavailable."
    except NoTranscriptFound:
        return "No transcript found for this video."
54
+
55
# For a quick command-line check without the web UI, call
# get_youtube_transcript(<url>) directly and print the returned string.

# Close any Gradio interfaces left running before starting a new one.
gr.close_all()

url_input = gr.Textbox(label="Input Youtube video url to summarize", lines=2)
summary_output = gr.Textbox(label="Summarized text", lines=6)

demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[url_input],
    outputs=[summary_output],
    title="GenAI Project 2: Video to Text Summarizer",
    description="This application is use to summarized the text from youtube video",
)

demo.launch()