enasyazzs committed on
Commit
040aeeb
·
verified ·
1 Parent(s): 7437f5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py CHANGED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from youtube_transcript_api import YouTubeTranscriptApi
3
+ from youtube_transcript_api.formatters import TextFormatter
4
+ import torch
5
+ import gradio as gr
6
+ from transformers import pipeline
7
+
8
# Summarization pipeline built once at import time and reused by summary().
text_summary = pipeline(
    task='summarization',
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)
9
+
10
+ # model_path = ("../.venv/Modal/models--sshleifer--distilbart-cnn-12-6/snapshots"
11
+ # "/a4f8f3ea906ed274767e9906dbaede7531d660ff")
12
+ # text_summary = pipeline("summarization", model=model_path,
13
+ # torch_dtype=torch.bfloat16)
14
+
15
def summary(input):
    """Summarize text with the module-level ``text_summary`` pipeline.

    Args:
        input: The text to summarize. The name shadows the builtin, but it is
            kept so that existing keyword callers continue to work.

    Returns:
        The ``summary_text`` field of the first result the pipeline returns.
    """
    # A whole video transcript can exceed the model's maximum input length;
    # truncation=True asks the tokenizer to clip the text rather than let the
    # model fail on an over-long sequence.
    output = text_summary(input, truncation=True)
    return output[0]['summary_text']
18
+
19
+
20
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or ``None``.

    Recognised URL shapes include ``youtu.be/<id>``,
    ``youtube.com/watch?v=<id>``, ``youtube.com/embed/<id>``,
    ``youtube.com/v/<id>``, ``youtube.com/shorts/<id>`` and
    ``youtube.com/live/<id>``.

    Args:
        url: The URL text to inspect. Anything that is not a string (for
            example ``None``) is treated as "no ID found" instead of raising.

    Returns:
        The video ID as a string, or ``None`` when no ID can be extracted
        (a diagnostic line is printed in that case).
    """
    # Regex to extract the video ID from various YouTube URL formats.
    pattern = (
        r"(?:youtube\.com/(?:[^/\n\s]+/.+/|(?:v|e(?:mbed)?|shorts|live)/|.*[?&]v=)"
        r"|youtu\.be/)([a-zA-Z0-9_-]{11})"
    )
    # re.search raises TypeError on non-string input, so guard before calling.
    match = re.search(pattern, url) if isinstance(url, str) else None
    if match is None:
        print("❌ Could not extract video ID from the URL.")
        return None
    return match.group(1)
29
+
30
+
31
def get_youtube_transcript(video_url):
    """Fetch a YouTube video's transcript and return a summary of it.

    Args:
        video_url: URL of the video; the ID is pulled out with
            ``extract_video_id``.

    Returns:
        The summary text on success. On failure, a human-readable message is
        returned so the caller (the Gradio output box) shows the reason
        instead of an empty result.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return "Video ID could not be extracted."

    try:
        # Get the transcript using the video ID.
        # NOTE(review): newer youtube-transcript-api releases appear to replace
        # this static call with an instance-level fetch(); confirm against the
        # version pinned for this app before upgrading.
        transcript = YouTubeTranscriptApi.get_transcript(video_id)

        # Format the transcript into plain text, then summarize it.
        text_transcript = TextFormatter().format_transcript(transcript)
        return summary(text_transcript)
    except Exception as e:
        # Top-level boundary for the UI callback: log the failure and report
        # it to the user rather than returning None.
        print(f"⚠️ Error retrieving transcript: {e}")
        return f"Error retrieving transcript: {e}"
49
+
50
+
51
+ # Example URL (Replace this with the actual URL when using the script)
52
+ # video_url = "https://youtu.be/SP1bknhIsTc"
53
+ # print(get_youtube_transcript(video_url))
54
+
55
# Close any Gradio interfaces that are still open before building a new one.
gr.close_all()

# Alternative wiring that summarizes pasted text directly:
# demo = gr.Interface(fn=summary, inputs="text", outputs="text")
url_box = gr.Textbox(label="Input YouTube Url to summarize", lines=6)
summary_box = gr.Textbox(label="Summarized text", lines=4)

demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[url_box],
    outputs=[summary_box],
    title="@GenAILearning Project 2: YouTube Script Summarizer",
    description="THIS APPLICATIONS WILL BE USED TO SUMMARIZE THE YOUTUBE VIDEO SCRIPT",
)

demo.launch()