sbby committed on
Commit
eae03cd
·
verified ·
1 Parent(s): 3e0e1f6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -33
app.py CHANGED
@@ -5,55 +5,71 @@ import torch
5
  import gradio as gr
6
  from transformers import pipeline
7
 
8
# Summarization pipeline built from the hosted DistilBART CNN checkpoint,
# requested in bfloat16.
text_summary = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)

# Alternative kept for reference: load the same checkpoint from a local
# snapshot directory instead of the hub.
# model_path = ("../Models/models--sshleifer--distilbart-cnn-12-6/snapshots"
#               "/a4f8f3ea906ed274767e9906dbaede7531d660ff")
# text_summary = pipeline("summarization", model=model_path,
#                         torch_dtype=torch.bfloat16)
 
 
14
 
15
def summary(input):
    """Run the module-level summarization pipeline on `input`.

    Returns the ``'summary_text'`` field of the first result produced by
    ``text_summary``.
    """
    results = text_summary(input)
    first_result = results[0]
    return first_result['summary_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in `url`, or None.

    The pattern covers youtu.be short links as well as youtube.com watch,
    embed and /v/ style URLs.
    """
    pattern = r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
    found = re.search(pattern, url)
    return found.group(1) if found else None
26
-
27
 
28
def get_youtube_transcript(video_url):
    """Fetch the transcript for a YouTube URL and return its summary.

    Returns a short message string instead of raising when the video ID
    cannot be extracted or when fetching/summarizing fails.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return "Video ID could not be extracted."

    try:
        # Download the raw transcript entries for this video.
        raw_transcript = YouTubeTranscriptApi.get_transcript(video_id)

        # Flatten the entries into plain text, then summarize that text.
        plain_text = TextFormatter().format_transcript(raw_transcript)
        return summary(plain_text)
    except Exception as e:
        return f"An error occurred: {e}"
45
 
 
 
 
 
 
 
 
 
46
 
47
# Example usage (swap in a real URL when running this as a script):
# video_url = "https://youtu.be/5PibknhIsTc"
# print(get_youtube_transcript(video_url))

# Called before the interface is built; presumably clears any Gradio apps
# still running from an earlier launch.
gr.close_all()

# Plain-text variant kept for reference:
# demo = gr.Interface(fn=summary, inputs="text", outputs="text")
url_box = gr.Textbox(label="Input YouTube Url to summarize", lines=1)
summary_box = gr.Textbox(label="Summarized text", lines=4)

demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[url_box],
    outputs=[summary_box],
    title="@GenAILearniverse Project 2: YouTube Script Summarizer",
    description="THIS APPLICATION WILL BE USED TO SUMMARIZE THE YOUTUBE VIDEO SCRIPT.",
)
demo.launch()
 
5
  import gradio as gr
6
  from transformers import pipeline
7
 
8
# Pick the pipeline device: CUDA device 0 when a GPU is visible, otherwise
# -1 (CPU).
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Summarization pipeline shared by the rest of the module.
text_summary = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    device=device,
)
17
 
18
def chunk_text(text, max_chunk_length=800):
    """Split `text` into chunks of at most `max_chunk_length` characters.

    Each chunk ends at the last period inside its window when there is one.
    When the window holds no period (common for unpunctuated captions), the
    cut falls on the last whitespace so words are not split; only a single
    token longer than the window is hard-cut.

    Args:
        text: The text to split. ``None`` or empty/blank text yields ``[]``.
        max_chunk_length: Maximum number of characters per chunk; must be
            positive.

    Returns:
        A list of non-empty, whitespace-stripped chunks in original order.

    Raises:
        ValueError: If `max_chunk_length` is not positive (a zero-width
            window could never consume the text).
    """
    if max_chunk_length <= 0:
        raise ValueError("max_chunk_length must be a positive integer")
    if not text:
        return []

    chunks = []
    remaining = text.strip()
    while remaining:
        # Text that already fits needs no further splitting.
        if len(remaining) <= max_chunk_length:
            chunks.append(remaining)
            break

        window = remaining[:max_chunk_length]
        # Prefer ending on a sentence boundary; rfind() gives -1 -> 0 if none.
        boundary = window.rfind(".") + 1
        if boundary == 0:
            # No period: fall back to a word boundary. `remaining` is longer
            # than the window here, so indexing the next character is safe.
            ends_on_word = (
                window[-1].isspace()
                or remaining[max_chunk_length].isspace()
            )
            words = window.rsplit(None, 1)
            if ends_on_word or len(words) < 2:
                # Window already ends between words, or it is one long token.
                boundary = max_chunk_length
            else:
                boundary = len(words[0])

        piece = remaining[:boundary].strip()
        if piece:
            chunks.append(piece)
        # boundary >= 1, so every iteration makes progress.
        remaining = remaining[boundary:].strip()
    return chunks
31
+
32
def summary(input_text):
    """Summarize long text by summarizing it chunk by chunk.

    The text is split with ``chunk_text``; each non-blank chunk is passed to
    the module-level ``text_summary`` pipeline and the partial summaries are
    joined with single spaces.

    Args:
        input_text: The text to summarize.

    Returns:
        The combined summary, or an empty string when there is nothing to
        summarize.
    """
    # Never hand blank chunks to the model.
    pieces = [chunk for chunk in chunk_text(input_text) if chunk.strip()]
    if not pieces:
        return ""

    partial_summaries = []
    for piece in pieces:
        # truncation=True guards against chunks that are short in characters
        # but still tokenize to more tokens than the model accepts.
        result = text_summary(piece, truncation=True)
        # Strip so joining does not produce doubled spaces.
        partial_summaries.append(result[0]['summary_text'].strip())
    return " ".join(partial_summaries)
39
 
40
# Compiled once at import time. Matches youtu.be short links and youtube.com
# watch (?v=), /v/, /e/, /embed/, /shorts/ and /live/ URLs, capturing the
# 11-character video ID.
_VIDEO_ID_RE = re.compile(
    r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?|shorts|live)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
)


def extract_video_id(url):
    """Extract the YouTube video ID from a URL.

    Args:
        url: A YouTube URL in any of the supported formats.

    Returns:
        The 11-character video ID, or ``None`` when `url` is not a string,
        is empty, or contains no recognisable video ID.
    """
    # Guard against None / non-string input so callers get None rather than
    # a TypeError from the regex engine.
    if not isinstance(url, str) or not url:
        return None
    match = _VIDEO_ID_RE.search(url)
    return match.group(1) if match else None
 
 
 
47
 
48
def get_youtube_transcript(video_url):
    """Fetch a YouTube video's transcript and return its summary.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The summary text on success. On failure, a human-readable message:
        "Video ID could not be extracted." when no ID is found in the URL,
        or "An error occurred: ..." when fetching or summarizing raises.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return "Video ID could not be extracted."

    try:
        # fetch() is an instance method in the 1.x youtube-transcript-api,
        # so the API object has to be instantiated first; calling it on the
        # class would bind video_id as `self` and fail.
        transcript = YouTubeTranscriptApi().fetch(video_id)
        text_transcript = TextFormatter().format_transcript(transcript)
        return summary(text_transcript)
    except Exception as e:
        # Deliberately broad: this function is the UI callback, so any
        # failure is reported to the user as text instead of crashing.
        return f"An error occurred: {e}"
64
 
65
# Gradio UI: one URL input box wired to get_youtube_transcript, one output
# box for the resulting summary.
url_input = gr.Textbox(label="Input YouTube URL to summarize", lines=1)
summary_output = gr.Textbox(label="Summarized text", lines=6)

demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[url_input],
    outputs=[summary_output],
    title="SBBY Project 2: YouTube Script Summarizer",
    description="Summarize any YouTube video's transcript into a concise version.",
)

# Only start the app when this file is run directly, not when imported.
if __name__ == "__main__":
    demo.launch()