pavan-genai committed on
Commit
0fc48fe
·
verified ·
1 Parent(s): a0ad3fc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -0
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Code Generated by Sidekick is for learning and experimentation purposes only.
2
+ from youtube_transcript_api import YouTubeTranscriptApi
3
+ from urllib.parse import urlparse, parse_qs
4
+ import torch
5
+ from transformers import pipeline
6
+ import gradio as gr
7
+
8
# Location of the DistilBART CNN summarization checkpoint. The value follows
# the Hugging Face cache "snapshots/<revision>" layout, so it is presumably a
# local directory shipped next to this file -- TODO confirm it exists at
# deploy time.
model_path = "models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff"

# Build the summarization pipeline once at import time so every request
# reuses the same loaded model instead of reloading it per call.
text_summary = pipeline(
    "summarization",
    model=model_path,
    torch_dtype=torch.bfloat16,
)
10
+
11
+ # Code Generated by Sidekick is for learning and experimentation purposes only.
12
def _split_into_chunks(text, max_chunk_length):
    """Split *text* into non-empty pieces of at most *max_chunk_length* chars.

    Each cut is placed, in order of preference, just after the last '.' in
    the current window, just after the last space in the window, or exactly
    at *max_chunk_length* when the window has neither.
    """
    pieces = []
    remaining = text
    while len(remaining) > max_chunk_length:
        window = remaining[:max_chunk_length]
        # rfind() returns -1 when nothing is found, so "+ 1" yields 0 then.
        cut = window.rfind('.') + 1
        if cut == 0:
            # No sentence end in the window (common for unpunctuated
            # transcripts): break at a word boundary instead of mid-word.
            cut = window.rfind(' ') + 1
        if cut == 0:
            # One unbroken run of characters: hard cut at the limit.
            cut = max_chunk_length
        pieces.append(remaining[:cut])
        remaining = remaining[cut:]
    pieces.append(remaining)
    # Drop whitespace-only pieces so the model is never fed an empty prompt.
    return [piece.strip() for piece in pieces if piece.strip()]


def summary(input_text, max_chunk_length=800):
    """Summarize *input_text* by summarizing it chunk by chunk.

    The text is split into chunks of at most *max_chunk_length* characters,
    each chunk is passed to the module-level ``text_summary`` pipeline, and
    the per-chunk summaries are joined with single spaces.

    Summarization is best-effort: a chunk whose summarization raises, or
    yields no ``summary_text``, is reported on stdout and skipped, so the
    result may cover only part of the input.

    Args:
        input_text: The text to summarize.
        max_chunk_length: Maximum number of characters per chunk; must be
            at least 1.

    Returns:
        The concatenated chunk summaries, or an empty string when there is
        nothing to summarize or every chunk failed.

    Raises:
        ValueError: If *max_chunk_length* is smaller than 1 (a non-positive
            limit could otherwise keep the splitting loop from terminating).
    """
    if max_chunk_length < 1:
        raise ValueError("max_chunk_length must be a positive integer")
    if not input_text:
        return ""

    summaries = []
    for i, chunk in enumerate(_split_into_chunks(input_text, max_chunk_length)):
        try:
            output = text_summary(chunk)
            if output and 'summary_text' in output[0]:
                summaries.append(output[0]['summary_text'])
            else:
                print(f"Warning: No summary returned for chunk {i}.")
        except Exception as e:
            # Deliberately broad: one bad chunk must not abort the whole run.
            print(f"Error summarizing chunk {i}: {e}")
    return " ".join(summaries)
33
+
34
+
35
def get_video_id(youtube_url):
    """Extract the video id from a YouTube URL.

    Recognized forms:
      * ``youtu.be/<id>``
      * ``youtube.com/watch?v=<id>``
      * ``youtube.com/embed/<id>``, ``/v/<id>``, ``/shorts/<id>``,
        ``/live/<id>``
    on the hosts ``youtube.com``, ``www.youtube.com``, ``m.youtube.com`` and
    ``music.youtube.com``. A URL written without a scheme (for example
    ``youtu.be/<id>``) is accepted as well.

    Args:
        youtube_url: The URL text to inspect.

    Returns:
        The video id string, or ``None`` when the input is empty, malformed,
        not a recognized YouTube URL, or carries no id. This function does
        not raise for bad input.
    """
    if not youtube_url or not isinstance(youtube_url, str):
        return None

    candidate = youtube_url.strip()
    try:
        parsed = urlparse(candidate)
        if parsed.hostname is None and not parsed.scheme:
            # Without a scheme urlparse() puts the host into the path, so
            # retry with one to get a usable hostname.
            parsed = urlparse("https://" + candidate.lstrip("/"))
        host = parsed.hostname
    except ValueError:
        # urlparse() rejects some malformed inputs (e.g. a broken IPv6 host).
        return None

    segments = [part for part in parsed.path.split("/") if part]

    if host in ("youtu.be", "www.youtu.be"):
        return segments[0] if segments else None

    if host in ("www.youtube.com", "youtube.com",
                "m.youtube.com", "music.youtube.com"):
        if parsed.path in ("/watch", "/watch/"):
            # .get() instead of ['v']: a /watch URL without a "v" parameter
            # is simply "no id", not a KeyError.
            values = parse_qs(parsed.query).get("v")
            return values[0] if values else None
        if len(segments) >= 2 and segments[0] in ("embed", "v", "shorts", "live"):
            return segments[1]

    return None
47
+
48
def get_transcript(youtube_url):
    """Fetch the transcript of a YouTube video and return its summary.

    Despite the name, the return value is the *summary* produced by
    ``summary()``, not the raw transcript.

    Args:
        youtube_url: URL of the video, in any form ``get_video_id`` accepts.

    Returns:
        The summary text, or ``None`` when the URL yields no video id or the
        transcript cannot be retrieved or summarized. Failures are reported
        on stdout rather than raised.
    """
    try:
        video_id = get_video_id(youtube_url)
    except (KeyError, IndexError):
        # A URL that lacks the expected id component (e.g. /watch without
        # "v") is treated as invalid instead of crashing the request.
        video_id = None
    if not video_id:
        print("Invalid YouTube URL.")
        return None

    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            # Newer youtube-transcript-api releases replaced the static
            # helper with an instance fetch(); to_raw_data() gives back the
            # same list-of-dicts shape used below.
            transcript = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        full_transcript = " ".join(entry['text'] for entry in transcript)
        return summary(full_transcript)
    except Exception as e:
        # Broad on purpose: network, missing-caption and model errors all
        # end the request the same way.
        print(f"Could not retrieve transcript: {e}")
        return None
61
+
62
if __name__ == "__main__":
    # Close any Gradio interfaces that are still running before starting a
    # new one.
    gr.close_all()

    # One single-line box for the video URL, one taller box for the result.
    url_box = gr.Textbox(label="Input Youtube URL to summarize", lines=1)
    summary_box = gr.Textbox(label="Summarized text", lines=4)

    demo = gr.Interface(
        fn=get_transcript,
        inputs=[url_box],
        outputs=[summary_box],
        title="@pavan-genai Project 2: Youtube Script Summarizer",
        description="THIS APPLICATION WILL BE USED TO SUMMARIZE THE YOUTUBE VIDEO",
    )
    demo.launch()