AkashKhamkar committed on
Commit
bec231c
·
1 Parent(s): 4029d38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -15
app.py CHANGED
@@ -23,7 +23,7 @@ device = 'cuda' if cuda.is_available() else 'cpu'
23
  tokenizer = AutoTokenizer.from_pretrained("t5-base")
24
  os.makedirs('./transcripts/')
25
 
26
- def clean_text(link):
27
  sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
28
  dictionary_path = pkg_resources.resource_filename(
29
  "symspellpy", "frequency_dictionary_en_82_765.txt"
@@ -33,15 +33,9 @@ def clean_text(link):
33
  def id_ts_grabber(link):
34
  youtube_video = link.split("=")
35
  video_id = youtube_video[1]
36
- if len(youtube_video) > 2:
37
- time_stamp = youtube_video[2]
38
- end_pt = youtube_video[3]
39
- return video_id, time_stamp, end_pt
40
- #print(f""" This is the video ID: {video_id} and this is the Timestamp: {time_stamp}""")
41
- else:
42
- time_stamp = None
43
- return video_id, time_stamp
44
- #print(f""" This is the video ID: {video_id} and no Timestamp was found""")
45
 
46
  def seg_getter(data,ts,es):
47
  starts = []
@@ -117,14 +111,14 @@ def clean_text(link):
117
  transcripts.append(ccs['text'])
118
  return transcripts
119
 
120
- def transcript_collector(link):
121
- vid, ts, es = id_ts_grabber(link)
122
  print(f""" Fetching the transcript """)
123
  filename = get_cc(vid)
124
  return transcript_creator(filename, ts, es), vid
125
 
126
  transcript = pd.DataFrame(columns=['text', 'video_id'])
127
- transcript.loc[0,'text'],transcript.loc[0,'video_id'] = transcript_collector(link)
128
 
129
  def segment(corpus):
130
  text_data = [re.sub(r'\[.*?\]', '', x).strip() for x in corpus]
@@ -198,6 +192,8 @@ def t5_summarizer(link,start, end):
198
  sumry = list(summary[0].values())
199
  input_text.loc[i,'Generated Summary'] = sumry[0]
200
  return (input_text.at[i, 'Generated Summary'])
201
-
202
- interface = gr.Interface(fn=t5_summarizer,inputs=["text","text","text"],outputs=["text"]).launch(debug=True)
 
 
203
  interface.launch()
 
23
  tokenizer = AutoTokenizer.from_pretrained("t5-base")
24
  os.makedirs('./transcripts/')
25
 
26
+ def clean_text(link,start,end):
27
  sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
28
  dictionary_path = pkg_resources.resource_filename(
29
  "symspellpy", "frequency_dictionary_en_82_765.txt"
 
33
  def id_ts_grabber(link):
34
  youtube_video = link.split("=")
35
  video_id = youtube_video[1]
36
+ #print(f""" This is the video ID: {video_id} and this is the Timestamp: {time_stamp}""")
37
+ return video_id
38
+ #print(f""" This is the video ID: {video_id} and no Timestamp was found""")
 
 
 
 
 
 
39
 
40
  def seg_getter(data,ts,es):
41
  starts = []
 
111
  transcripts.append(ccs['text'])
112
  return transcripts
113
 
114
+ def transcript_collector(link,ts,es):
115
+ vid = id_ts_grabber(link)
116
  print(f""" Fetching the transcript """)
117
  filename = get_cc(vid)
118
  return transcript_creator(filename, ts, es), vid
119
 
120
  transcript = pd.DataFrame(columns=['text', 'video_id'])
121
+ transcript.loc[0,'text'],transcript.loc[0,'video_id'] = transcript_collector(link,start,end)
122
 
123
  def segment(corpus):
124
  text_data = [re.sub(r'\[.*?\]', '', x).strip() for x in corpus]
 
192
  sumry = list(summary[0].values())
193
  input_text.loc[i,'Generated Summary'] = sumry[0]
194
  return (input_text.at[i, 'Generated Summary'])
195
+
196
+ textbox = gr.Textbox(label="Enter the link here !!! ", placeholder="Input text here !!! ", lines=2)
197
+ outbox = gr.Textbox(label = "Below is the generated summary !", placeholder="Enter a link to see a summary over here !", lines =5)
198
+ interface = gr.Interface(fn=t5_summarizer,inputs=textbox,outputs=outbox).launch(debug=True)
199
  interface.launch()