AkashKhamkar committed on
Commit
25bf457
·
1 Parent(s): 3943aac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -8
app.py CHANGED
@@ -5,7 +5,6 @@ from youtube_transcript_api import YouTubeTranscriptApi
5
  import os
6
  import ast
7
  import pandas as pd
8
- import before_run
9
  from segmentation import SemanticTextSegmentation
10
  import re
11
  from symspellpy import SymSpell
@@ -13,9 +12,12 @@ import pkg_resources
13
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
14
  from torch import cuda
15
  from transformers import pipeline
 
 
16
  from PIL import Image
17
  from PIL import ImageDraw
18
  from PIL import ImageFont
 
19
 
20
 
21
  if not os.path.exists('./transcripts'):
@@ -46,7 +48,6 @@ def clean_text(link,start,end):
46
  #print(starts)
47
  #ts_ = float(ts.strip("s&end"))
48
  #es_ = float(es.strip(es[-1]))
49
- st.write('this is the value of es: ',es)
50
  if not(es) :
51
  e_val = starts[-1]
52
  else:
@@ -146,13 +147,12 @@ def clean_text(link,start,end):
146
  # remove Nan
147
  df.dropna(inplace=True)
148
  sts = SemanticTextSegmentation(df)
149
- texts,i_r = sts.get_segments()
150
- return texts,i_r
151
 
152
  sf = pd.DataFrame(columns=['Segmented_Text','video_id'])
153
 
154
- text,i_array = segment(transcript.at[0,'text'])
155
- st.write(i_array)
156
  for i in range(len(text)):
157
  sf.loc[i, 'Segmented_Text'] = text[i]
158
  sf.loc[i, 'video_id'] = transcript.at[0,'video_id']
@@ -201,8 +201,11 @@ def clean_text(link,start,end):
201
  def t5_summarizer(link,start, end):
202
  input_text = clean_text(link,start,end)
203
  lst_outputs = []
204
- tokenizer1 = AutoTokenizer.from_pretrained("CareerNinja/t5-large_3e-4")
205
- model1 = AutoModelForSeq2SeqLM.from_pretrained("CareerNinja/t5-large_3e-4")
 
 
 
206
  summarizer1 = pipeline("summarization", model=model1, tokenizer=tokenizer1)
207
  print(f""" Entered summarizer ! """)
208
  st.write('Below is the summary of the given URL: ')
 
5
  import os
6
  import ast
7
  import pandas as pd
 
8
  from segmentation import SemanticTextSegmentation
9
  import re
10
  from symspellpy import SymSpell
 
12
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
13
  from torch import cuda
14
  from transformers import pipeline
15
+ import nltk
16
+ nltk.download('stopwords')
17
  from PIL import Image
18
  from PIL import ImageDraw
19
  from PIL import ImageFont
20
+ import time
21
 
22
 
23
  if not os.path.exists('./transcripts'):
 
48
  #print(starts)
49
  #ts_ = float(ts.strip("s&end"))
50
  #es_ = float(es.strip(es[-1]))
 
51
  if not(es) :
52
  e_val = starts[-1]
53
  else:
 
147
  # remove Nan
148
  df.dropna(inplace=True)
149
  sts = SemanticTextSegmentation(df)
150
+ texts = sts.get_segments()
151
+ return texts
152
 
153
  sf = pd.DataFrame(columns=['Segmented_Text','video_id'])
154
 
155
+ text = segment(transcript.at[0,'text'])
 
156
  for i in range(len(text)):
157
  sf.loc[i, 'Segmented_Text'] = text[i]
158
  sf.loc[i, 'video_id'] = transcript.at[0,'video_id']
 
201
  def t5_summarizer(link,start, end):
202
  input_text = clean_text(link,start,end)
203
  lst_outputs = []
204
+ tokenizer1 = AutoTokenizer.from_pretrained("CareerNinja/t5_large_3e-4_on_v2_dataset")
205
+ st.write('Loading the model!')
206
+ start_time = time.time()
207
+ model1 = AutoModelForSeq2SeqLM.from_pretrained("CareerNinja/t5_large_3e-4_on_v2_dataset")
208
+ st.write('Model loading compelete, time taken: ',time.time()-start_time)
209
  summarizer1 = pipeline("summarization", model=model1, tokenizer=tokenizer1)
210
  print(f""" Entered summarizer ! """)
211
  st.write('Below is the summary of the given URL: ')