Spaces:
Runtime error
Runtime error
Commit
·
521e17f
1
Parent(s):
ad19100
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,7 +17,6 @@ nltk.download('stopwords')
|
|
| 17 |
from PIL import Image
|
| 18 |
from PIL import ImageDraw
|
| 19 |
from PIL import ImageFont
|
| 20 |
-
import time
|
| 21 |
|
| 22 |
|
| 23 |
if not os.path.exists('./transcripts'):
|
|
@@ -151,10 +150,9 @@ def clean_text(link,start,end):
|
|
| 151 |
return texts
|
| 152 |
|
| 153 |
sf = pd.DataFrame(columns=['Segmented_Text','video_id'])
|
|
|
|
| 154 |
text = segment(transcript.at[0,'text'])
|
| 155 |
-
|
| 156 |
for i in range(len(text)):
|
| 157 |
-
#st.write('iteration no: ',i)
|
| 158 |
sf.loc[i, 'Segmented_Text'] = text[i]
|
| 159 |
sf.loc[i, 'video_id'] = transcript.at[0,'video_id']
|
| 160 |
|
|
@@ -166,7 +164,6 @@ def clean_text(link,start,end):
|
|
| 166 |
return texts
|
| 167 |
|
| 168 |
for i in range(len(sf)):
|
| 169 |
-
st.write(sf.at[i, 'Segmented_Text'])
|
| 170 |
sf.loc[i, 'Segmented_Text'] = word_seg(sf.at[i, 'Segmented_Text'])
|
| 171 |
sf.loc[i, 'Lengths'] = len(tokenizer(sf.at[i, 'Segmented_Text'])['input_ids'])
|
| 172 |
|
|
@@ -203,11 +200,8 @@ def clean_text(link,start,end):
|
|
| 203 |
def t5_summarizer(link,start, end):
|
| 204 |
input_text = clean_text(link,start,end)
|
| 205 |
lst_outputs = []
|
| 206 |
-
tokenizer1 = AutoTokenizer.from_pretrained("CareerNinja/t5_large_3e-4_on_v2_dataset")
|
| 207 |
-
|
| 208 |
-
start_time = time.time()
|
| 209 |
-
model1 = AutoModelForSeq2SeqLM.from_pretrained("CareerNinja/t5_large_3e-4_on_v2_dataset")
|
| 210 |
-
st.write('Model loading compelete, time taken: ',time.time()-start_time)
|
| 211 |
summarizer1 = pipeline("summarization", model=model1, tokenizer=tokenizer1)
|
| 212 |
print(f""" Entered summarizer ! """)
|
| 213 |
st.write('Below is the summary of the given URL: ')
|
|
|
|
| 17 |
from PIL import Image
|
| 18 |
from PIL import ImageDraw
|
| 19 |
from PIL import ImageFont
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
if not os.path.exists('./transcripts'):
|
|
|
|
| 150 |
return texts
|
| 151 |
|
| 152 |
sf = pd.DataFrame(columns=['Segmented_Text','video_id'])
|
| 153 |
+
|
| 154 |
text = segment(transcript.at[0,'text'])
|
|
|
|
| 155 |
for i in range(len(text)):
|
|
|
|
| 156 |
sf.loc[i, 'Segmented_Text'] = text[i]
|
| 157 |
sf.loc[i, 'video_id'] = transcript.at[0,'video_id']
|
| 158 |
|
|
|
|
| 164 |
return texts
|
| 165 |
|
| 166 |
for i in range(len(sf)):
|
|
|
|
| 167 |
sf.loc[i, 'Segmented_Text'] = word_seg(sf.at[i, 'Segmented_Text'])
|
| 168 |
sf.loc[i, 'Lengths'] = len(tokenizer(sf.at[i, 'Segmented_Text'])['input_ids'])
|
| 169 |
|
|
|
|
| 200 |
def t5_summarizer(link,start, end):
|
| 201 |
input_text = clean_text(link,start,end)
|
| 202 |
lst_outputs = []
|
| 203 |
+
tokenizer1 = AutoTokenizer.from_pretrained("CareerNinja/t5-large_3e-4")
|
| 204 |
+
model1 = AutoModelForSeq2SeqLM.from_pretrained("CareerNinja/t5-large_3e-4")
|
|
|
|
|
|
|
|
|
|
| 205 |
summarizer1 = pipeline("summarization", model=model1, tokenizer=tokenizer1)
|
| 206 |
print(f""" Entered summarizer ! """)
|
| 207 |
st.write('Below is the summary of the given URL: ')
|