Spaces:
Runtime error
Runtime error
Commit ·
25bf457
1
Parent(s): 3943aac
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,6 @@ from youtube_transcript_api import YouTubeTranscriptApi
|
|
| 5 |
import os
|
| 6 |
import ast
|
| 7 |
import pandas as pd
|
| 8 |
-
import before_run
|
| 9 |
from segmentation import SemanticTextSegmentation
|
| 10 |
import re
|
| 11 |
from symspellpy import SymSpell
|
|
@@ -13,9 +12,12 @@ import pkg_resources
|
|
| 13 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 14 |
from torch import cuda
|
| 15 |
from transformers import pipeline
|
|
|
|
|
|
|
| 16 |
from PIL import Image
|
| 17 |
from PIL import ImageDraw
|
| 18 |
from PIL import ImageFont
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
if not os.path.exists('./transcripts'):
|
|
@@ -46,7 +48,6 @@ def clean_text(link,start,end):
|
|
| 46 |
#print(starts)
|
| 47 |
#ts_ = float(ts.strip("s&end"))
|
| 48 |
#es_ = float(es.strip(es[-1]))
|
| 49 |
-
st.write('this is the value of es: ',es)
|
| 50 |
if not(es) :
|
| 51 |
e_val = starts[-1]
|
| 52 |
else:
|
|
@@ -146,13 +147,12 @@ def clean_text(link,start,end):
|
|
| 146 |
# remove Nan
|
| 147 |
df.dropna(inplace=True)
|
| 148 |
sts = SemanticTextSegmentation(df)
|
| 149 |
-
texts
|
| 150 |
-
return texts
|
| 151 |
|
| 152 |
sf = pd.DataFrame(columns=['Segmented_Text','video_id'])
|
| 153 |
|
| 154 |
-
text
|
| 155 |
-
st.write(i_array)
|
| 156 |
for i in range(len(text)):
|
| 157 |
sf.loc[i, 'Segmented_Text'] = text[i]
|
| 158 |
sf.loc[i, 'video_id'] = transcript.at[0,'video_id']
|
|
@@ -201,8 +201,11 @@ def clean_text(link,start,end):
|
|
| 201 |
def t5_summarizer(link,start, end):
|
| 202 |
input_text = clean_text(link,start,end)
|
| 203 |
lst_outputs = []
|
| 204 |
-
tokenizer1 = AutoTokenizer.from_pretrained("CareerNinja/
|
| 205 |
-
|
|
|
|
|
|
|
|
|
|
| 206 |
summarizer1 = pipeline("summarization", model=model1, tokenizer=tokenizer1)
|
| 207 |
print(f""" Entered summarizer ! """)
|
| 208 |
st.write('Below is the summary of the given URL: ')
|
|
|
|
| 5 |
import os
|
| 6 |
import ast
|
| 7 |
import pandas as pd
|
|
|
|
| 8 |
from segmentation import SemanticTextSegmentation
|
| 9 |
import re
|
| 10 |
from symspellpy import SymSpell
|
|
|
|
| 12 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 13 |
from torch import cuda
|
| 14 |
from transformers import pipeline
|
| 15 |
+
import nltk
|
| 16 |
+
nltk.download('stopwords')
|
| 17 |
from PIL import Image
|
| 18 |
from PIL import ImageDraw
|
| 19 |
from PIL import ImageFont
|
| 20 |
+
import time
|
| 21 |
|
| 22 |
|
| 23 |
if not os.path.exists('./transcripts'):
|
|
|
|
| 48 |
#print(starts)
|
| 49 |
#ts_ = float(ts.strip("s&end"))
|
| 50 |
#es_ = float(es.strip(es[-1]))
|
|
|
|
| 51 |
if not(es) :
|
| 52 |
e_val = starts[-1]
|
| 53 |
else:
|
|
|
|
| 147 |
# remove Nan
|
| 148 |
df.dropna(inplace=True)
|
| 149 |
sts = SemanticTextSegmentation(df)
|
| 150 |
+
texts = sts.get_segments()
|
| 151 |
+
return texts
|
| 152 |
|
| 153 |
sf = pd.DataFrame(columns=['Segmented_Text','video_id'])
|
| 154 |
|
| 155 |
+
text = segment(transcript.at[0,'text'])
|
|
|
|
| 156 |
for i in range(len(text)):
|
| 157 |
sf.loc[i, 'Segmented_Text'] = text[i]
|
| 158 |
sf.loc[i, 'video_id'] = transcript.at[0,'video_id']
|
|
|
|
| 201 |
def t5_summarizer(link,start, end):
|
| 202 |
input_text = clean_text(link,start,end)
|
| 203 |
lst_outputs = []
|
| 204 |
+
tokenizer1 = AutoTokenizer.from_pretrained("CareerNinja/t5_large_3e-4_on_v2_dataset")
|
| 205 |
+
st.write('Loading the model!')
|
| 206 |
+
start_time = time.time()
|
| 207 |
+
model1 = AutoModelForSeq2SeqLM.from_pretrained("CareerNinja/t5_large_3e-4_on_v2_dataset")
|
| 208 |
+
st.write('Model loading complete, time taken: ', time.time()-start_time)
|
| 209 |
summarizer1 = pipeline("summarization", model=model1, tokenizer=tokenizer1)
|
| 210 |
print(f""" Entered summarizer ! """)
|
| 211 |
st.write('Below is the summary of the given URL: ')
|