Spaces:
Runtime error
Runtime error
Commit
·
bec231c
1
Parent(s):
4029d38
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,7 +23,7 @@ device = 'cuda' if cuda.is_available() else 'cpu'
|
|
| 23 |
tokenizer = AutoTokenizer.from_pretrained("t5-base")
|
| 24 |
os.makedirs('./transcripts/')
|
| 25 |
|
| 26 |
-
def clean_text(link):
|
| 27 |
sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
|
| 28 |
dictionary_path = pkg_resources.resource_filename(
|
| 29 |
"symspellpy", "frequency_dictionary_en_82_765.txt"
|
|
@@ -33,15 +33,9 @@ def clean_text(link):
|
|
| 33 |
def id_ts_grabber(link):
|
| 34 |
youtube_video = link.split("=")
|
| 35 |
video_id = youtube_video[1]
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
return video_id, time_stamp, end_pt
|
| 40 |
-
#print(f""" This is the video ID: {video_id} and this is the Timestamp: {time_stamp}""")
|
| 41 |
-
else:
|
| 42 |
-
time_stamp = None
|
| 43 |
-
return video_id, time_stamp
|
| 44 |
-
#print(f""" This is the video ID: {video_id} and no Timestamp was found""")
|
| 45 |
|
| 46 |
def seg_getter(data,ts,es):
|
| 47 |
starts = []
|
|
@@ -117,14 +111,14 @@ def clean_text(link):
|
|
| 117 |
transcripts.append(ccs['text'])
|
| 118 |
return transcripts
|
| 119 |
|
| 120 |
-
def transcript_collector(link):
|
| 121 |
-
vid
|
| 122 |
print(f""" Fetching the transcript """)
|
| 123 |
filename = get_cc(vid)
|
| 124 |
return transcript_creator(filename, ts, es), vid
|
| 125 |
|
| 126 |
transcript = pd.DataFrame(columns=['text', 'video_id'])
|
| 127 |
-
transcript.loc[0,'text'],transcript.loc[0,'video_id'] = transcript_collector(link)
|
| 128 |
|
| 129 |
def segment(corpus):
|
| 130 |
text_data = [re.sub(r'\[.*?\]', '', x).strip() for x in corpus]
|
|
@@ -198,6 +192,8 @@ def t5_summarizer(link,start, end):
|
|
| 198 |
sumry = list(summary[0].values())
|
| 199 |
input_text.loc[i,'Generated Summary'] = sumry[0]
|
| 200 |
return (input_text.at[i, 'Generated Summary'])
|
| 201 |
-
|
| 202 |
-
|
|
|
|
|
|
|
| 203 |
interface.launch()
|
|
|
|
| 23 |
# Load the tokenizer for the "t5-base" checkpoint once, at module import time.
# NOTE(review): presumably paired with a T5 model loaded elsewhere in the file — confirm.
tokenizer = AutoTokenizer.from_pretrained("t5-base")
|
| 24 |
# Make sure the transcript directory exists. exist_ok=True keeps a restart or
# re-import from crashing with FileExistsError when the directory is already there.
os.makedirs('./transcripts/', exist_ok=True)
|
| 25 |
|
| 26 |
+
def clean_text(link,start,end):
|
| 27 |
sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)
|
| 28 |
dictionary_path = pkg_resources.resource_filename(
|
| 29 |
"symspellpy", "frequency_dictionary_en_82_765.txt"
|
|
|
|
| 33 |
def id_ts_grabber(link):
    """Return the YouTube video id contained in *link*.

    The id is taken from the ``v`` query parameter when present, so extra
    parameters such as ``&t=42s`` no longer leak into the result. Short
    ``youtu.be/<id>`` links are also recognised.

    Args:
        link: URL of the video as a string.

    Returns:
        The video id as a string.

    Raises:
        IndexError: if no id can be found and *link* contains no ``=``
            (same failure mode as the original ``split("=")`` lookup).
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(link)

    # Standard form: https://www.youtube.com/watch?v=<id>[&other=params]
    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]

    # Short form: https://youtu.be/<id>[?t=...]
    short_id = parsed.path.strip("/")
    if parsed.hostname in ("youtu.be", "www.youtu.be") and short_id:
        return short_id

    # Legacy behaviour for anything else: text after the first "=".
    return link.split("=")[1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
def seg_getter(data,ts,es):
|
| 41 |
starts = []
|
|
|
|
| 111 |
transcripts.append(ccs['text'])
|
| 112 |
return transcripts
|
| 113 |
|
| 114 |
+
def transcript_collector(link, ts, es):
    """Fetch the captions for the video behind *link* and build its transcript.

    Args:
        link: video URL; its id is extracted with ``id_ts_grabber``.
        ts: forwarded unchanged to ``transcript_creator``
            (presumably the start of the wanted segment — confirm).
        es: forwarded unchanged to ``transcript_creator``
            (presumably the end of the wanted segment — confirm).

    Returns:
        A ``(transcript, video_id)`` tuple, where ``transcript`` is whatever
        ``transcript_creator`` returns for the downloaded captions.
    """
    video_id = id_ts_grabber(link)
    print(f""" Fetching the transcript """)
    caption_file = get_cc(video_id)
    transcript_text = transcript_creator(caption_file, ts, es)
    return transcript_text, video_id
|
| 119 |
|
| 120 |
transcript = pd.DataFrame(columns=['text', 'video_id'])
|
| 121 |
+
transcript.loc[0,'text'],transcript.loc[0,'video_id'] = transcript_collector(link,start,end)
|
| 122 |
|
| 123 |
def segment(corpus):
|
| 124 |
text_data = [re.sub(r'\[.*?\]', '', x).strip() for x in corpus]
|
|
|
|
| 192 |
sumry = list(summary[0].values())
|
| 193 |
input_text.loc[i,'Generated Summary'] = sumry[0]
|
| 194 |
return (input_text.at[i, 'Generated Summary'])
|
| 195 |
+
|
| 196 |
+
# Gradio UI: a two-line box for the video link and a five-line box for the summary.
textbox = gr.Textbox(label="Enter the link here !!! ", placeholder="Input text here !!! ", lines=2)
outbox = gr.Textbox(label = "Below is the generated summary !", placeholder="Enter a link to see a summary over here !", lines =5)

# NOTE(review): t5_summarizer is declared as (link, start, end) but only one
# input component is wired here — confirm how start/end are meant to be supplied.
# Keep the Interface object itself in `interface`; launch() does not return the
# Interface, so chaining it into the assignment made the later
# `interface.launch()` call fail.
interface = gr.Interface(fn=t5_summarizer, inputs=textbox, outputs=outbox)

# Launch exactly once, keeping the debug mode requested by the original call.
interface.launch(debug=True)
|