Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,7 +7,7 @@ def cap(match):
|
|
| 7 |
return(match.group().capitalize())
|
| 8 |
|
| 9 |
|
| 10 |
-
def predict(input_text):
|
| 11 |
|
| 12 |
model = PunctuationModel()
|
| 13 |
output_text = model.restore_punctuation(input_text)
|
|
@@ -16,28 +16,33 @@ def predict(input_text):
|
|
| 16 |
srt_file = input_text
|
| 17 |
punctuated = output_text
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
pcnt_file_array=punctuated.split(' ')
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
|
| 42 |
regex1 = r"\bi\b"
|
| 43 |
regex2 = r"(?<=[.?!;])\s*\w"
|
|
@@ -56,7 +61,8 @@ Model restores punctuation and case i.e. of the following punctuations -- [! ? .
|
|
| 56 |
examples = ["my name is clara i live in berkeley california"]
|
| 57 |
|
| 58 |
interface = gr.Interface(fn = predict,
|
| 59 |
-
inputs = ["
|
|
|
|
| 60 |
outputs = ["text"],
|
| 61 |
title = title,
|
| 62 |
description = description,
|
|
|
|
| 7 |
return(match.group().capitalize())
|
| 8 |
|
| 9 |
|
| 10 |
+
def predict(input_text, brakes):
|
| 11 |
|
| 12 |
model = PunctuationModel()
|
| 13 |
output_text = model.restore_punctuation(input_text)
|
|
|
|
| 16 |
srt_file = input_text
|
| 17 |
punctuated = output_text
|
| 18 |
|
| 19 |
+
# unless one of the line-break methods is selected below,
|
| 20 |
+
# return the text as a single line
|
| 21 |
+
pcnt_file_cr = output_text
|
|
|
|
| 22 |
|
| 23 |
+
if 'timelines' in brakes:
|
| 24 |
+
srt_file_strip=srt_file.strip()
|
| 25 |
+
srt_file_sub=re.sub('\s*\n\s*','# ',srt_file_strip)
|
| 26 |
+
srt_file_array=srt_file_sub.split(' ')
|
| 27 |
+
pcnt_file_array=punctuated.split(' ')
|
| 28 |
+
|
| 29 |
+
# goal: restore the break points, i.e. produce the same number of lines as the srt file
|
| 30 |
+
# this is necessary, because each line in the srt file corresponds to a frame from the video
|
| 31 |
+
if len(srt_file_array)!=len(pcnt_file_array):
|
| 32 |
+
return "AssertError: The length of the transcript and the punctuated file should be the same: ",len(srt_file_array),len(pcnt_file_array)
|
| 33 |
+
pcnt_file_array_hash = []
|
| 34 |
+
for idx, item in enumerate(srt_file_array):
|
| 35 |
+
if item.endswith('#'):
|
| 36 |
+
pcnt_file_array_hash.append(pcnt_file_array[idx]+'#')
|
| 37 |
+
else:
|
| 38 |
+
pcnt_file_array_hash.append(pcnt_file_array[idx])
|
| 39 |
+
|
| 40 |
+
# assemble the array back to a string
|
| 41 |
+
pcnt_file_cr=' '.join(pcnt_file_array_hash).replace('#','\n')
|
| 42 |
|
| 43 |
+
|
| 44 |
+
if 'sentences' in brakes:
|
| 45 |
+
pass
|
| 46 |
|
| 47 |
regex1 = r"\bi\b"
|
| 48 |
regex2 = r"(?<=[.?!;])\s*\w"
|
|
|
|
| 61 |
examples = ["my name is clara i live in berkeley california"]
|
| 62 |
|
| 63 |
interface = gr.Interface(fn = predict,
|
| 64 |
+
inputs = [gr.CheckboxGroup(["sentences", "timelines"], label="brakes"),
|
| 65 |
+
"text"],
|
| 66 |
outputs = ["text"],
|
| 67 |
title = title,
|
| 68 |
description = description,
|