Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -153,38 +153,7 @@ def WikiSearch(term):
|
|
| 153 |
for item in termtoks:
|
| 154 |
# Search for the term on Wikipedia and get the first result
|
| 155 |
result = wikipedia.search(item, results=20)
|
| 156 |
-
return result
|
| 157 |
-
|
| 158 |
-
def find_string_positions(s, string):
    """Return the start index of every non-overlapping occurrence of ``string`` in ``s``.

    The scan runs left to right and resumes just past the end of each match,
    so ``find_string_positions("aaaa", "aa")`` gives ``[0, 2]``.

    Args:
        s: Text to search in.
        string: Substring to look for.

    Returns:
        A list of integer offsets into ``s`` in ascending order. The list is
        empty when ``string`` does not occur in ``s`` or when ``string`` is
        empty.
    """
    step = len(string)
    if step == 0:
        # An empty needle matches at every offset without moving the cursor
        # forward, so the search loop would never terminate.
        return []

    positions = []
    position = s.find(string)
    while position != -1:
        positions.append(position)
        # Continue after the current match so matches never overlap.
        position = s.find(string, position + step)
    return positions
|
| 168 |
-
|
| 169 |
-
def splittext(string, split_positions):
    """Rebuild numbered, line-broken entries from text sliced at the given offsets.

    Every offset in ``split_positions`` is moved 12 characters to the left and
    ``string`` is sliced between consecutive shifted offsets. The first slice
    (everything before the first shifted offset) is skipped, and the text
    after the last shifted offset is never emitted.

    For each remaining slice the first 29 characters are kept as one line, the
    character at index 29 is replaced by a newline, and the result is cut at
    its second-to-last space. Each entry is prefixed with a running number on
    its own line and followed by a blank line.

    Args:
        string: The text to slice.
            # presumably a subtitle file flattened onto one line -- TODO confirm
        split_positions: Offsets into ``string``.
            # presumably the positions of " --> ", so that -12 lands on the
            # start of a 12-character start timestamp -- verify against caller

    Returns:
        The concatenated entries with the first two characters removed (for a
        non-empty result that is the leading "1" and its newline).
    """
    # Slice between consecutive offsets, each shifted back by 12 characters.
    split_strings = []
    prepos = 0
    for pos in split_positions:
        pos -= 12
        split_strings.append(string[prepos:pos])
        prepos = pos

    entries = []
    for linenumber, item in enumerate(split_strings[1:], start=1):
        # Keep the first 29 characters as a line of their own; the character
        # at index 29 is dropped in favour of the line break.
        stoutput = item[0:29] + "\n" + item[30:]

        # Cut at the second-to-last space. A slice with fewer than two spaces
        # is kept whole instead of raising IndexError.
        last_space = stoutput.rfind(" ")
        cut = stoutput.rfind(" ", 0, last_space) if last_space != -1 else -1
        if cut != -1:
            stoutput = stoutput[:cut]

        entries.append(str(linenumber) + "\n" + stoutput + "\n\n")

    return "".join(entries)[2:]
|
| 188 |
|
| 189 |
def create_dictionary(word_list, word_dict = {}):
|
| 190 |
word_list = set(word_list.split(" "))
|
|
@@ -262,6 +231,62 @@ def split_verbs_nouns(text):
|
|
| 262 |
|
| 263 |
return verbs_nouns_text, other_words_text
|
| 264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
groupinput_text = gr.inputs.Textbox(lines=2, label="Enter a list of words")
|
| 266 |
groupoutput_text = gr.outputs.Textbox(label="Grouped words")
|
| 267 |
|
|
@@ -311,7 +336,8 @@ with gr.Blocks() as lliface:
|
|
| 311 |
with gr.Tab("Advanced - LingQ Addons ideas"):
|
| 312 |
gr.HTML("Extra functions needed - Persitent Sentence translation, UNWFWO, POS tagging and Word Count per user of words in their account. Macaronic Text is also another way to practice only the important information")
|
| 313 |
with gr.Tab("Merged Subtitles"):
|
| 314 |
-
gr.
|
|
|
|
| 315 |
with gr.Row():
|
| 316 |
RomanFile = gr.File(label="Paste Roman")
|
| 317 |
W4WFile = gr.File(label="Paste Word 4 Word")
|
|
|
|
| 153 |
for item in termtoks:
|
| 154 |
# Search for the term on Wikipedia and get the first result
|
| 155 |
result = wikipedia.search(item, results=20)
|
| 156 |
+
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
def create_dictionary(word_list, word_dict = {}):
|
| 159 |
word_list = set(word_list.split(" "))
|
|
|
|
| 231 |
|
| 232 |
return verbs_nouns_text, other_words_text
|
| 233 |
|
| 234 |
+
def split_srt_file(file_path):
    """Insert " | " between the words of every subtitle in an SRT file.

    The file is read as UTF-8 text and split into sections on blank lines. In
    each section the first line and the second line (the timestamp) are kept
    unchanged. Every following line has its spaces replaced by " | ", and
    those lines are joined into a single line with " | ".

    Args:
        file_path: Path of the SRT file, or an open file-like object that
            exposes its path as ``.name``.
            # presumably what the Gradio "file" input hands over -- confirm
            # against the installed Gradio version

    Returns:
        The rewritten sections joined by blank lines, as one string. Sections
        with fewer than two lines (for example the empty chunk produced by
        trailing blank lines) are passed through unchanged.
    """
    # Accept an uploaded-file wrapper as well as a plain path. Requiring
    # ``read`` keeps pathlib.Path (which also has ``.name``) on the path branch.
    if hasattr(file_path, "read") and hasattr(file_path, "name"):
        path = file_path.name
    else:
        path = file_path

    with open(path, "r", encoding="utf-8") as f:
        srt_contents = f.read()

    rewritten = []
    for section in srt_contents.split("\n\n"):
        section_lines = section.split("\n")
        if len(section_lines) < 2:
            # No timestamp line to preserve; nothing to rewrite.
            rewritten.append(section)
            continue

        index_line, timestamp = section_lines[0], section_lines[1]
        # Separate words per line first, then join the lines, so a line break
        # becomes exactly one " | " and no leading characters are lost.
        subtitle_text = " | ".join(
            line.replace(" ", " | ") for line in section_lines[2:]
        )
        rewritten.append(f"{index_line}\n{timestamp}\n{subtitle_text}")

    return "\n\n".join(rewritten)
|
| 257 |
+
|
| 258 |
+
def find_string_positions(s, string):
    """Return the start index of every non-overlapping occurrence of ``string`` in ``s``.

    The scan runs left to right and resumes just past the end of each match,
    so ``find_string_positions("aaaa", "aa")`` gives ``[0, 2]``.

    Args:
        s: Text to search in.
        string: Substring to look for.

    Returns:
        A list of integer offsets into ``s`` in ascending order. The list is
        empty when ``string`` does not occur in ``s`` or when ``string`` is
        empty.
    """
    step = len(string)
    if step == 0:
        # An empty needle matches at every offset without moving the cursor
        # forward, so the search loop would never terminate.
        return []

    positions = []
    position = s.find(string)
    while position != -1:
        positions.append(position)
        # Continue after the current match so matches never overlap.
        position = s.find(string, position + step)
    return positions
|
| 268 |
+
|
| 269 |
+
def splittext(string):
    """Rebuild numbered, line-broken subtitle entries from text containing " --> ".

    Every occurrence of " --> " in ``string`` is located, each position is
    moved 12 characters to the left, and ``string`` is sliced between
    consecutive shifted positions. The first slice (everything before the
    first shifted position) is skipped, and the text after the last shifted
    position is never emitted.

    For each remaining slice the first 29 characters are kept as one line, the
    character at index 29 is replaced by a newline, and the result is cut at
    its second-to-last space. Each entry is prefixed with a running number on
    its own line and followed by a blank line.

    Args:
        string: The text to slice.
            # presumably an SRT file flattened onto one line, with 12-character
            # "HH:MM:SS,mmm" timestamps -- TODO confirm
            # NOTE(review): the "Merged Subtitles" tab wires this function to
            # a "file" input, while this code uses ``string`` as a str --
            # verify what Gradio actually passes.

    Returns:
        The concatenated entries with the first two characters removed (for a
        non-empty result that is the leading "1" and its newline).
    """
    split_positions = find_string_positions(string, " --> ")

    # Slice between consecutive separator positions, each shifted back by 12
    # characters.
    split_strings = []
    prepos = 0
    for pos in split_positions:
        pos -= 12
        split_strings.append(string[prepos:pos])
        prepos = pos

    entries = []
    for linenumber, item in enumerate(split_strings[1:], start=1):
        # Keep the first 29 characters as a line of their own; the character
        # at index 29 is dropped in favour of the line break.
        stoutput = item[0:29] + "\n" + item[30:]

        # Cut at the second-to-last space. A slice with fewer than two spaces
        # is kept whole instead of raising IndexError.
        last_space = stoutput.rfind(" ")
        cut = stoutput.rfind(" ", 0, last_space) if last_space != -1 else -1
        if cut != -1:
            stoutput = stoutput[:cut]

        entries.append(str(linenumber) + "\n" + stoutput + "\n\n")

    return "".join(entries)[2:]
|
| 289 |
+
|
| 290 |
groupinput_text = gr.inputs.Textbox(lines=2, label="Enter a list of words")
|
| 291 |
groupoutput_text = gr.outputs.Textbox(label="Grouped words")
|
| 292 |
|
|
|
|
| 336 |
with gr.Tab("Advanced - LingQ Addons ideas"):
|
| 337 |
gr.HTML("Extra functions needed - Persitent Sentence translation, UNWFWO, POS tagging and Word Count per user of words in their account. Macaronic Text is also another way to practice only the important information")
|
| 338 |
with gr.Tab("Merged Subtitles"):
|
| 339 |
+
gr.Interface(fn=split_srt_file, inputs="file", outputs="text", title="Text for w4w creation in G Translate")
|
| 340 |
+
# Second converter in this tab: feeds the uploaded file to splittext and shows the text result.
gr.Interface(fn=splittext, inputs="file", outputs="text", title="Text for w4w creation in G Translate")
|
| 341 |
with gr.Row():
|
| 342 |
RomanFile = gr.File(label="Paste Roman")
|
| 343 |
W4WFile = gr.File(label="Paste Word 4 Word")
|