Update app.py

app.py CHANGED
@@ -15,7 +15,7 @@ from gtts import gTTS
 from collections import Counter
 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
-
+from docx import Document
 
 #Uncomment these for Huggingface
 nltk.download('maxent_ne_chunker') #Chunker
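
Note: the new import comes from the python-docx package (installed as `python-docx`, imported as `docx`); it is used by the `save_string_to_file` helper added later in this diff. A minimal sanity check of the API this commit relies on:

    from docx import Document

    doc = Document()                  # new empty .docx document
    doc.add_paragraph("hello world")  # python-docx paragraph API
    doc.save("hello.docx")            # write the file to disk
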
@@ -189,8 +189,8 @@ def merge_lines(roman_file, w4w_file, full_mean_file, macaronic_file):
 
     return "\n".join(merged_lines)
 
-TTSLangOptions = gr.Dropdown(choices=["en", "ja", "ko", "zh-cn"], value="en", label="choose the language of the srt")
-TTSLangOptions2 = gr.Dropdown(choices=["en", "ja", "ko", "zh-cn"], value="en", label="choose the language of the srt")
+TTSLangOptions = gr.Dropdown(choices=["en", "de", "es", "ja", "ko", "zh-cn"], value="en", label="choose the language of the srt")
+TTSLangOptions2 = gr.Dropdown(choices=["en", "de", "es", "ja", "ko", "zh-cn"], value="en", label="choose the language of the srt")
 
 def TTSforListeningPractice(text, language = "en"):
     speech = gTTS(text=text, lang=language, slow="False")
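
Note: in the unchanged context line above, `slow="False"` passes a non-empty string, which Python treats as truthy, so gTTS will actually read the text slowly. A corrected call (not part of this commit; the filename is illustrative) would pass the boolean:

    from gtts import gTTS

    speech = gTTS(text="hello world", lang="en", slow=False)  # boolean False, not the string "False"
    speech.save("ListeningPractice.mp3")
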
@@ -290,6 +290,16 @@ def split_verbs_nouns(text):
 
 SRTLangOptions = gr.Dropdown(choices=["en", "ja", "ko", "zh-cn"], value="en", label="choose the language of the srt")
 
+def save_string_to_file(string_to_save, file_name, srtdocx):
+    with open(file_name, 'w', encoding='utf-8') as file:
+        file.write(string_to_save)
+    if srtdocx == "True":
+        with open(file_name.split('.')[0] + '.srt', 'w', encoding='utf-8') as file:
+            file.write(string_to_save)
+        srtdocument = Document()
+        srtdocument.add_paragraph(string_to_save)
+        srtdocument.save('SplitSRT.docx')
+
 def split_srt_file(text, lang): #file_path):
     # Open the SRT file and read its contents
     #with open(file_path, 'r') as f:
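
Note: `srtdocx` is compared against the string "True" rather than a boolean, which matches the string literals passed at the call sites added further down in this diff. A usage sketch (the SRT contents are illustrative):

    # writes SplitSRT.txt, then SplitSRT.srt and SplitSRT.docx because srtdocx == "True"
    save_string_to_file("1\n00:00:01,000 --> 00:00:03,000\nhello | world", "SplitSRT.txt", "True")

    # writes only SplitPOSsrt.txt
    save_string_to_file("1\n00:00:01,000 --> 00:00:03,000\nINTJ | NOUN", "SplitPOSsrt.txt", "False")
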
@@ -305,6 +315,7 @@ def split_srt_file(text, lang): #file_path):
     # Split the SRT file by timestamp
     srt_sections = srt_contents.split('\n\n')
     srt_sections_POSversion = []
+    subaswordlist = ""
 
     # Loop through each section of the SRT file
     for i in range(len(srt_sections)):
@@ -319,14 +330,27 @@ def split_srt_file(text, lang): #file_path):
         #subtitle_text = subtitle_text.replace(' ', ' | ')
         for token in sub_split_line:
             subtitle_text += token.text + " | "
+            subaswordlist += token.text + " "
             subtitle_textPOSversion += token.pos_ + " | "
 
         # Reconstruct the section with the updated subtitle text
         srt_sections[i] = f"{section_lines[0]}\n{timestamp}\n{subtitle_text[3:]}"
         srt_sections_POSversion.append(f"{section_lines[0]}\n{timestamp}\n{subtitle_textPOSversion[3:]}\n\n")
 
+    SplitSRT = '\n\n'.join(srt_sections)
+    SplitPOSsrt = ''.join(srt_sections_POSversion)
+    save_string_to_file(SplitSRT, "SplitSRT.txt", "True")
+    save_string_to_file(SplitPOSsrt, "SplitPOSsrt.txt", "False")
+    subaswordlist = set(subaswordlist.split(" "))
+    subaswordlistOutput = ""
+
+    for word in subaswordlist:
+        subaswordlistOutput += "\n | " + word
+
+    subaswordlistOutput = str(len(subaswordlist)) + "\n" + subaswordlistOutput
+
     # Join the SRT sections back together into a single string
-    return
+    return subaswordlistOutput, ["SplitSRT.docx", "SplitSRT.txt", "SplitSRT.srt", "SplitPOSsrt.txt"], SplitSRT, SplitPOSsrt
 
 def find_string_positions(s, string):
     positions = []
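
Note: `split_srt_file` now returns four values instead of falling through with a bare `return`. A hypothetical call, assuming the spaCy pipeline selected by `lang` is loaded elsewhere in app.py:

    srt_contents = "1\n00:00:01,000 --> 00:00:03,000\nHello world\n\n2\n00:00:04,000 --> 00:00:06,000\nGoodbye"
    wordlist, files, SplitSRT, SplitPOSsrt = split_srt_file(srt_contents, "en")
    # wordlist: the unique-token count followed by a " | "-separated word list
    # files:    the four paths written via save_string_to_file, suitable for a gr.File output
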
@@ -463,18 +487,191 @@ def add_text_to_image(input_image, text, output_image_path="output.png", border_
     img.save(output_image_path, "PNG")
     return "output.png"
 
+def UnknownTrackTexttoApp(text): #Copy of def OptimisedTtAppForUNWFWO(text):
+    #Buttons and labels autocreation
+    #Change this to spacy version so that data is from one library
+    #Javascript videos on youtube - KodeBase - Change button color Onclick; bro code - button in 5 minutes
+    #GPT3 helped guide the highlighting if statements
+
+    FinalOutput = ""
+    #sentence = "One Piece chapter 1049 spoilers Thanks to Etenboby from WG forums Chapter 1049: **\"The world we should aspire to\"** * In the cover, someone burned Niji and Yonji\u2019s book * Kaido flashback time. We see his childhood in Vodka Kingdom, and where a few years later he met Whitebeard who told him that Rocks wants to meet him * In the present, part of Raizo\u2019s water leaves the castle and flame clouds disappear. But Momo makes a new one. * Luffy says he will create a world where none of his friends would starve, then he hits Kaido and Kaido falls to the ground of the flower capital. * In another flashback, Kaido tells King that Joy Boy will be the man that can defeat him. **Additional info** *Flashback to Kaidou as a kid* *- His country tries to sell him to the marines but he escapes* *- He rampages in Hachinosu(i think it's blackbeard's island) and Rocks invites him to his crew* *- Young WB appears* *- Rocks flashback suddenly ends* *- Higurashi invites Kaidou* *- The flashback ends with Kaidou telling King he knows who Joy Boy is.* *Back to the present* \\- *Denjirou hugs Hiyori* \\- *Luffy's punch hits Kaidou* *Flashback continues* \\- *King asks: Who is it then?* \\- *Kaidou: The one who will defeat me* \\- *King: Then he will not appear* \\- *Onigashima falls near the capital* \\- *Momo falls* **BREAK NEXT WEEK** https://www.reddit.com/r/OnePiece/comments/umu2h0/one_piece_chapter_1049_spoilers/" #@param {type: "string"}
+    HTMLMainbody = ""
+
+    doc = nlp(text)
+    iIDNumber = 0
+    iVerbCount = 0
+    iNounCount = 0
+    iWords = 0
+    allverbs = ""
+    allverbslist = ""
+    allverbids = ""
+    allverbidslist = ""
+
+    for token in doc:
+        if (token.pos_ == "VERB") or (token.pos_ == "AUX"):
+            HTMLMainbody = HTMLMainbody + "<button id='btn" + str(iVerbCount) + "' onclick=HighlightWord('btn" + str(iVerbCount) + "')> " + token.text + "</button> "
+            allverbids = allverbids + str(iVerbCount) + " "
+            iVerbCount += 1
+            iWords += 1
+            allverbs = allverbs + token.text + " "
+        elif token.pos_ == "NOUN":
+            HTMLMainbody = HTMLMainbody + "<label class='Nouns' id='lbl" + token.text + "'>" + token.text + " </label>"
+            iNounCount += 1
+            iWords += 1
+        elif token.pos_ == "PUNCT":
+            HTMLMainbody = HTMLMainbody + token.text
+        else:
+            HTMLMainbody = HTMLMainbody + token.text + " "
+            iWords += 1
+        iIDNumber += 1
+
+    allverbslist = allverbs.split()
+    allverbidslist = allverbids.split()
+
+    FinalHTML = ""
+    FinalCSS = ""
+    FinalJS = ""
+
+    FinalCSS = FinalCSS + ''' <style>
+    body {
+        background-color: darksalmon;
+    }
+
+    .Nouns {
+        color: red;
+    }
+
+    .clunknown{
+        background-color: gainsboro;
+    }
+
+    .clknownl1{
+        background-color: yellow;
+    }
+
+    .clknownl2{
+        background-color: gold;
+    }
+
+    .clknownl3{
+        background-color: orange;
+    }
+
+    .PD1 {
+        text-align: center;
+        font-size: larger;
+        font-family: cursive;
+    }
+
+    .PD2 {
+        font-family: monospace;
+    }
+    </style>
+    '''
+
+    #style='background-color:Gainsboro; There is no general style attribute for buttons but you can make a class and put the style conditions
+
+    iSents = 0
+    for sent in doc.sents:
+        iSents += 1
+
+    FinalHTML = FinalHTML + "\n<div id='PD1'>Picture on mouse hover = Visual<br> Speed = End Goal ==> App Timer Functions ||| \nSentences: " + str(iSents) + " | Words: " + str(iWords) + " | App elements: " + str(iNounCount + iVerbCount) + " | Verbs: " + str(iVerbCount) + "</div>"
+    FinalHTML = FinalHTML + "\n<div><hr><progress id='myVerbProgress' value='0' max='" + str(iVerbCount) + "'></progress></div>"
+    FinalJS = FinalJS + '''\n
+    <script>
+    function HighlightWord(Button){
+        if (document.getElementById(Button).style.backgroundColor === 'orange') {
+            document.getElementById(Button).style.backgroundColor=''
+        }
+        else if (document.getElementById(Button).style.backgroundColor === 'gold') {
+            document.getElementById(Button).style.backgroundColor='orange'
+        }
+        else if (document.getElementById(Button).style.backgroundColor === 'yellow') {
+            document.getElementById(Button).style.backgroundColor='gold'
+        }
+        else {document.getElementById(Button).style.backgroundColor='yellow'
+        }
+        OnlyUnknownVerbs()
+    }
+    '''
+
+    FinalHTML = FinalHTML + "\n<div><hr>\n" + HTMLMainbody + "\n"
+    #FinalHTML = FinalHTML + '''</div><hr>
+    #<button onclick=OnlyUnknownSentences() id="btnOnlyUnknownSentences">Only Unknown Sentences Put this function in a timer to keep up to date without input</button>
+    #'''
+    FinalJS = FinalJS + '''
+    function OnlyUnknownVerbs(){
+        AllButtons = ''' + str(allverbidslist) + '''
+        AllButtonsText = ''' + str(allverbslist) + '''
+        UnknownOutput = ""
+        iUnknownCount = 0
+        AllButtons.forEach(function(item){
+            if (document.getElementById('btn'+item).style.backgroundColor === ''){
+                UnknownOutput += AllButtonsText[item] + " "
+                iUnknownCount += 1
+            }
+            document.getElementById('myVerbProgress').value = ''' + str(iVerbCount) + ''' - iUnknownCount
+        })
+        document.getElementById('PD2').textContent = 'Only Unknwon words list: ' + UnknownOutput
+    }
+
+
+    </script>
+    '''
+
+    FinalHTML = FinalHTML + '''<br><hr><br>
+    <div id='PD2'> Only Unknown List</div>
+    \n
+    '''
+
+    FinalOutput = FinalHTML + FinalCSS + FinalJS
+    return FinalOutput, FinalOutput
+
+#Kathryn Lingel - Pyambic Pentameter Example - PyCon US
+#Basic Language Model Code
+def build_model(source_text):
+    list_of_words = source_text.split()
+    model = {} #initialise model to empty dictionary
+
+    for i, word in enumerate(list_of_words[:-1]): #every word except last word
+        if not word in model: #If word not already in dictionary as a key we add it and initialise to empty array
+            model[word] = []
+        next_word = list_of_words[i+1]
+        model[word].append(next_word) #model = dictionary per word containing previously seen next words from ANY given text ==> even lyrics
+
+    translatestring = str(model)
+    translatestring = translatestring.replace("'", "")
+    return model, translatestring
+
+def markov_generate(source_text, num_words = 20):
+    model = build_model(source_text)
+    seed = random.choice(list(model.keys())) #Randomly pick a word ==> Heading of the dictionary are keys aka the words
+    output = [seed] #output initialisation using random word
+    for i in range(num_words):
+        last_word = output[-1] #of the output list
+        next_word = random.choice(model[last_word]) # next word to the above word
+        output.append(next_word) #new last word in the output list
+        if next_word not in model:
+            break
+
+    return ' '.join(output) #New list into a string aka (hopefully) sentence
+# print(markov_generate("I am the egg man they are the egg men I am the wallrus goo goo g' joob"))
+
+
 # Define the Gradio interface inputs and outputs for video split
 spvvideo_file_input = gr.File(label='Video File')
 spvsubtitle_file_input = gr.File(label='Subtitle File')
 spvdownload_output = gr.File(label='Download Segmented Files')
 
+Markovlength = gr.Number(value=30, label='Length of generation')
+
 
-groupinput_text = gr.
-groupoutput_text = gr.
+groupinput_text = gr.Textbox(lines=2, label="Enter a list of words")
+groupoutput_text = gr.Textbox(label="Grouped words")
 
 with gr.Blocks() as lliface:
     gr.HTML("<p> Target 1: Dual audio at word Level while using repitition to train random recall --> Word level Time <br> Target 2: Video --> Split by sentence --> each word repeated (60) + each phrase (10) + each sentence (10) --> TTS file for practice --> State Management/Known word Tracker <hr> The trick is minimum one minute of focus on a new word --> Listening is hard because there are new word within seconds and you need repeated focus on each to learn </p> <p>Audio = best long form attention mechanism AS it is ANTICIPATION (Awareness of something before it happens like knowing song Lyrics) FOCUSED - Attention (Focused Repitition) + Exposure (Random Repitition) </p>")
-    gr.HTML("""<hr> <a href="https://translate.google.com/?hl=en&tab=TT"> -- Google Translate -- </a> | <a href='https://huggingface.co/spaces/damo-vilab/modelscope-text-to-video-synthesis'> -- Modelscope Text to Video -- </a> | <a href='https://huggingface.co/spaces/stabilityai/stable-diffusion'> -- stable-diffusion 2 -- </a> | <a href='https://huggingface.co/spaces/stabilityai/stable-diffusion-1'> -- stable-diffusion 1 -- </a>""")
+    gr.HTML("""<hr> <a href="https://translate.google.com/?hl=en&tab=TT"> -- Google Translate -- </a> | <a href='https://huggingface.co/spaces/damo-vilab/modelscope-text-to-video-synthesis'> -- Modelscope Text to Video -- </a> | <a href='https://huggingface.co/spaces/stabilityai/stable-diffusion'> -- stable-diffusion 2 -- </a> | <a href='https://huggingface.co/spaces/stabilityai/stable-diffusion-1'> -- stable-diffusion 1 -- </a> | <a href='https://huggingface.co/spaces/kakaobrain/karlo'> -- karlo 1 -- </a>""")
     with gr.Tab("Welcome"):
         gr.HTML("""<p>Spaces Test - Still Undercontruction | Knowledge is a Language but productive knowledge is find replace as well | LingQ is good option for per word state management</p> <p> Arrows app json creator for easy knowledge graphing and spacy POS graph? --> Questions? -->
 <p> ChatGPT Turns Learning into a read only what you dont know ask only what you dont know feedback loop --> All you have to do is keep track of what prompts you have asked in the past</p> """)
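
Note: `UnknownTrackTexttoApp` relies on a module-level spaCy pipeline `nlp` defined earlier in app.py, and the embedded `str(allverbidslist)` / `str(allverbslist)` only work because a Python list repr happens to be a valid JavaScript array literal. Separately, `markov_generate` as committed will raise at runtime: `build_model` returns a `(model, translatestring)` tuple, so `model.keys()` is called on a tuple, and the `not in model` guard runs only after the lookup that would fail. A minimal corrected sketch (names unchanged, not part of this commit):

    import random

    def markov_generate(source_text, num_words=20):
        model, _ = build_model(source_text)       # unpack the (dict, string) tuple
        seed = random.choice(list(model.keys()))  # random starting word
        output = [seed]
        for _ in range(num_words):
            last_word = output[-1]
            if last_word not in model:            # guard before the lookup, not after appending
                break
            output.append(random.choice(model[last_word]))
        return ' '.join(output)
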
@@ -482,9 +679,14 @@ with gr.Blocks() as lliface:
         gr.Interface(fn=group_words, inputs=groupinput_text, outputs=groupoutput_text, description="Word Grouping and Rotation - Group a list of words into sets of 10 and rotate them every 60 seconds.") #.queue()
         gr.HTML("""HTML Version <hr> <iframe height="1200" style="width: 100%;" scrolling="no" title="Memorisation Aid" src="https://codepen.io/kwabs22/embed/preview/GRXKQgj?default-tab=result&editable=true" frameborder="no" loading="lazy" allowtransparency="true" allowfullscreen="true">
         See the Pen <a href="https://codepen.io/kwabs22/pen/GRXKQgj"> Memorisation Aid</a> by kwabs22 (<a href="https://codepen.io/kwabs22">@kwabs22</a>) on <a href="https://codepen.io">CodePen</a>. </iframe>""")
+    with gr.Tab("Transition is the end goal"):
+        gr.HTML("Transition is the true nature of logic i.e. like some form of non-semantic embedding that is semantic?")
+        gr.Interface(fn=build_model, inputs="text", outputs=["text", "text"], description="Create Collocation Dictionary --> Google Kathryn Lingel - Pyambic Pentameter Example - PyCon US for more")
+        gr.Interface(fn=markov_generate, inputs=["text", Markovlength], outputs="text", description="Generate Text based on the collocations in the text")
     with gr.Tab("Unknown Tracker"):
         gr.HTML("Repitition of things you know is a waste of time when theres stuff you dont know <p> In Language the goal is bigger vocab --> Knowledge equivalent = question answer pairs but to get to those you need related information pairs</p> <p> Vocab = Glossary + all non text wall(lists, diagrams, etc.)</p>")
         gr.Textbox("Placeholder for a function that creates a set list and can takes a list for known words and auto find replaces the stuff you know out of the content")
+        gr.Interface(fn=UnknownTrackTexttoApp, inputs="text", outputs=["html", "text"], description="Use the text from here to create lists you use for the TTS section")
     with gr.Tab("Unique word ID - use in Infranodus"):
         gr.Interface(fn=unique_word_count, inputs="text", outputs="text", description="Wordcounter")
         gr.Interface(fn=SepHypandSynExpansion, inputs="text", outputs=["text", "text"], description="Word suggestions - Analyse the unique words in infranodus")
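
Note: `build_model` maps each word to the list of words seen immediately after it. For a quick sense of both outputs the new "Transition is the end goal" tab exposes:

    model, translatestring = build_model("the cat sat on the mat")
    # model           == {'the': ['cat', 'mat'], 'cat': ['sat'], 'sat': ['on'], 'on': ['the']}
    # translatestring == "{the: [cat, mat], cat: [sat], sat: [on], on: [the]}"
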
@@ -504,6 +706,7 @@ with gr.Blocks() as lliface:
         gr.HTML("""<a href="https://huggingface.co/spaces/pharma/CLIP-Interrogator"> --Huggingface CLIP-Interrogator Space-- </a><br> """)
         gr.Interface(fn=removeTonalMarks, inputs="text", outputs="text", description="For text with characters use this function to remove any conflicting characters (if error below)")
         gr.Interface(fn=add_text_to_image , inputs=["image", "text"], outputs="image", description="Create Annotated images (Can create using stable diffusion and use the prompt)")
+        gr.HTML("Use Shift Enter To put text on new lines if the text doesnt fit <hr>")
     #with gr.Tab("Transcribe - RASMUS Whisper"):
         #gr.Interface.load("spaces/RASMUS/Whisper-youtube-crosslingual-subtitles", title="Subtitles")
     with gr.Tab("Advanced - LingQ Addon Ideas"):
@@ -516,7 +719,7 @@ with gr.Blocks() as lliface:
     #gr.HTML("<p>If Space not loaded its because of offline devopment errors please message for edit</p> <hr>")
     with gr.Tab("Merged Subtitles"):
         gr.HTML("Step 1 - Word for Word Translation Creation in both Directions (Paste Google Translation here)")
-        gr.Interface(fn=split_srt_file, inputs=["text", SRTLangOptions] , outputs=["text", "text"], description="SRT Contents to W4W Split SRT for Google Translate")
+        gr.Interface(fn=split_srt_file, inputs=["text", SRTLangOptions] , outputs=["text", "file", "text", "text"], description="SRT Contents to W4W Split SRT for Google Translate")
         gr.HTML("Step 2 - Pronounciation (Roman) to Subtitle Format --> GTranslate returns unformatted string")
         gr.Interface(fn=splittext, inputs="text", outputs="text", description="Text for w4w creation in G Translate")
         gr.HTML("Step 3 - Merge into one file")