Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from googletrans import Translator
import spacy
import gradio as gr

# Download the small English spaCy model at startup so the spacy.load below
# succeeds on a fresh container (e.g. a rebuilt Hugging Face Space).
spacy.cli.download("en_core_web_sm")

# Module-level singletons shared by every function in this file.
nlp = spacy.load('en_core_web_sm')
translator = Translator()
|
| 9 |
+
|
| 10 |
+
def Sentencechunker(sentence):
    """Return the cumulative word prefixes of *sentence*, joined by " | ".

    Example: "a b c" -> "a | a b | a b c".  Splits on single spaces
    (not arbitrary whitespace), matching the original behavior.
    """
    words = sentence.split(" ")
    prefixes = [" ".join(words[:count]) for count in range(1, len(words) + 1)]
    return " | ".join(prefixes)
|
| 16 |
+
|
| 17 |
+
def ReverseSentenceChunker(sentence):
    """Build cumulative prefix chunks of *sentence* with its word order reversed."""
    backwards = " ".join(sentence.split()[::-1])
    return Sentencechunker(backwards)
|
| 21 |
+
|
| 22 |
+
def three_words_chunk(sentence):
    """Return every run of three consecutive words, joined by " | ".

    Sentences with fewer than three words yield an empty string
    (there is no complete 3-word window).
    """
    words = sentence.split()
    windows = (" ".join(words[start:start + 3]) for start in range(len(words) - 2))
    return " | ".join(windows)
|
| 27 |
+
|
| 28 |
+
def keep_nouns_verbs(sentence):
    """Strip *sentence* down to its noun, verb, and punctuation tokens.

    Uses the module-level spaCy pipeline ``nlp``; kept tokens are joined
    with single spaces in their original order.
    """
    kept = [token.text for token in nlp(sentence)
            if token.pos_ in ['NOUN', 'VERB', 'PUNCT']]
    return " ".join(kept)
|
| 35 |
+
|
| 36 |
+
def unique_word_count(text="", state=None):
    """Count whitespace-separated words in *text*, accumulating into *state*.

    *state*, when given, is a running ``{word: count}`` dict that is mutated
    in place (this is how Gradio session state accumulates across calls).
    Returns a 1-tuple containing the (word, count) pairs sorted by count,
    most frequent first — the trailing comma is deliberate and preserved.
    """
    counts = {} if state is None else state
    for word in text.split():
        counts[word] = counts.get(word, 0) + 1
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    return ranked,
|
| 48 |
+
|
| 49 |
+
"""
|
| 50 |
+
sentence = "Please help me create a sentence chunker"
|
| 51 |
+
sentencechunks = Sentencechunker(sentence)
|
| 52 |
+
reversed_chunks = ReverseSentenceChunker(sentence)
|
| 53 |
+
TWchunks = three_words_chunk(sentence)
|
| 54 |
+
nouns_verbs = keep_nouns_verbs(sentence)
|
| 55 |
+
"""
|
| 56 |
+
|
| 57 |
+
# Translate from English to French
|
| 58 |
+
|
| 59 |
+
langdest = gr.Dropdown(choices=["af", "de", "es", "ko", "ja", "zh-cn"], label="Choose Language", value="de")
|
| 60 |
+
|
| 61 |
+
"""
|
| 62 |
+
def VarTrans(text, langdest):
|
| 63 |
+
translated = translator.translate(text, dest=langdest)
|
| 64 |
+
SCtranslated = translator.translate(sentencechunks, dest=langdest)
|
| 65 |
+
RCtranslated = translator.translate(reversed_chunks, dest=langdest)
|
| 66 |
+
TWCtranslated = translator.translate(TWchunks, dest=langdest)
|
| 67 |
+
return translated, SCtranslated, RCtranslated, TWCtranslated
|
| 68 |
+
"""
|
| 69 |
+
|
| 70 |
+
ChunkModeDrop = gr.Dropdown(choices=["Chunks", "Reverse", "Three Word Chunks"], label="Choose Chunk Type")
|
| 71 |
+
|
| 72 |
+
def FrontRevSentChunk(Chunkmode, Translate, Text, langdest):
    """Chunk *Text* according to *Chunkmode*; optionally append a translation.

    Unknown modes produce an empty string.  When *Translate* is truthy the
    chunked output is translated to *langdest* via the module-level
    ``translator`` and appended on a new line.
    """
    chunkers = {
        "Chunks": Sentencechunker,
        "Reverse": ReverseSentenceChunker,
        "Three Word Chunks": three_words_chunk,
    }
    output = chunkers[Chunkmode](Text) if Chunkmode in chunkers else ""

    if Translate:
        translated = translator.translate(output, dest=langdest)
        output += "\n" + translated.text
    return output
|
| 87 |
+
|
| 88 |
+
"""
|
| 89 |
+
print(translated.text)
|
| 90 |
+
print(sentencechunks)
|
| 91 |
+
print(SCtranslated.text)
|
| 92 |
+
print(reversed_chunks)
|
| 93 |
+
print(RCtranslated.text)
|
| 94 |
+
print(TWchunks)
|
| 95 |
+
print(TWCtranslated.text)
|
| 96 |
+
print(nouns_verbs)
|
| 97 |
+
"""
|
| 98 |
+
|
| 99 |
+
def Wordchunker(word):
    """Return the cumulative character prefixes of *word* as a list.

    Example: "abc" -> ["a", "ab", "abc"]; an empty word yields [].
    """
    return [word[:end] for end in range(1, len(word) + 1)]
|
| 104 |
+
|
| 105 |
+
# Quick smoke-test of Wordchunker, printed at import time.
word = "please"
wordchunks = Wordchunker(word)
print("\n")
print(wordchunks)

#random_chunk_display(TWCtranslated.text)
|
| 111 |
+
|
| 112 |
+
# Gradio UI: stacks several mini-interfaces inside a single Blocks page.
with gr.Blocks() as lliface:
    gr.HTML("<p> Still Undercontruction </p> <> Arrows app json creator for easy knowledge graphing and spacy POS graph? </p> <p> https://huggingface.co/spaces/RASMUS/Whisper-youtube-crosslingual-subtitles, https://huggingface.co/spaces/vumichien/whisper-speaker-diarization, Maybe duplicate these, private them and then load into spaces? --> Whisper space for youtube, Clip Interrogator, load here and all my random functions esp. text to HTML </p>")
    # Chunker + optional translation (mode dropdown, translate checkbox, text, language).
    gr.Interface(fn=FrontRevSentChunk, inputs=[ChunkModeDrop, "checkbox", "text", langdest], outputs="text")
    # NOTE(review): title string has an unbalanced "(" — left as-is since it is runtime UI text.
    gr.Interface(fn=keep_nouns_verbs, inputs=["text"], outputs="text", title="Noun and Verbs only (Plus punctuation")
    gr.HTML("Add a codepen pen page here")
    # Word-frequency counter; returns a sorted (word, count) list.
    gr.Interface(fn=unique_word_count, inputs="text", outputs="text", title="Wordcounter")

lliface.launch()
|