Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re

import nltk
import stanfordnlp
import streamlit as st
from bleu import list_bleu
from nltk.tokenize import sent_tokenize
|
| 6 |
+
|
| 7 |
+
@st.cache(allow_output_mutation=True)
def get_model():
    """Download the required NLP resources and build the POS-tagging pipeline.

    Fetches the NLTK 'punkt' and 'averaged_perceptron_tagger' resources and
    the English stanfordnlp models, then constructs a stanfordnlp pipeline
    running the tokenize, mwt and pos processors.

    The function is wrapped in ``st.cache`` so the downloads and the pipeline
    construction are not repeated on every Streamlit rerun.

    Returns:
        The constructed ``stanfordnlp.Pipeline`` instance.
    """
    nltk.download('punkt')
    nltk.download('averaged_perceptron_tagger')
    # force=True makes the downloader answer its own confirmation prompts
    # instead of calling input(), which cannot be satisfied when the app runs
    # as a non-interactive hosted process.
    stanfordnlp.download('en', force=True)
    return stanfordnlp.Pipeline(processors='tokenize,mwt,pos')
|
| 14 |
+
|
| 15 |
+
def truecasing(input_text):
    """Restore sentence-initial and proper-noun capitalisation in a text.

    The text is split into sentences with NLTK, each sentence is passed
    through ``str.capitalize`` (first character upper-cased, the rest
    lower-cased), and the result is tagged with the stanfordnlp pipeline so
    that words tagged as proper nouns are capitalised too. Spaces that
    tokenisation leaves in front of punctuation are removed again.

    Args:
        input_text: The text to truecase.

    Returns:
        The truecased text, or an empty string when ``input_text`` is empty,
        ``None`` or whitespace only.
    """
    # Nothing to tag: avoid handing an empty document to the pipeline.
    if not input_text or not input_text.strip():
        return ""

    # Split into sentences and capitalise each one.
    sentences = sent_tokenize(input_text, language='english')
    capitalized = ' '.join(s.capitalize() for s in sentences)
    # Drop a space that directly precedes punctuation (raw string so that
    # the backslash is not treated as an invalid string escape).
    text_truecase = re.sub(r" (?=[\.,'!?:;])", "", capitalized)

    # The pipeline is not a module-level global; obtain it from the cached
    # loader so this function does not fail with a NameError.
    stf_nlp = get_model()
    doc = stf_nlp(text_truecase)

    # Capitalise words by part-of-speech tag. The original comparison also
    # listed "NNS", which is not a universal POS (upos) value and therefore
    # never matched; only "PROPN" has any effect.
    text_truecase = ' '.join(
        w.text.capitalize() if w.upos == "PROPN" else w.text
        for sent in doc.sentences
        for w in sent.words
    )

    # Re-attach punctuation that the token join separated with a space.
    text_truecase = re.sub(r'\s([?.!"](?:\s|$))', r'\1', text_truecase)
    return (
        text_truecase
        .replace(" ,", ",")
        .replace(" -", "-")
        .replace(" 's", "'s")
    )
|
| 31 |
+
|
| 32 |
+
# Single-form UI: a text area plus a submit button; the truecased text is
# written out once the form has been submitted.
# NOTE(review): the original indentation was lost in this view, so the
# placement of the submit handling inside the form is assumed; confirm.
with st.form(key='my_form'):
    prompt = st.text_area(label='Enter sentence', value=" ")
    submitted = st.form_submit_button(label='Submit')
    if submitted:
        st.write(truecasing(prompt))
|