BigSalmon committed on
Commit
f6cc9b4
·
1 Parent(s): 6bdbda2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

import nltk
import stanfordnlp
import streamlit as st
from bleu import list_bleu
from nltk.tokenize import sent_tokenize
6
+
7
@st.cache(allow_output_mutation=True)
def get_model():
    """Download the required NLP resources and build the tagging pipeline.

    Fetches the NLTK ``punkt`` and ``averaged_perceptron_tagger`` data and
    the StanfordNLP English models, then constructs a StanfordNLP pipeline
    with the ``tokenize``, ``mwt`` and ``pos`` processors. The function is
    wrapped in ``st.cache`` so the result is reused between calls.

    Returns:
        The constructed ``stanfordnlp.Pipeline``.
    """
    nltk.download('punkt')
    nltk.download('averaged_perceptron_tagger')
    # force=True makes the downloader proceed without asking for
    # confirmation on stdin; a Streamlit app has no console to answer it.
    stanfordnlp.download('en', force=True)
    return stanfordnlp.Pipeline(processors='tokenize,mwt,pos')
14
+
15
def truecasing(input_text):
    """Restore conventional capitalization ("truecase") in English text.

    The text is split into sentences, each sentence is passed through
    ``str.capitalize`` (first character upper-cased, the rest lower-cased),
    and the tokens selected by the part-of-speech tagger are capitalized.
    Spaces left in front of punctuation are removed along the way.

    Args:
        input_text: The text to truecase.

    Returns:
        The truecased text as a single string; an empty string when
        ``input_text`` is empty or contains only whitespace.
    """
    # Nothing to tokenize or tag in blank input.
    if not input_text.strip():
        return ""

    # No module-level pipeline exists, so fetch it from get_model(), which
    # is cached and therefore only builds the pipeline once.
    stf_nlp = get_model()

    # Split into sentences and capitalize each one.
    sentences = sent_tokenize(input_text, language='english')
    sentences_capitalized = [s.capitalize() for s in sentences]

    # Re-join the sentences, dropping any space that precedes punctuation.
    text_truecase = re.sub(
        r" (?=[\.,'!?:;])", "", ' '.join(sentences_capitalized)
    )

    # Capitalize words according to part-of-speech tagging (POS).
    # NOTE(review): "NNS" is a Penn Treebank tag, not a Universal POS tag,
    # so it presumably never matches ``upos`` -- confirm and drop if so.
    doc = stf_nlp(text_truecase)
    words = []
    for sent in doc.sentences:
        for w in sent.words:
            if w.upos in ["PROPN", "NNS"]:
                words.append(w.text.capitalize())
            else:
                words.append(w.text)
    text_truecase = ' '.join(words)

    # Joining tokens with spaces detaches punctuation again; re-attach
    # sentence-final marks, commas, hyphens and possessive 's.
    text_truecase = re.sub(r'\s([?.!"](?:\s|$))', r'\1', text_truecase)
    text_truecase = (
        text_truecase.replace(" ,", ",").replace(" -", "-").replace(" 's", "'s")
    )
    return text_truecase
31
+
32
# Streamlit UI: a single form with a text area; when the form is submitted,
# the entered text is truecased and the result is written to the page.
with st.form(key='my_form'):
    prompt = st.text_area(label='Enter sentence', value=" ")
    if st.form_submit_button(label='Submit'):
        st.write(truecasing(prompt))