PankajNk committed on
Commit
6bf80e5
·
1 Parent(s): 0826783

Upload TextGapPro.py

Browse files
Files changed (1) hide show
  1. pages/TextGapPro.py +132 -0
pages/TextGapPro.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import textwrap
3
+ import json
4
+ import requests
5
+ import string
6
+ import re
7
+ import nltk
8
+ import string
9
+ import itertools
10
+
11
+ import pke
12
+ from nltk.corpus import stopwords
13
+ from nltk.corpus import wordnet
14
+ import traceback
15
+ from nltk.tokenize import sent_tokenize
16
+ from flashtext import KeywordProcessor
17
+ from pprint import pprint
18
+ import random
19
+
20
# Page chrome: title, product description, and the two input widgets.
# `text` and `button` are read further down to decide whether to run the
# generation pipeline.
st.header(" TextGapPro")
st.subheader(
    "TextGapPro is a cutting-edge Natural Language Processing (NLP) application designed to empower users with the ability to generate fill-in-the-blank sentences effortlessly. In a world increasingly reliant on effective communication, TextGapPro stands out as a powerful tool for writers, educators, and content creators seeking to enhance their content's readability and engagement."
)
text = st.text_area(
    "Input the text to get the fill in the blanks",
    placeholder="Enter the text",
    height=200,
)
button = st.button("Generate Fill-in-The-Blank")
24
+
25
def tokenize_sentences(text):
    """Split ``text`` into sentences and keep only the longer ones.

    Sentences come from NLTK's ``sent_tokenize``. A sentence is kept when its
    raw length exceeds 20 characters; kept sentences are returned with
    surrounding whitespace stripped.
    """
    kept = []
    for candidate in sent_tokenize(text):
        # The length test is applied to the raw sentence, before stripping.
        if len(candidate) > 20:
            kept.append(candidate.strip())
    return kept
29
+
30
def get_noun_adj_verb(text):
    """Extract up to 30 keyphrases (nouns, adjectives, verbs) from ``text``.

    Uses pke's unsupervised MultipartiteRank extractor.

    Args:
        text: The raw input text handed to ``load_document``.

    Returns:
        A list with the first element of every entry returned by
        ``get_n_best(n=30)`` (the keyphrase), in the order pke returns them.
        If extraction raises, the traceback is printed and an empty list is
        returned so the caller can carry on.
    """
    try:
        extractor = pke.unsupervised.MultipartiteRank()
        extractor.load_document(input=text, language='en')

        # Only verbs, adjectives and nouns are eligible candidates.
        # NOTE: no custom stoplist is built here. It was never passed to pke,
        # and building it needs the NLTK stopwords corpus, whose absence
        # would make the whole extraction fail for no reason.
        extractor.candidate_selection(pos={'VERB', 'ADJ', 'NOUN'})

        # Build the multipartite graph and rank candidates using a random
        # walk; alpha controls the weight adjustment mechanism, see TopicRank
        # for the threshold/method parameters.
        extractor.candidate_weighting(alpha=1.1,
                                      threshold=0.75,
                                      method='average')

        return [candidate[0] for candidate in extractor.get_n_best(n=30)]
    except Exception:
        # Best-effort: report the failure but keep the app alive. Catching
        # Exception (not a bare except) lets KeyboardInterrupt, SystemExit
        # and other BaseException-derived control-flow signals propagate.
        traceback.print_exc()
        return []
58
+
59
+
60
def get_sentences_for_keyword(keywords, sentences):
    """Map every keyword to the sentences it is found in.

    Each keyword is registered with a flashtext ``KeywordProcessor``; every
    sentence is then scanned and appended to the list of each keyword that
    ``extract_keywords`` reports for it. Each keyword's list is ordered from
    the longest sentence to the shortest. Keywords with no hit map to an
    empty list.
    """
    matcher = KeywordProcessor()
    mapping = {}
    for keyword in keywords:
        matcher.add_keyword(keyword)
        mapping[keyword] = []

    for sentence in sentences:
        for hit in matcher.extract_keywords(sentence):
            mapping[hit].append(sentence)

    # Longest sentence first; the sort is stable, so ties keep input order.
    for bucket in mapping.values():
        bucket.sort(key=len, reverse=True)
    return mapping
76
+
77
def get_fill_in_the_blanks(sentence_mapping):
    """Build fill-in-the-blank sentences from a keyword -> sentences mapping.

    For every keyword, only its first sentence is considered. The keyword is
    blanked out when it occurs exactly once in that sentence as a whole word
    (case-insensitive) and the sentence has not already been used for an
    earlier keyword. At most 10 sentences are produced.

    Args:
        sentence_mapping: Mapping of keyword -> list of sentences containing
            it; only the first sentence of each list is used.

    Returns:
        A dict with three entries: ``"title"`` (instruction text),
        ``"sentences"`` (sentences with the keyword replaced by
        ``' _________ '``) and ``"keys"`` (the blanked keywords, aligned
        index-for-index with ``"sentences"``).
    """
    max_items = 10
    out = {"title": "Fill in the blanks for these sentences with matching words at the top"}
    blank_sentences = []
    keys = []
    used_sentences = set()

    for key, candidates in sentence_mapping.items():
        if len(blank_sentences) >= max_items:
            break
        if not candidates:
            continue

        sent = candidates[0]
        if sent in used_sentences:
            continue

        # Match the keyword only as a whole word: lookarounds instead of \b so
        # keywords that start or end with punctuation still match. A plain
        # substring match would also count "cat" inside "caterpillar".
        pattern = re.compile(
            r"(?<!\w)" + re.escape(key) + r"(?!\w)", re.IGNORECASE
        )
        line, replaced = pattern.subn(' _________ ', sent)

        # Exactly one blank per sentence: zero means nothing was blanked,
        # two or more would make the exercise ambiguous.
        if replaced != 1:
            continue

        blank_sentences.append(line)
        keys.append(key)
        used_sentences.add(sent)

    out["sentences"] = blank_sentences
    out["keys"] = keys
    return out
96
+
97
# Run the pipeline only when there is input text and the button was clicked
# on this script run.
if text and button:
    # sentences -> keyphrases -> keyword/sentence mapping -> blanked sentences
    sentences = tokenize_sentences(text)
    noun_verbs_adj = get_noun_adj_verb(text)
    keyword_sentence_mapping_noun_verbs_adj = get_sentences_for_keyword(noun_verbs_adj, sentences)
    fill_in_the_blanks = get_fill_in_the_blanks(keyword_sentence_mapping_noun_verbs_adj)

    if not fill_in_the_blanks['sentences']:
        # Nothing usable was produced (e.g. text too short, or keyphrase
        # extraction failed); say so instead of rendering empty sections.
        st.warning("Could not generate any fill-in-the-blank sentences from this text. Try a longer passage.")
    else:
        # TODO: also show the answers in shuffled order as a word bank.
        st.header("Fill in the blanks from Input")
        for count, sentence in enumerate(fill_in_the_blanks['sentences'], start=1):
            st.write(count, sentence)

        st.header(" Correct Answer")
        for key in fill_in_the_blanks['keys']:
            st.write(key)