Automate_MCQ / pages /TextGapPro.py
PankajNk's picture
Upload TextGapPro.py
6bf80e5
import streamlit as st
import textwrap
import json
import requests
import string
import re
import nltk
import string
import itertools
import pke
from nltk.corpus import stopwords
from nltk.corpus import wordnet
import traceback
from nltk.tokenize import sent_tokenize
from flashtext import KeywordProcessor
from pprint import pprint
import random
# ---- Page heading and description ----
st.header(" TextGapPro")
st.subheader(
    "TextGapPro is a cutting-edge Natural Language Processing (NLP) application designed to empower users with the ability to generate fill-in-the-blank sentences effortlessly. In a world increasingly reliant on effective communication, TextGapPro stands out as a powerful tool for writers, educators, and content creators seeking to enhance their content's readability and engagement."
)

# ---- User input ----
# `text` holds whatever the user typed into the text area; `button` holds the
# value returned by the "Generate" button. Both are read by the script body
# further down to decide whether to run the generation pipeline.
text = st.text_area(
    "Input the text to get the fill in the blanks",
    height=200,
    placeholder="Enter the text",
)
button = st.button("Generate Fill-in-The-Blank")
def tokenize_sentences(text):
    """Split *text* into sentences and drop the short ones.

    The text is segmented with ``sent_tokenize``. A sentence is kept only
    when its length, as returned by the tokenizer (before stripping), is
    greater than 20 characters. Kept sentences are returned with
    surrounding whitespace removed, in their original order.
    """
    min_length = 20
    kept = []
    for raw_sentence in sent_tokenize(text):
        # The length test is applied to the unstripped sentence.
        if len(raw_sentence) > min_length:
            kept.append(raw_sentence.strip())
    return kept
def get_noun_adj_verb(text):
    """Extract up to 30 keyphrases from *text* using pke's MultipartiteRank.

    Candidate phrases are restricted to the part-of-speech tags VERB, ADJ
    and NOUN. No custom stoplist is passed to the extractor.

    Args:
        text: The raw input document.

    Returns:
        A list of keyphrase strings in the order returned by
        ``get_n_best``. If extraction fails for any reason the traceback is
        printed and an empty list is returned, so the caller can carry on
        (best-effort behaviour).
    """
    try:
        extractor = pke.unsupervised.MultipartiteRank()
        extractor.load_document(input=text, language='en')
        extractor.candidate_selection(pos={'VERB', 'ADJ', 'NOUN'})
        # Build the Multipartite graph and rank candidates using random walk;
        # alpha controls the weight adjustment mechanism, see TopicRank for
        # the threshold/method parameters.
        extractor.candidate_weighting(alpha=1.1,
                                      threshold=0.75,
                                      method='average')
        keyphrases = extractor.get_n_best(n=30)
    except Exception:
        # Deliberately best-effort: report the failure but do not crash the
        # page. Catching Exception (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        traceback.print_exc()
        return []
    # Each entry is indexed at [0] to keep only the phrase itself.
    return [entry[0] for entry in keyphrases]
def get_sentences_for_keyword(keywords, sentences):
    """Map every keyword to the sentences in which it was found.

    Each keyword is registered with a ``KeywordProcessor``; every sentence
    is then scanned with ``extract_keywords`` and appended to the list of
    each keyword reported for it.

    Returns a dict with one entry per keyword (keywords that were never
    found map to an empty list). Each list is ordered longest sentence
    first.
    """
    matcher = KeywordProcessor()
    found = {}
    for keyword in keywords:
        found[keyword] = []
        matcher.add_keyword(keyword)

    for sentence in sentences:
        for hit in matcher.extract_keywords(sentence):
            found[hit].append(sentence)

    # Longest sentences first; sorted() is stable, so equal-length sentences
    # keep their original relative order.
    return {
        keyword: sorted(matched, key=len, reverse=True)
        for keyword, matched in found.items()
    }
def get_fill_in_the_blanks(sentence_mapping):
    """Build fill-in-the-blank sentences from a keyword -> sentences mapping.

    For every keyword, only its first candidate sentence is considered. The
    sentence is used when all of the following hold:

    * the keyword occurs in it exactly once as a whole word
      (case-insensitive), so the result contains exactly one blank;
    * the sentence has not already been used for an earlier keyword.

    Args:
        sentence_mapping: dict mapping a keyword string to a list of
            candidate sentences (the first one is the preferred one).

    Returns:
        dict with three entries:
            "title":     fixed instruction string,
            "sentences": up to 10 sentences with the keyword replaced by
                         ' _________ ',
            "keys":      the keywords (answers), aligned with "sentences".
    """
    out = {"title": "Fill in the blanks for these sentences with matching words at the top"}
    blank_sentences = []
    keys = []
    used_sentences = set()  # sentences already turned into a question

    for key, candidates in sentence_mapping.items():
        if not key or not candidates:
            continue
        sent = candidates[0]
        if sent in used_sentences:
            continue

        # Match the keyword only as a whole word: without the lookarounds a
        # key such as "art" would also hit "part", which either miscounts
        # the occurrences or punches a blank into the middle of a word.
        pattern = re.compile(
            r'(?<!\w)' + re.escape(key) + r'(?!\w)', re.IGNORECASE
        )
        # subn substitutes and counts in a single pass.
        line, no_of_replacements = pattern.subn(' _________ ', sent)

        # Exactly one blank: zero would yield a sentence with nothing to
        # fill in, two or more would make the question ambiguous.
        if no_of_replacements != 1:
            continue

        blank_sentences.append(line)
        keys.append(key)
        used_sentences.add(sent)

    out["sentences"] = blank_sentences[:10]
    out["keys"] = keys[:10]
    return out
# Run the generation pipeline only when there is input text and the
# "Generate" button value is truthy.
if text and button:
    # 1. Split the input into sentences.
    sentences = tokenize_sentences(text)
    # 2. Extract candidate answer words/phrases.
    noun_verbs_adj = get_noun_adj_verb(text)
    # 3. Find, for every candidate, the sentences that contain it.
    keyword_sentence_mapping_noun_verbs_adj = get_sentences_for_keyword(noun_verbs_adj, sentences)
    # 4. Turn the mapping into blanked sentences plus their answers.
    fill_in_the_blanks = get_fill_in_the_blanks(keyword_sentence_mapping_noun_verbs_adj)

    if not fill_in_the_blanks['sentences']:
        # Nothing usable was produced (no sentences survived tokenization,
        # or keyphrase extraction returned nothing); say so instead of
        # rendering two empty sections.
        st.warning("No fill-in-the-blank sentences could be generated from this text. Try a longer passage.")
    else:
        # TODO: show the answers in shuffled order above the sentences as a
        # word bank.
        st.header("Fill in the blanks from Input")
        for count, sentence in enumerate(fill_in_the_blanks['sentences'], start=1):
            st.write(count, sentence)

        st.header(" Correct Answer")
        for key in fill_in_the_blanks['keys']:
            st.write(key)