"""Streamlit app that turns a passage of text into fill-in-the-blank sentences.

Keyphrases (nouns, adjectives, verbs) are extracted with pke's
MultipartiteRank, mapped to the sentences that contain them, and each selected
sentence has its keyphrase replaced by a blank.
"""

import itertools
import json
import random
import re
import string
import textwrap
import traceback
from pprint import pprint
from typing import Dict, List

import nltk
import pke
import requests
import streamlit as st
from flashtext import KeywordProcessor
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk.tokenize import sent_tokenize

# Sentences at or below this many characters are ignored as too short.
MIN_SENTENCE_LENGTH = 20
# Number of top-ranked keyphrases requested from the extractor.
MAX_KEYPHRASES = 30
# Text substituted for the keyword in each exercise sentence.
BLANK = ' _________ '


def tokenize_sentences(text: str) -> List[str]:
    """Split *text* into sentences, dropping the very short ones.

    The length filter is applied to the raw sentence returned by
    ``sent_tokenize``; surviving sentences are then stripped of surrounding
    whitespace.
    """
    return [
        sentence.strip()
        for sentence in sent_tokenize(text)
        if len(sentence) > MIN_SENTENCE_LENGTH
    ]


def get_noun_adj_verb(text: str) -> List[str]:
    """Return up to ``MAX_KEYPHRASES`` keyphrases extracted from *text*.

    Candidates are restricted to verbs, adjectives and nouns and ranked with
    pke's MultipartiteRank. Extraction is best-effort: if anything in the
    pipeline raises, the traceback is printed and an empty list is returned.
    """
    try:
        extractor = pke.unsupervised.MultipartiteRank()
        extractor.load_document(input=text, language='en')
        extractor.candidate_selection(pos={'VERB', 'ADJ', 'NOUN'})
        # Build the Multipartite graph and rank candidates using random walk;
        # alpha controls the weight adjustment mechanism, see TopicRank for
        # the threshold/method parameters.
        extractor.candidate_weighting(
            alpha=1.1, threshold=0.75, method='average'
        )
        keyphrases = extractor.get_n_best(n=MAX_KEYPHRASES)
    except Exception:
        # Deliberately broad: a failed extraction should yield "no keywords"
        # rather than crash the page, but KeyboardInterrupt/SystemExit must
        # still propagate (a bare ``except:`` would swallow them).
        traceback.print_exc()
        return []
    # Each item's first element is the phrase itself.
    return [item[0] for item in keyphrases]


def get_sentences_for_keyword(
    keywords: List[str], sentences: List[str]
) -> Dict[str, List[str]]:
    """Map every keyword to the sentences that contain it, longest first.

    Every keyword appears in the result, with an empty list when no sentence
    mentions it. A sentence is listed at most once per keyword even if the
    keyword occurs in it several times.
    """
    keyword_processor = KeywordProcessor()
    keyword_sentences: Dict[str, List[str]] = {}
    for word in keywords:
        keyword_sentences[word] = []
        keyword_processor.add_keyword(word)

    for sentence in sentences:
        found = keyword_processor.extract_keywords(sentence)
        # dict.fromkeys de-duplicates repeated hits while keeping their order.
        for key in dict.fromkeys(found):
            keyword_sentences[key].append(sentence)

    return {
        key: sorted(matches, key=len, reverse=True)
        for key, matches in keyword_sentences.items()
    }


def get_fill_in_the_blanks(
    sentence_mapping: Dict[str, List[str]], max_items: int = 10
) -> dict:
    """Build the fill-in-the-blank exercise from a keyword -> sentences map.

    For each keyword only its first (longest) sentence is considered. The
    sentence is used when it has not already been used for another keyword
    and the keyword occurs in it exactly once as a whole word, so that every
    exercise sentence has a single blank with a single answer.

    Args:
        sentence_mapping: keyword -> candidate sentences, as produced by
            ``get_sentences_for_keyword``.
        max_items: maximum number of sentences/answers to return.

    Returns:
        A dict with ``"title"``, ``"sentences"`` (blanked sentences) and
        ``"keys"`` (the matching answers, in the same order).
    """
    out = {"title": "Fill in the blanks for these sentences with matching words at the top"}
    blank_sentences = []
    keys = []
    used_sentences = set()

    for key, candidates in sentence_mapping.items():
        if not candidates:
            continue
        sentence = candidates[0]
        if sentence in used_sentences:
            continue
        # Match the keyword only as a whole word: a plain substring pattern
        # would also blank "art" inside "part" and inflate the match count.
        pattern = re.compile(
            r"(?<!\w)" + re.escape(key) + r"(?!\w)", re.IGNORECASE
        )
        line, replacements = pattern.subn(BLANK, sentence)
        if replacements != 1:
            continue
        blank_sentences.append(line)
        keys.append(key)
        used_sentences.add(sentence)

    out["sentences"] = blank_sentences[:max_items]
    out["keys"] = keys[:max_items]
    return out


# --- Streamlit page -------------------------------------------------------

st.header(" TextGapPro")
st.subheader(
    "TextGapPro is a cutting-edge Natural Language Processing (NLP) "
    "application designed to empower users with the ability to generate "
    "fill-in-the-blank sentences effortlessly. In a world increasingly "
    "reliant on effective communication, TextGapPro stands out as a powerful "
    "tool for writers, educators, and content creators seeking to enhance "
    "their content's readability and engagement."
)
text = st.text_area(
    "Input the text to get the fill in the blanks",
    placeholder="Enter the text",
    height=200,
)
button = st.button("Generate Fill-in-The-Blank")

if button and not text:
    st.warning("Please enter some text first.")

if text and button:
    sentences = tokenize_sentences(text)
    noun_verbs_adj = get_noun_adj_verb(text)
    keyword_sentence_mapping_noun_verbs_adj = get_sentences_for_keyword(
        noun_verbs_adj, sentences
    )
    fill_in_the_blanks = get_fill_in_the_blanks(
        keyword_sentence_mapping_noun_verbs_adj
    )

    if not fill_in_the_blanks['sentences']:
        st.warning(
            "No fill-in-the-blank sentences could be generated from this "
            "text. Try a longer passage."
        )
    else:
        # TODO: show the answers shuffled above the sentences, as the
        # exercise title suggests.
        st.header("Fill in the blanks from Input")
        for count, sentence in enumerate(
            fill_in_the_blanks['sentences'], start=1
        ):
            st.write(count, sentence)

        st.header(" Correct Answer")
        for key in fill_in_the_blanks['keys']:
            st.write(key)