Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,10 @@ nltk.download('punkt')
|
|
| 12 |
nltk.download('stopwords')
|
| 13 |
nltk.download('brown')
|
| 14 |
from nltk.tokenize import sent_tokenize
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# Load spaCy model
|
| 17 |
nlp = spacy.load("en_core_web_sm")
|
|
@@ -20,6 +24,9 @@ nlp = spacy.load("en_core_web_sm")
|
|
| 20 |
user_agent = 'QGen/1.0 (channingfisher7@gmail.com)'
|
| 21 |
wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
|
| 22 |
|
|
|
|
|
|
|
|
|
|
| 23 |
def load_model():
|
| 24 |
model_name = "DevBM/t5-large-squad"
|
| 25 |
model = T5ForConditionalGeneration.from_pretrained(model_name)
|
|
@@ -73,6 +80,54 @@ def map_keywords_to_sentences(text, keywords, context_window_size):
|
|
| 73 |
keyword_sentence_mapping[keyword] += ' ' + context
|
| 74 |
return keyword_sentence_mapping
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
# Function to perform entity linking using Wikipedia API
|
| 77 |
@lru_cache(maxsize=128)
|
| 78 |
def entity_linking(keyword):
|
|
@@ -137,9 +192,15 @@ if st.button("Generate Questions"):
|
|
| 137 |
break
|
| 138 |
linked_entity = entity_linking(keyword)
|
| 139 |
question = generate_question(context, keyword, num_beams=num_beams)
|
|
|
|
|
|
|
| 140 |
st.write(f"**Context:** {context}")
|
| 141 |
st.write(f"**Answer:** {keyword}")
|
| 142 |
st.write(f"**Question:** {question}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
if linked_entity:
|
| 144 |
st.write(f"**Entity Link:** {linked_entity}")
|
| 145 |
st.write("---")
|
|
@@ -157,6 +218,9 @@ if st.button("Generate Questions"):
|
|
| 157 |
|
| 158 |
pdf_data = export_to_pdf(data)
|
| 159 |
st.download_button(label="PDF Format", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
|
|
|
|
|
|
|
| 160 |
|
|
|
|
| 161 |
else:
|
| 162 |
st.write("Please enter some text to generate questions.")
|
|
|
|
nltk.download('stopwords')
nltk.download('brown')
from nltk.tokenize import sent_tokenize
# BUG FIX: was `nltk.downlaod('wordnet')` — a typo that raises AttributeError
# the moment the module is imported (matching the "Runtime error" status of
# this Space).
nltk.download('wordnet')
from gensim.models import KeyedVectors
from nltk.corpus import wordnet
import random

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

user_agent = 'QGen/1.0 (channingfisher7@gmail.com)'
wiki_wiki = wikipediaapi.Wikipedia(user_agent=user_agent, language='en')

# Load pre-trained word vectors (this may take a while)
word_vectors = KeyedVectors.load_word2vec_format('vectors/GoogleNews-vectors-negative300.bin', binary=True)
| 30 |
def load_model():
|
| 31 |
model_name = "DevBM/t5-large-squad"
|
| 32 |
model = T5ForConditionalGeneration.from_pretrained(model_name)
|
|
|
|
| 80 |
keyword_sentence_mapping[keyword] += ' ' + context
|
| 81 |
return keyword_sentence_mapping
|
| 82 |
|
| 83 |
+
def get_similar_words(word, n=3):
    """Return up to *n* vocabulary words nearest to *word* in the loaded
    word2vec embedding space.

    Returns an empty list when *word* is out of vocabulary (KeyedVectors
    raises KeyError for unknown tokens).
    """
    try:
        neighbours = word_vectors.most_similar(word, topn=n)
    except KeyError:
        # Out-of-vocabulary word: no embedding, hence no neighbours.
        return []
    # FIX: the original comprehension `[word for word, _ in ...]` shadowed
    # the `word` parameter; use a distinct loop name for clarity.
    return [w for w, _ in neighbours]
| 90 |
+
def get_synonyms(word, n=3):
    """Collect up to *n* distinct WordNet lemma names for *word*, excluding
    the word itself. Returns early as soon as *n* synonyms are found."""
    collected = []
    for synset in wordnet.synsets(word):
        for lemma in synset.lemmas():
            name = lemma.name()
            if name != word and name not in collected:
                collected.append(name)
            if len(collected) == n:
                return collected
    return collected
| 100 |
+
def generate_options(answer, context, n=3):
    """Build a shuffled multiple-choice list: the correct *answer* plus up to
    *n* distractors, drawn in order of preference from word-embedding
    neighbours, WordNet synonyms, named entities in *context*, and finally
    random alphabetic words from *context*."""
    choices = [answer]

    # First preference: embedding-space neighbours of the answer.
    choices.extend(get_similar_words(answer, n))

    # Second preference: WordNet synonyms, if still short of n distractors.
    shortfall = n + 1 - len(choices)
    if shortfall > 0:
        choices.extend(get_synonyms(answer, shortfall))

    # Third preference: other named entities mentioned in the context.
    if len(choices) < n + 1:
        doc = nlp(context)
        entities = [ent.text for ent in doc.ents
                    if ent.text.lower() != answer.lower()]
        choices.extend(entities[:n + 1 - len(choices)])

    # Last resort: random words sampled from the context.
    if len(choices) < n + 1:
        context_words = [token.text for token in nlp(context)
                         if token.is_alpha and token.text.lower() != answer.lower()]
        take = min(n + 1 - len(choices), len(context_words))
        choices.extend(random.sample(context_words, take))

    # Deduplicate (order-preserving) and cap at n+1 options, then shuffle so
    # the correct answer does not always appear first.
    choices = list(dict.fromkeys(choices))[:n + 1]
    random.shuffle(choices)

    return choices
| 130 |
+
|
| 131 |
# Function to perform entity linking using Wikipedia API
|
| 132 |
@lru_cache(maxsize=128)
|
| 133 |
def entity_linking(keyword):
|
|
|
|
| 192 |
break
|
| 193 |
linked_entity = entity_linking(keyword)
|
| 194 |
question = generate_question(context, keyword, num_beams=num_beams)
|
| 195 |
+
options = generate_options(keyword, context)
|
| 196 |
+
|
| 197 |
st.write(f"**Context:** {context}")
|
| 198 |
st.write(f"**Answer:** {keyword}")
|
| 199 |
st.write(f"**Question:** {question}")
|
st.write(f"**Options:**")
# BUG FIX: the original `for j, option in options:` tries to unpack each
# option *string* into two values and raises ValueError at runtime.
# enumerate() supplies the running index needed for the A./B./C. labels.
for j, option in enumerate(options):
    st.write(f"{chr(65+j)}. {option}")
|
| 203 |
+
|
| 204 |
if linked_entity:
|
| 205 |
st.write(f"**Entity Link:** {linked_entity}")
|
| 206 |
st.write("---")
|
|
|
|
| 218 |
|
| 219 |
pdf_data = export_to_pdf(data)
|
| 220 |
st.download_button(label="PDF Format", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
|
| 221 |
+
if st.session_state.data is not None:
|
| 222 |
+
st.markdown("You can download the data from the sidebar.")
|
| 223 |
|
| 224 |
+
|
| 225 |
else:
|
| 226 |
st.write("Please enter some text to generate questions.")
|