Working Questgen implemented
- .gitignore       +3 -1
- app.py           +54 -19
- requirements.txt +4 -1

.gitignore
CHANGED
@@ -1,4 +1,6 @@
 venv
 .vscode
 s2v_reddit_2015_md.tar.gz
-__pycache__
+__pycache__
+s2v_old
+._s2v_old
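The two new ignore entries track the model download introduced in app.py below: s2v_old is the folder the sense2vec archive extracts to (app.py loads it with Sense2Vec().from_disk("s2v_old")), and ._s2v_old is likely a macOS metadata entry shipped inside the tarball.
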
app.py
CHANGED
@@ -1,3 +1,5 @@
+import pke
+from sense2vec import Sense2Vec
 import time
 import gradio as gr
 from transformers import AutoTokenizer
@@ -5,6 +7,19 @@ import os
 from pathlib import Path
 from FastT5 import get_onnx_runtime_sessions, OnnxT5
 
+commands = [
+    "curl -LO https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz",
+    "tar -xvf s2v_reddit_2015_md.tar.gz",
+]
+
+for command in commands:
+    return_code = os.system(command)
+    if return_code == 0:
+        print(f"Command '{command}' executed successfully")
+    else:
+        print(f"Command '{command}' failed with return code {return_code}")
+
+s2v = Sense2Vec().from_disk("s2v_old")
 
 trained_model_path = './t5_squad_v1/'
 
@@ -42,25 +57,10 @@ def get_question(sentence, answer, mdl, tknizer):
     dec = [tknizer.decode(ids, skip_special_tokens=True) for ids in outs]
 
     Question = dec[0].replace("question:", "")
-
+    Question = Question.strip()
     return Question
 
 
-# context = "Ramsri loves to watch cricket during his free time"
-# answer = "cricket"
-context = "Donald Trump is an American media personality and businessman who served as the 45th president of the United States."
-answer = "Donald Trump"
-ques = get_question(context, answer, model, tokenizer)
-print("question: ", ques)
-
-
-context = gr.components.Textbox(
-    lines=5, placeholder="Enter paragraph/context here...")
-answer = gr.components.Textbox(
-    lines=3, placeholder="Enter answer/keyword here...")
-question = gr.components.Textbox(type="text", label="Question")
-
-
 def generate_question(context, answer):
     start_time = time.time()  # Record the start time
     result = get_question(context, answer, model, tokenizer)
@@ -70,10 +70,45 @@ def generate_question(context, answer):
     return result
 
 
+def generate_mcq(context):
+    extractor = pke.unsupervised.TopicRank()
+    extractor.load_document(input=context, language='en')
+    extractor.candidate_selection(pos={"NOUN", "PROPN", "ADJ"})
+    extractor.candidate_weighting()
+    keyphrases = extractor.get_n_best(n=10)
+
+    results = []
+
+    for keyword, _ in keyphrases:
+        original_keyword = keyword
+        keyword = original_keyword.lower().replace(" ", "_")
+        sense = s2v.get_best_sense(keyword)
+
+        if sense is not None:
+            most_similar = s2v.most_similar(sense, n=2)
+            distractors = [word.split("|")[0].lower().replace(
+                "_", " ") for word, _ in most_similar]
+
+            question = generate_question(context, original_keyword)
+
+            result = {
+                "Question": question,
+                "Keyword": original_keyword,
+                "Distractor1": distractors[0],
+                "Distractor2": distractors[1]
+            }
+
+            results.append(result)
+
+    return results
+
+
 iface = gr.Interface(
-    fn=generate_question,
-    inputs=[context, answer],
-    outputs=question,
+    fn=generate_mcq,
+    inputs=gr.Textbox(label="Context", type='text'),
+    outputs=gr.JSON(value=list),
+    title="Questgen AI",
+    description="Enter a context to generate MCQs for keywords."
 )
 
 iface.launch()
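One observation on the new startup block: the curl/tar commands run unconditionally, so every restart re-downloads and re-extracts the sense2vec archive even when s2v_old is already on disk. A minimal sketch of an idempotent variant (the ensure_s2v_model name and the switch to subprocess are my additions, not part of the commit):

import subprocess
from pathlib import Path

S2V_ARCHIVE = "s2v_reddit_2015_md.tar.gz"
S2V_DIR = Path("s2v_old")  # folder produced by extracting the archive

def ensure_s2v_model():
    # Skip the expensive download/extract when a previous run already did it.
    if S2V_DIR.exists():
        return
    subprocess.run(
        ["curl", "-LO",
         "https://github.com/explosion/sense2vec/releases/download/v1.0.0/" + S2V_ARCHIVE],
        check=True)  # raises CalledProcessError on a non-zero exit
    subprocess.run(["tar", "-xvf", S2V_ARCHIVE], check=True)

ensure_s2v_model()
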
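The distractor loop relies on two sense2vec conventions that are easy to miss: keys are lowercase, underscore-joined phrases, and every key carries a part-of-speech tag after a pipe, which is why the code calls word.split("|")[0]. A small illustration (the values in the comments are invented, not real model output):

from sense2vec import Sense2Vec

s2v = Sense2Vec().from_disk("s2v_old")

key = "Natural Language Processing".lower().replace(" ", "_")
sense = s2v.get_best_sense(key)   # e.g. "natural_language_processing|NOUN"

if sense is not None:             # phrases missing from the vectors yield None
    for word, score in s2v.most_similar(sense, n=2):
        phrase = word.split("|")[0].replace("_", " ")  # strip the POS tag
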
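And a hypothetical local smoke test for the new generate_mcq entry point, run in the app.py context after the models load (the input text is arbitrary; actual questions and distractors depend on the T5 and sense2vec models):

mcqs = generate_mcq("The Eiffel Tower is a wrought-iron lattice tower "
                    "on the Champ de Mars in Paris, France.")
# Each entry mirrors the dict built in the loop above:
# [{"Question": "...", "Keyword": "...",
#   "Distractor1": "...", "Distractor2": "..."}, ...]
print(mcqs)
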
requirements.txt
CHANGED
@@ -5,4 +5,7 @@ torch
 transformers
 sentencepiece
 progress
-psutil
+psutil
+sense2vec
+git+https://github.com/boudinfl/pke.git
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0-py3-none-any.whl
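The three added dependencies line up with the new code paths: sense2vec supplies the distractor vectors, pke (installed from GitHub, as it is not published on PyPI) supplies TopicRank, and the pinned en_core_web_sm wheel is the spaCy pipeline pke loads for language='en'. A quick import check before deploying (a sketch, assuming the requirements are installed):

import pke
import sense2vec
import spacy

spacy.load("en_core_web_sm")  # the model wheel pinned above
print("Questgen dependencies OK")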