Spaces:
Sleeping
Sleeping
gen question type incorrect word, antonym, synonym, stress
Browse files- src/enum/question.py +1 -0
- src/factories/gen_question/antonym_question.py +113 -0
- src/factories/gen_question/base.py +18 -3
- src/factories/gen_question/incorrect_word_question.py +73 -0
- src/factories/gen_question/pronunciation_question.py +141 -141
- src/factories/gen_question/question.py +15 -9
- src/factories/gen_question/stress_question.py +58 -43
- src/factories/gen_question/synonym_question.py +113 -0
- src/interfaces/question.py +19 -3
- src/routers/public/quesion.py +15 -2
- src/services/AI/sentence_generator.py +63 -0
- src/utils/word.py +175 -2
src/enum/question.py
CHANGED
|
@@ -5,3 +5,4 @@ class QuestionTypeEnum(str, Enum):
|
|
| 5 |
STRESS = "stress" # trong am
|
| 6 |
SYNONYM = "synonym" # tu dong nghia
|
| 7 |
ANTONYM = "antonym" # tu trai nghia
|
|
|
|
|
|
| 5 |
STRESS = "stress" # trong am
|
| 6 |
SYNONYM = "synonym" # tu dong nghia
|
| 7 |
ANTONYM = "antonym" # tu trai nghia
|
| 8 |
+
INCORRECT_WORD = "incorrect_word"
|
src/factories/gen_question/antonym_question.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
import random
|
| 3 |
+
|
| 4 |
+
from src.factories.gen_question.base import Question, nltk_words
|
| 5 |
+
from src.enum.question import QuestionTypeEnum
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class AntonymsQuestion(Question):
    """
    This class generates multiple-choice questions that ask the user
    to select an antonym for a given word.

    It uses dictionary data (from fetch_word_data) to retrieve
    meanings and antonyms. If the input list is empty or invalid,
    it falls back to randomly chosen words from a built-in word list (nltk_words).
    """

    def generate_questions(self, list_words: List[str] = None, num_question: int = 1, num_ans_per_question: int = 4):
        if list_words is None:
            list_words = []

        result = []
        list_unique_words = set(list_words)

        # Internal helper function to get a valid question/answer pair
        def get_question_and_answer():
            """
            Randomly selects a word and finds one of its antonyms.

            Returns:
                tuple(str, str): question_word, antonym_answer
            """
            # Try from provided list word first; each word is consumed.
            while list_unique_words:
                source_word = random.sample(list(list_unique_words), 1)[0]
                list_unique_words.remove(source_word)
                antonym_word = self.get_antonym(source_word)
                # Also consume the antonym so it cannot be reused as a prompt.
                if antonym_word in list_unique_words:
                    list_unique_words.remove(antonym_word)
                if antonym_word:
                    return source_word, antonym_word

            # Fallback: keep sampling the built-in corpus until a word with
            # an antonym is found.
            while True:
                source_word = random.choice(nltk_words)
                antonym_word = self.get_antonym(source_word)
                if antonym_word:
                    return source_word, antonym_word

        for _ in range(num_question):
            question_word, correct_answer = get_question_and_answer()

            choices = [correct_answer]
            distractor_set = set()

            while len(choices) < num_ans_per_question:
                distractor_word = random.choice(nltk_words)

                # Distractors must differ (case-insensitively) from both the
                # prompt and the answer, and from each other.
                if (distractor_word.lower() != correct_answer.lower() and
                        distractor_word.lower() != question_word.lower() and
                        distractor_word.lower() not in distractor_set):
                    # BUGFIX: store the lowercase form — the membership test
                    # above is lowercase, so adding the original case allowed
                    # duplicate choices differing only by case.
                    distractor_set.add(distractor_word.lower())
                    choices.append(distractor_word)

            random.shuffle(choices)

            result.append({
                "question": question_word,
                "type": QuestionTypeEnum.ANTONYM,
                "choices": choices,
                "answer": choices.index(correct_answer),
                "explain": [],
            })

        return result

    def get_antonym(self, word: str):
        """
        Retrieves a random antonym for the given word using dictionary API data.

        It checks both the 'meanings.antonyms' and 'meanings.definitions.antonyms' fields.

        Args:
            word (str): The input word to find an antonym for.

        Returns:
            str or None: An antonym if found, else None.
        """
        data = self.fetch_word_data(word)
        if not data:
            return None

        meanings = data.get("meanings", [])

        # Randomly search for antonyms in the meaning entries, discarding
        # each inspected meaning until one yields an antonym.
        while meanings:
            meaning = random.sample(meanings, 1)[0]

            # Try top-level antonyms
            antonyms = meaning.get("antonyms", [])

            # Also check antonyms inside definitions
            if not antonyms:
                definitions = meaning.get("definitions", [])
                for definition in definitions:
                    antonyms.extend(definition.get("antonyms", []))

            if antonyms:
                return random.choice(antonyms)

            meanings.remove(meaning)

        return None
|
src/factories/gen_question/base.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from abc import ABC, abstractmethod
|
| 2 |
-
from typing import Set
|
| 3 |
|
|
|
|
| 4 |
import nltk
|
| 5 |
|
| 6 |
nltk.download('words')
|
|
@@ -10,7 +11,7 @@ nltk_words = words.words()
|
|
| 10 |
|
| 11 |
class Question(ABC):
|
| 12 |
@abstractmethod
|
| 13 |
-
def generate_questions(self,
|
| 14 |
pass
|
| 15 |
|
| 16 |
@staticmethod
|
|
@@ -19,4 +20,18 @@ class Question(ABC):
|
|
| 19 |
num_questions: int = 1,
|
| 20 |
num_ans_per_question: int = 4
|
| 21 |
) -> int:
|
| 22 |
-
return min(len_list_words//num_questions, num_ans_per_question)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from abc import ABC, abstractmethod
|
| 2 |
+
from typing import Set, Optional
|
| 3 |
|
| 4 |
+
import requests
|
| 5 |
import nltk
|
| 6 |
|
| 7 |
nltk.download('words')
|
|
|
|
| 11 |
|
| 12 |
class Question(ABC):
    """Abstract base for all question generators.

    Subclasses implement ``generate_questions``; the static helpers below
    are shared utilities for sizing answer pools and fetching dictionary
    data for a word.
    """

    @abstractmethod
    def generate_questions(self, list_words: Set[str], num_questions: int = 1, num_ans_per_question: int = 4):
        """Generate ``num_questions`` question dicts drawn from ``list_words``."""
        pass

    @staticmethod
    def cal_num_word_in_list_available_per_question(
            len_list_words: int,
            num_questions: int = 1,
            num_ans_per_question: int = 4
    ) -> int:
        """Return how many words of the input list each question may consume.

        The per-question share of the list is capped by the number of
        answers per question.
        """
        return min(len_list_words // num_questions, num_ans_per_question)

    @staticmethod
    def fetch_word_data(word: str) -> Optional[dict]:
        """Fetch dictionary data for ``word`` from dictionaryapi.dev.

        Returns:
            The first entry of the API response, or None on any network,
            HTTP, or parsing failure (best-effort lookup).
        """
        try:
            base_url = "https://api.dictionaryapi.dev/api/v2/entries/en/"
            # Timeout prevents question generation from hanging forever on
            # a slow or unreachable endpoint.
            resp = requests.get(base_url + word, timeout=10)
            if resp.status_code == 200:
                data = resp.json()
                # The API returns a list of entries; guard against an
                # unexpectedly empty list before indexing.
                return data[0] if data else None
            else:
                return None
        except Exception:
            # Best-effort: treat any failure (network, JSON, ...) as "no data".
            return None
|
src/factories/gen_question/incorrect_word_question.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
import random
|
| 3 |
+
|
| 4 |
+
from src.enum.question import QuestionTypeEnum
|
| 5 |
+
from src.factories.gen_question.base import Question, nltk_words
|
| 6 |
+
from src.services.AI.sentence_generator import SentenceGeneratorModel
|
| 7 |
+
from src.utils.word import transform_word
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class IncorrectWordQuestion(Question):
    """
    This class generates multiple-choice questions that ask the user
    to find the incorrect word in a sentence.

    It selects a word from the list, generates a sentence (currently a
    hard-coded placeholder until the AI sentence generator is enabled),
    and injects a grammatically incorrect word into the sentence.
    """

    def generate_questions(self, list_words: List[str], num_question: int = 1, num_ans_per_question: int = 4):
        if list_words is None:
            list_words = []

        result = []
        list_unique_words = set(list_words)

        # sentence_generator = SentenceGeneratorModel()

        def choice_word_to_gen_sentence():
            """Pick 1-4 seed words for sentence generation, consuming the pool.

            Falls back to random nltk_words when the pool runs short.
            """
            number_choice_word = random.randint(1, 4)

            available_words = list(list_unique_words)
            if number_choice_word <= len(available_words):
                choice_word = random.sample(available_words, number_choice_word)
                for w in choice_word:
                    list_unique_words.remove(w)
            else:
                # Take all remaining words and top up from nltk_words.
                choice_word = available_words.copy()
                remaining = number_choice_word - len(choice_word)
                additional_words = random.sample(nltk_words, remaining)
                choice_word += additional_words
                list_unique_words.clear()

            return choice_word

        for _ in range(num_question):
            list_choice_word = choice_word_to_gen_sentence()

            # 1. Generate a simple sentence (placeholder until the sentence
            #    generator model is wired in).
            # sentence = sentence_generator.generate_sentence_from_words(list_choice_word)
            sentence = "The Conservatives and the Liberal Democrats are demanding the Labour government publish the evidence it submitted in the now-collapsed case against two people accused of spying for China."
            # 2. Randomly choose a word to make incorrect in the sentence
            sentence_words = sentence.strip(".").split()
            correct_word = random.sample(list(set(sentence_words)), 1)[0]
            sentence_words.remove(correct_word)

            # 3. Replace it with a grammatically incorrect word.
            # NOTE(review): str.replace matches substrings, so this may also
            # hit an occurrence embedded in a longer word — confirm acceptable.
            incorrect_word = transform_word(correct_word)
            modified_sentence = sentence.replace(correct_word, incorrect_word, 1)

            # 4. Create choices: distractors from the sentence plus the
            #    corrupted word. BUGFIX: the corrupted word must be wrapped
            #    in a list — concatenating a bare str to a list raises
            #    TypeError.
            choices = random.sample(list(set(sentence_words)), num_ans_per_question - 1) + [incorrect_word]

            random.shuffle(choices)
            result.append({
                "question": modified_sentence,
                "type": QuestionTypeEnum.INCORRECT_WORD,
                "choices": choices,
                "answer": choices.index(incorrect_word),
                # BUGFIX: the original literal "Correct: {sequence}" was not
                # an f-string and referenced an undefined name; show the
                # actual word that was replaced.
                "explain": [f"Correct: {correct_word}"],
            })

        return result
|
src/factories/gen_question/pronunciation_question.py
CHANGED
|
@@ -1,141 +1,141 @@
|
|
| 1 |
-
import random
|
| 2 |
-
from typing import Set, List
|
| 3 |
-
from src.factories.gen_question.base import Question
|
| 4 |
-
from src.enum.question import QuestionTypeEnum
|
| 5 |
-
from collections import defaultdict
|
| 6 |
-
import pronouncing
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
class PronunciationQuestion(Question):
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
| 1 |
+
# import random
|
| 2 |
+
# from typing import Set, List
|
| 3 |
+
# from src.factories.gen_question.base import Question
|
| 4 |
+
# from src.enum.question import QuestionTypeEnum
|
| 5 |
+
# from collections import defaultdict
|
| 6 |
+
# import pronouncing
|
| 7 |
+
#
|
| 8 |
+
#
|
| 9 |
+
# class PronunciationQuestion(Question):
|
| 10 |
+
# def generate_questions(self, list_words: List[str], num_questions: int = 1, num_ans_per_question: int = 4) :
|
| 11 |
+
# result = []
|
| 12 |
+
# list_unique_words = set(list_words)
|
| 13 |
+
#
|
| 14 |
+
# num_word_in_list_per_question = self.cal_num_word_in_list_available_per_question(len(list_words), num_questions, num_ans_per_question)
|
| 15 |
+
#
|
| 16 |
+
# for _ in range(num_questions) :
|
| 17 |
+
# main_word = None
|
| 18 |
+
# main_segment = None
|
| 19 |
+
# main_pron = None
|
| 20 |
+
# while main_word is None and len(list_words) > 0:
|
| 21 |
+
# main_word = random.choice(list_words)
|
| 22 |
+
# main_segment = self.extract_main_segment(main_word)
|
| 23 |
+
# main_pron, segment_pron = self.get_pronunciation_of_word_and_segment(main_word, main_segment)
|
| 24 |
+
# if main_pron is None or segment_pron is None :
|
| 25 |
+
# main_word = None
|
| 26 |
+
# list_words.remove(main_word)
|
| 27 |
+
#
|
| 28 |
+
# question = main_segment
|
| 29 |
+
# choices = [main_word]
|
| 30 |
+
# explain = [f'{main_word} : {main_pron}']
|
| 31 |
+
# similar_pron_words = []
|
| 32 |
+
# different_pron_word = None
|
| 33 |
+
#
|
| 34 |
+
# # random main_char trong main_word de lam tu so sanh phien am
|
| 35 |
+
# # tim trong list_words co tu nao chua main_char sao cho phiên âm của các từ được tìm thấy là 1 từ có phiên âm khác, còn các từ còn lại có phiên âm giống nhau
|
| 36 |
+
# # xoa cac tu duoc chon trong list_words
|
| 37 |
+
#
|
| 38 |
+
# result.append({
|
| 39 |
+
# "question": "",
|
| 40 |
+
# "type": QuestionTypeEnum.PRONUNCIATION,
|
| 41 |
+
# "choices": choices,
|
| 42 |
+
# "answer": `index_of_choice`,
|
| 43 |
+
# "explain":
|
| 44 |
+
# })
|
| 45 |
+
#
|
| 46 |
+
# def get_pronunciation_of_word_and_segment(self, word: str, segment: str):
|
| 47 |
+
# word_pron = None
|
| 48 |
+
# word_segment = None
|
| 49 |
+
# try:
|
| 50 |
+
# p = pronouncing.phones_for_word(word)
|
| 51 |
+
# if not p:
|
| 52 |
+
# return None, None
|
| 53 |
+
# word_pron = p[0]
|
| 54 |
+
# except Exception:
|
| 55 |
+
# return None, None
|
| 56 |
+
#
|
| 57 |
+
#
|
| 58 |
+
#
|
| 59 |
+
#
|
| 60 |
+
#
|
| 61 |
+
#
|
| 62 |
+
# def extract_main_segment(self, word: str) -> str:
|
| 63 |
+
# """
|
| 64 |
+
# Extracts a random phonetic segment (vowel, consonant, consonant cluster, diphthong, or common ending)
|
| 65 |
+
# from the word, excluding the last segment unless it's the only option.
|
| 66 |
+
# For example, 'pronunciation' can be segmented as:
|
| 67 |
+
# - Individual: ['p', 'r', 'o', 'n', 'u', 'n', 'c', 'i', 'a', 't']
|
| 68 |
+
# - Grouped: ['p', 'r', 'o', 'n', 'u', 'n', 'c', 'i', 'a', 'tion']
|
| 69 |
+
# For example, 'phone' can be segmented as:
|
| 70 |
+
# - Grouped: ['ph', 'o', 'n', 'e']
|
| 71 |
+
# """
|
| 72 |
+
# if not word or len(word) <= 2:
|
| 73 |
+
# return word
|
| 74 |
+
#
|
| 75 |
+
# word = word.lower()
|
| 76 |
+
#
|
| 77 |
+
# # Define phonetic components
|
| 78 |
+
# vowels = set('aeiou')
|
| 79 |
+
# consonant_clusters = ['th', 'ph', 'sh', 'ch', 'wh', 'gh', 'sch', 'tr', 'sh', 's', 't', 'p']
|
| 80 |
+
# diphthongs = ['ai', 'au', 'ei', 'eu', 'oi', 'ou', 'ui', 'ie', 'io', 'ea', 'ee', 'oa', 'oe']
|
| 81 |
+
# common_endings = ['tion', 'sion', 'ing', 'ed', 'es']
|
| 82 |
+
#
|
| 83 |
+
# # Step 1: Segment the word
|
| 84 |
+
# segments = []
|
| 85 |
+
# i = 0
|
| 86 |
+
# while i < len(word):
|
| 87 |
+
# # Check for common endings (e.g., 'tion')
|
| 88 |
+
# matched_ending = False
|
| 89 |
+
# for ending in common_endings:
|
| 90 |
+
# if word[i:].startswith(ending) and i + len(ending) <= len(word):
|
| 91 |
+
# segments.append(ending)
|
| 92 |
+
# i += len(ending)
|
| 93 |
+
# matched_ending = True
|
| 94 |
+
# break
|
| 95 |
+
# if matched_ending:
|
| 96 |
+
# continue
|
| 97 |
+
#
|
| 98 |
+
# # Check for consonant clusters (e.g., 'th', 'ph', 'sch')
|
| 99 |
+
# matched_cluster = False
|
| 100 |
+
# for cluster in consonant_clusters:
|
| 101 |
+
# if word[i:].startswith(cluster) and i + len(cluster) <= len(word):
|
| 102 |
+
# segments.append(cluster)
|
| 103 |
+
# i += len(cluster)
|
| 104 |
+
# matched_cluster = True
|
| 105 |
+
# break
|
| 106 |
+
# if matched_cluster:
|
| 107 |
+
# continue
|
| 108 |
+
#
|
| 109 |
+
# # Check for diphthongs (e.g., 'io')
|
| 110 |
+
# matched_diphthong = False
|
| 111 |
+
# for diph in diphthongs:
|
| 112 |
+
# if word[i:].startswith(diph) and i + len(diph) <= len(word):
|
| 113 |
+
# segments.append(diph)
|
| 114 |
+
# i += len(diph)
|
| 115 |
+
# matched_diphthong = True
|
| 116 |
+
# break
|
| 117 |
+
# if matched_diphthong:
|
| 118 |
+
# continue
|
| 119 |
+
#
|
| 120 |
+
# # Add single character (vowel or consonant)
|
| 121 |
+
# if word[i].isalpha():
|
| 122 |
+
# segments.append(word[i])
|
| 123 |
+
# i += 1
|
| 124 |
+
#
|
| 125 |
+
# # Step 2: Filter valid segments (exclude the last segment if possible)
|
| 126 |
+
# valid_segments = segments[:-1] if len(segments) > 1 else segments
|
| 127 |
+
#
|
| 128 |
+
# # Step 3: If no valid segments, fall back to single characters
|
| 129 |
+
# if not valid_segments:
|
| 130 |
+
# valid_positions = [i for i in range(len(word) - 1) if word[i].isalpha()]
|
| 131 |
+
# if not valid_positions:
|
| 132 |
+
# return ''
|
| 133 |
+
# pos = random.choice(valid_positions)
|
| 134 |
+
# return word[pos]
|
| 135 |
+
#
|
| 136 |
+
# # Step 4: Randomly choose a segment
|
| 137 |
+
# return random.choice(valid_segments)
|
| 138 |
+
#
|
| 139 |
+
# def cal_num_word_in_list_available_per_question(self, len_list_words: int, num_questions: int = 1, num_ans_per_question: int = 4) -> int:
|
| 140 |
+
# num_word_in_list_available_per_question = len_list_words // num_questions
|
| 141 |
+
# return num_word_in_list_available_per_question if num_word_in_list_available_per_question < num_ans_per_question else num_ans_per_question
|
src/factories/gen_question/question.py
CHANGED
|
@@ -1,15 +1,21 @@
|
|
| 1 |
from src.enum.question import QuestionTypeEnum
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from src.utils.exceptions import BadRequestException
|
| 3 |
|
| 4 |
|
| 5 |
-
def
|
| 6 |
-
if
|
| 7 |
-
return
|
| 8 |
-
elif
|
| 9 |
-
return
|
| 10 |
-
elif
|
| 11 |
-
return
|
| 12 |
-
elif
|
| 13 |
-
return
|
|
|
|
|
|
|
| 14 |
else:
|
| 15 |
raise BadRequestException('type_invalid')
|
|
|
|
| 1 |
from src.enum.question import QuestionTypeEnum
|
| 2 |
+
from src.factories.gen_question.antonym_question import AntonymsQuestion
|
| 3 |
+
from src.factories.gen_question.incorrect_word_question import IncorrectWordQuestion
|
| 4 |
+
from src.factories.gen_question.stress_question import StressQuestion
|
| 5 |
+
from src.factories.gen_question.synonym_question import SynonymsQuestion
|
| 6 |
from src.utils.exceptions import BadRequestException
|
| 7 |
|
| 8 |
|
| 9 |
+
def create_question_instance(question_type: QuestionTypeEnum):
    """Factory: return the question generator matching ``question_type``.

    PRONUNCIATION currently maps to StressQuestion as a stopgap while
    PronunciationQuestion is disabled.

    Raises:
        BadRequestException: when the type has no registered generator.
    """
    factories = {
        QuestionTypeEnum.PRONUNCIATION: StressQuestion,
        QuestionTypeEnum.STRESS: StressQuestion,
        QuestionTypeEnum.SYNONYM: SynonymsQuestion,
        QuestionTypeEnum.ANTONYM: AntonymsQuestion,
        QuestionTypeEnum.INCORRECT_WORD: IncorrectWordQuestion,
    }
    factory = factories.get(question_type)
    if factory is None:
        raise BadRequestException('type_invalid')
    return factory()
|
src/factories/gen_question/stress_question.py
CHANGED
|
@@ -1,18 +1,23 @@
|
|
| 1 |
-
import random
|
| 2 |
-
from collections import defaultdict
|
| 3 |
from typing import List
|
|
|
|
|
|
|
|
|
|
| 4 |
from src.factories.gen_question.base import Question, nltk_words
|
| 5 |
from src.enum.question import QuestionTypeEnum
|
| 6 |
-
|
| 7 |
from src.utils.number import rand_exclude
|
| 8 |
from src.utils.word import get_stress_pattern, convert_word_to_ipa
|
| 9 |
|
| 10 |
|
| 11 |
class StressQuestion(Question):
|
| 12 |
-
def generate_questions(self, list_words: List[str], num_question: int = 1, num_ans_per_question: int = 4):
|
|
|
|
|
|
|
|
|
|
| 13 |
result = []
|
| 14 |
|
| 15 |
-
#
|
|
|
|
|
|
|
| 16 |
stress_groups = defaultdict(list)
|
| 17 |
for word in list_words:
|
| 18 |
stress = get_stress_pattern(word)
|
|
@@ -20,75 +25,85 @@ class StressQuestion(Question):
|
|
| 20 |
if ipa is None or stress is None:
|
| 21 |
continue
|
| 22 |
stress_groups[stress].append({"word": word, "ipa": ipa})
|
| 23 |
-
num_word_in_list_per_question = self.cal_num_word_in_list_available_per_question(len(list_words), num_question, num_ans_per_question)
|
| 24 |
|
| 25 |
# create
|
| 26 |
-
def choice_random_words_in_stress_group(stress_group_key):
|
| 27 |
stress_group = stress_groups[stress_group_key]
|
| 28 |
item = random.choice(stress_group)
|
| 29 |
-
stress_group.remove(item)
|
| 30 |
return item["word"], item["ipa"]
|
| 31 |
|
| 32 |
-
for _ in range(num_question):
|
| 33 |
choices = []
|
| 34 |
explain = []
|
| 35 |
list_stress_group_keys = list(stress_groups.keys())
|
| 36 |
|
| 37 |
-
#
|
| 38 |
-
if
|
| 39 |
different_stress = random.choice(list_stress_group_keys)
|
| 40 |
list_stress_group_keys.remove(different_stress)
|
| 41 |
-
|
|
|
|
| 42 |
else:
|
| 43 |
different_stress = random.randint(1, 3)
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
choices.append(different_word)
|
| 47 |
-
explain.append(f'{different_word} ({different_ipa})')
|
| 48 |
|
| 49 |
-
#
|
| 50 |
-
if
|
| 51 |
-
# if in list word exist more two stresses, get choice in list word
|
| 52 |
common_stress = random.choice(list_stress_group_keys)
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
common_word, common_ipa = choice_random_words_in_stress_group(common_stress)
|
| 57 |
-
|
| 58 |
choices.append(common_word)
|
| 59 |
-
explain.append(f'{common_word} ({common_ipa})')
|
| 60 |
else:
|
| 61 |
common_stress = rand_exclude(1, 3, different_stress)
|
| 62 |
|
| 63 |
-
#
|
| 64 |
while len(choices) < num_ans_per_question:
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
| 67 |
choices.append(common_word)
|
| 68 |
-
explain.append(f'{common_word} ({
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
return result
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
word = random.choice(nltk_words)
|
| 86 |
word_ipa = convert_word_to_ipa(word)
|
| 87 |
word_stress = get_stress_pattern(word)
|
| 88 |
if word_ipa is None or word_stress is None:
|
| 89 |
-
|
| 90 |
continue
|
| 91 |
if word_stress == stress:
|
| 92 |
return word, word_ipa
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
| 1 |
from typing import List
|
| 2 |
+
from collections import defaultdict
|
| 3 |
+
import random
|
| 4 |
+
|
| 5 |
from src.factories.gen_question.base import Question, nltk_words
|
| 6 |
from src.enum.question import QuestionTypeEnum
|
|
|
|
| 7 |
from src.utils.number import rand_exclude
|
| 8 |
from src.utils.word import get_stress_pattern, convert_word_to_ipa
|
| 9 |
|
| 10 |
|
| 11 |
class StressQuestion(Question):
    """Generates 'which word has a different stress pattern?' questions.

    Words from the input list are grouped by stress pattern; one group
    supplies the odd-one-out answer and another supplies the common-stress
    distractors. Missing slots are filled from nltk_words.
    """

    def generate_questions(self, list_words: List[str] = None, num_question: int = 1, num_ans_per_question: int = 4):
        if list_words is None:
            list_words = []

        result = []

        # Process data: group words by stress pattern
        num_word_in_list_per_question = self.cal_num_word_in_list_available_per_question(len(list_words), num_question, num_ans_per_question)

        stress_groups = defaultdict(list)
        for word in list_words:
            stress = get_stress_pattern(word)
            ipa = convert_word_to_ipa(word)
            # Skip words we cannot transcribe or stress-analyse.
            if ipa is None or stress is None:
                continue
            stress_groups[stress].append({"word": word, "ipa": ipa})

        # create
        def choice_random_words_in_stress_group(stress_group_key: int):
            """Pop a random (word, ipa) pair out of the given stress group."""
            stress_group = stress_groups[stress_group_key]
            item = random.choice(stress_group)
            stress_group.remove(item)  # Remove to avoid reuse within the same question
            return item["word"], item["ipa"]

        for _ in range(num_question):
            choices = []
            explain = []
            list_stress_group_keys = list(stress_groups.keys())

            # Get word with different stress
            if list_stress_group_keys:
                different_stress = random.choice(list_stress_group_keys)
                list_stress_group_keys.remove(different_stress)
                different_word_ipa = choice_random_words_in_stress_group(different_stress)
                different_word, different_ipa = different_word_ipa
            else:
                different_stress = random.randint(1, 3)
                different_word_ipa = self.get_random_word_and_ipa_by_stress(different_stress)
                if different_word_ipa is None:
                    continue  # Skip this question if no valid word is found
                different_word, different_ipa = different_word_ipa

            choices.append(different_word)
            explain.append(f'{different_word} ({different_ipa}, stress pattern: {different_stress})')

            # Get words with common stress
            if list_stress_group_keys:
                common_stress = random.choice(list_stress_group_keys)
                while len(choices) < num_word_in_list_per_question and stress_groups[common_stress]:
                    common_word_ipa = choice_random_words_in_stress_group(common_stress)
                    common_word, common_ipa = common_word_ipa
                    choices.append(common_word)
                    explain.append(f'{common_word} ({common_ipa}, stress pattern: {common_stress})')
            else:
                common_stress = rand_exclude(1, 3, different_stress)

            # Fill remaining choices from nltk_words if needed
            while len(choices) < num_ans_per_question:
                common_word_ipa = self.get_random_word_and_ipa_by_stress(common_stress)
                if common_word_ipa is None:
                    break  # Skip adding if no valid word is found
                common_word, common_ipa = common_word_ipa
                choices.append(common_word)
                explain.append(f'{common_word} ({common_ipa}, stress pattern: {common_stress})')

            # Only add the question if we have enough choices.
            # (BUGFIX: removed leftover debug print() calls that wrote the
            # choices and final result to stdout in production.)
            if len(choices) == num_ans_per_question:
                random.shuffle(choices)
                result.append({
                    "question": "",
                    "type": QuestionTypeEnum.STRESS,
                    "choices": choices,
                    "answer": choices.index(different_word),
                    "explain": explain,
                })

        return result

    @staticmethod
    def get_random_word_and_ipa_by_stress(stress: int):
        """Search nltk_words at random for a word with the given stress pattern.

        Bounded by max_attempts so an unlucky search cannot loop forever.

        Returns:
            tuple(str, str) of (word, ipa), or None when no match is found.
        """
        max_attempts = 10000
        attempts = 0
        while attempts < max_attempts:
            if not nltk_words:
                return None
            word = random.choice(nltk_words)
            word_ipa = convert_word_to_ipa(word)
            word_stress = get_stress_pattern(word)
            if word_ipa is None or word_stress is None:
                attempts += 1
                continue
            if word_stress == stress:
                return word, word_ipa
            attempts += 1
        return None  # Return None if no word is found
|
src/factories/gen_question/synonym_question.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
import random
|
| 3 |
+
|
| 4 |
+
from src.factories.gen_question.base import Question, nltk_words
|
| 5 |
+
from src.enum.question import QuestionTypeEnum
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class SynonymsQuestion(Question):
|
| 9 |
+
"""
|
| 10 |
+
This class generates multiple-choice questions that ask the user
|
| 11 |
+
to select a synonym for a given word.
|
| 12 |
+
|
| 13 |
+
It uses dictionary data (from fetch_word_data) to retrieve
|
| 14 |
+
meanings and synonyms. If the input list is empty or invalid,
|
| 15 |
+
it falls back to randomly chosen words from a built-in word list (nltk_words).
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
def generate_questions(self, list_words: List[str] = None, num_question: int = 1, num_ans_per_question: int = 4):
|
| 19 |
+
if list_words is None:
|
| 20 |
+
list_words = []
|
| 21 |
+
|
| 22 |
+
result = []
|
| 23 |
+
list_unique_words = set(list_words)
|
| 24 |
+
|
| 25 |
+
# Internal helper function to get a valid question/answer pair
|
| 26 |
+
def get_question_and_answer():
|
| 27 |
+
"""
|
| 28 |
+
Randomly selects a word and finds one of its synonyms.
|
| 29 |
+
|
| 30 |
+
Returns:
|
| 31 |
+
tuple(str, str): question_word, synonym_answer
|
| 32 |
+
"""
|
| 33 |
+
# Try from provided list word
|
| 34 |
+
while list_unique_words:
|
| 35 |
+
source_word = random.sample(list(list_unique_words), 1)[0]
|
| 36 |
+
list_unique_words.remove(source_word)
|
| 37 |
+
synonym_word = self.get_synonym(source_word)
|
| 38 |
+
if synonym_word in list_unique_words:
|
| 39 |
+
list_unique_words.remove(source_word)
|
| 40 |
+
if synonym_word:
|
| 41 |
+
return source_word, synonym_word
|
| 42 |
+
|
| 43 |
+
# Fallback: use nltk_words
|
| 44 |
+
while True:
|
| 45 |
+
source_word = random.choice(nltk_words)
|
| 46 |
+
synonym_word = self.get_synonym(source_word)
|
| 47 |
+
if synonym_word:
|
| 48 |
+
return source_word, synonym_word
|
| 49 |
+
|
| 50 |
+
for _ in range(num_question):
|
| 51 |
+
question_word, correct_answer = get_question_and_answer()
|
| 52 |
+
|
| 53 |
+
choices = [correct_answer]
|
| 54 |
+
distractor_set = set()
|
| 55 |
+
|
| 56 |
+
while len(choices) < num_ans_per_question:
|
| 57 |
+
distractor_word = random.choice(nltk_words)
|
| 58 |
+
|
| 59 |
+
if (distractor_word.lower() != correct_answer.lower() and
|
| 60 |
+
distractor_word.lower() != question_word.lower() and
|
| 61 |
+
distractor_word.lower() not in distractor_set):
|
| 62 |
+
distractor_set.add(distractor_word)
|
| 63 |
+
choices.append(distractor_word)
|
| 64 |
+
|
| 65 |
+
random.shuffle(choices)
|
| 66 |
+
|
| 67 |
+
result.append({
|
| 68 |
+
"question": question_word,
|
| 69 |
+
"type": QuestionTypeEnum.SYNONYM,
|
| 70 |
+
"choices": choices,
|
| 71 |
+
"answer": choices.index(correct_answer),
|
| 72 |
+
"explain": [],
|
| 73 |
+
})
|
| 74 |
+
|
| 75 |
+
return result
|
| 76 |
+
|
| 77 |
+
def get_synonym(self, word: str):
    """
    Retrieve a random synonym for the given word using dictionary API data.

    Both the top-level 'meanings.synonyms' field and the nested
    'meanings.definitions.synonyms' field are searched.

    Args:
        word (str): The input word to find a synonym for.

    Returns:
        str or None: A synonym if found, else None.
    """
    data = self.fetch_word_data(word)
    if not data:
        return None

    # Work on a copy: the original code called meanings.remove(...) on the
    # list owned by `data`, mutating the (possibly cached) fetched payload.
    meanings = list(data.get("meanings", []))

    # Randomly search for synonyms in the meaning entries
    while meanings:
        meaning = random.choice(meanings)

        # Copy the top-level synonyms so extend() below cannot mutate `data`.
        synonyms = list(meaning.get("synonyms", []))

        # Also check synonyms inside definitions
        if not synonyms:
            for definition in meaning.get("definitions", []):
                synonyms.extend(definition.get("synonyms", []))

        if synonyms:
            return random.choice(synonyms)

        meanings.remove(meaning)

    return None
|
src/interfaces/question.py
CHANGED
|
@@ -1,5 +1,8 @@
|
|
| 1 |
-
from pydantic import BaseModel
|
| 2 |
-
from typing import Optional
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
class ModelInput(BaseModel):
|
| 5 |
"""General request model structure for flutter incoming req."""
|
|
@@ -9,4 +12,17 @@ class ModelInput(BaseModel):
|
|
| 9 |
|
| 10 |
class ICQuestion(BaseModel):
|
| 11 |
context: str
|
| 12 |
-
name: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field, field_validator
|
| 2 |
+
from typing import Optional, List
|
| 3 |
+
|
| 4 |
+
from src.enum.question import QuestionTypeEnum
|
| 5 |
+
|
| 6 |
|
| 7 |
class ModelInput(BaseModel):
|
| 8 |
"""General request model structure for flutter incoming req."""
|
|
|
|
| 12 |
|
| 13 |
class ICQuestion(BaseModel):
    """Request body: a block of source text (``context``) plus its ``name``."""

    context: str
    name: str
|
| 16 |
+
|
| 17 |
+
class ICreateQuestion(BaseModel):
    """
    Request body for generating vocabulary questions.

    Attributes:
        question_type: Kind of question to generate (stress, synonym, ...).
        list_words: Candidate vocabulary; each entry must be a single word.
        num_ans_per_question: Answer choices per question (2-10).
        num_question: Number of questions to generate (1-10).
    """

    question_type: QuestionTypeEnum
    list_words: List[str]
    num_ans_per_question: int = Field(..., ge=2, le=10)
    num_question: int = Field(..., ge=1, le=10)

    @field_validator('list_words')
    def check_single_word(cls, value):
        # Reject any whitespace, not only the space character: the old
        # `" " in word` check let tabs/newlines smuggle in multi-word entries.
        for word in value:
            if any(ch.isspace() for ch in word):
                raise ValueError("list_words_just_includes_single_word")
        return value
|
src/routers/public/quesion.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
from fastapi import APIRouter, Request
|
| 2 |
from fastapi.responses import JSONResponse
|
| 3 |
|
|
|
|
| 4 |
from src.utils.response import res_ok
|
| 5 |
from src.utils.text_process import vietnamese_to_english, english_to_vietnamese, get_all_summary, get_all_questions
|
| 6 |
-
from src.interfaces.question import ModelInput, ICQuestion
|
| 7 |
from src.services.AI.abstractive_summarizer import AbstractiveSummarizer
|
| 8 |
from src.services.AI.question_generator import QuestionGenerator
|
| 9 |
from src.services.AI.false_ans_generator import FalseAnswerGenerator
|
|
@@ -11,8 +12,20 @@ from src.services.AI.keyword_extractor import KeywordExtractor
|
|
| 11 |
|
| 12 |
route = APIRouter(prefix="/question", tags=["Question"])
|
| 13 |
print("Including question routes...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
@route.post('/sentence')
|
| 15 |
-
async def generate_questions_from_sentence(
|
| 16 |
"""Process user request
|
| 17 |
|
| 18 |
Args:
|
|
|
|
| 1 |
from fastapi import APIRouter, Request
|
| 2 |
from fastapi.responses import JSONResponse
|
| 3 |
|
| 4 |
+
from src.factories.gen_question.question import create_question_instance
|
| 5 |
from src.utils.response import res_ok
|
| 6 |
from src.utils.text_process import vietnamese_to_english, english_to_vietnamese, get_all_summary, get_all_questions
|
| 7 |
+
from src.interfaces.question import ModelInput, ICQuestion, ICreateQuestion
|
| 8 |
from src.services.AI.abstractive_summarizer import AbstractiveSummarizer
|
| 9 |
from src.services.AI.question_generator import QuestionGenerator
|
| 10 |
from src.services.AI.false_ans_generator import FalseAnswerGenerator
|
|
|
|
| 12 |
|
| 13 |
route = APIRouter(prefix="/question", tags=["Question"])
|
| 14 |
print("Including question routes...")
|
| 15 |
+
|
| 16 |
+
@route.post('/')
async def generate_question(body: ICreateQuestion):
    """Generate vocabulary questions of the requested type.

    Args:
        body: Validated request carrying the question type, the candidate
            word list, and the question/answer counts.

    Returns:
        JSONResponse: 200 response wrapping the generated question list.
    """
    question = create_question_instance(body.question_type)
    list_questions = question.generate_questions(
        list_words=body.list_words,
        num_question=body.num_question,
        num_ans_per_question=body.num_ans_per_question,
    )
    # Dropped the stray debug `print(list_questions)` left in the handler.
    return JSONResponse(status_code=200, content=res_ok(list_questions))
|
| 26 |
+
|
| 27 |
@route.post('/sentence')
|
| 28 |
+
async def generate_questions_from_sentence(body: ICQuestion, request: Request):
|
| 29 |
"""Process user request
|
| 30 |
|
| 31 |
Args:
|
src/services/AI/sentence_generator.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .base import Model
|
| 2 |
+
from typing import List
|
| 3 |
+
import random
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class SentenceGeneratorModel(Model):
    """
    Singleton wrapper around the base Model that produces English
    sentences containing a random selection of vocabulary words.
    """

    _instance = None

    def __new__(cls, model_name: str = "google/flan-t5-base"):
        # Lazily create the one shared instance; later calls reuse it.
        if cls._instance is None:
            instance = super(SentenceGeneratorModel, cls).__new__(cls)
            instance._initialized = False
            cls._instance = instance
        return cls._instance

    def __init__(self, model_name: str = "google/flan-t5-base"):
        # Guard so the shared instance is only initialised once.
        if self._initialized:
            return
        super().__init__(model_name)
        self._initialized = True

    def generate_sentence_from_words(
        self,
        vocab_list: List[str],
        min_words: int = 2,
        max_words: int = 5,
        model_max_length: int = 64,
        token_max_length: int = 64
    ) -> str:
        """
        Generate a sentence that uses some of the given vocabulary words.

        Args:
            vocab_list (List[str]): The list of available vocabulary words.
            min_words (int): Minimum number of words to include in sentence.
            max_words (int): Maximum number of words to include.
            model_max_length (int): Max length of generated sentence.
            token_max_length (int): Max length for tokenization.

        Returns:
            str: A generated sentence using selected words.

        Raises:
            ValueError: If ``vocab_list`` is empty.
        """
        if not vocab_list:
            raise ValueError("vocab_list cannot be empty.")

        # Pick between min_words and max_words entries, capped by pool size.
        sample_size = min(len(vocab_list), random.randint(min_words, max_words))
        chosen = random.sample(vocab_list, k=sample_size)

        prompt = f"Write an English sentence using the following words: {', '.join(chosen)}."

        return self.inference(
            model_max_length=model_max_length,
            token_max_length=token_max_length,
            task=prompt
        )
|
src/utils/word.py
CHANGED
|
@@ -27,11 +27,184 @@ def get_stress_pattern(word):
|
|
| 27 |
0 indicates no stress.
|
| 28 |
"""
|
| 29 |
list_pattern = pronouncing.stresses_for_word(word)
|
|
|
|
|
|
|
| 30 |
pattern = list_pattern[0]
|
|
|
|
|
|
|
| 31 |
try:
|
| 32 |
-
index = pattern.index('1')
|
| 33 |
return index
|
| 34 |
-
except ValueError
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
return None
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
|
|
|
| 27 |
0 indicates no stress.
|
| 28 |
"""
|
| 29 |
list_pattern = pronouncing.stresses_for_word(word)
|
| 30 |
+
if list_pattern is None or list_pattern == []:
|
| 31 |
+
return None
|
| 32 |
pattern = list_pattern[0]
|
| 33 |
+
if len(pattern) == 1:
|
| 34 |
+
return None
|
| 35 |
try:
|
| 36 |
+
index = pattern.index('1') + 1
|
| 37 |
return index
|
| 38 |
+
except ValueError:
|
| 39 |
+
return None
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
import random
|
| 43 |
+
from typing import Optional
|
| 44 |
+
import nltk
|
| 45 |
+
nltk.download('wordnet')
|
| 46 |
+
from nltk.corpus import wordnet as wn
|
| 47 |
+
|
| 48 |
+
def transform_word(word: str) -> Optional[str]:
    """
    Transform a word into another word by changing its type, tense,
    article-related form, or meaning, producing an incorrect option for a
    'find the wrong word' question.

    Args:
        word (str): The input word to transform.

    Returns:
        Optional[str]: The transformed word, or None if no transformation
        is possible.
    """
    # Candidate transformations, attempted in a random order.
    candidates = [
        transform_preposition,  # Handle prepositions
        transform_word_type,    # Change word type (e.g., noun to verb)
        transform_tense,        # Change verb tense
        transform_article,      # Change article-related form
        transform_meaning,      # Change to a word with different meaning
    ]
    random.shuffle(candidates)

    for transform in candidates:
        result = transform(word)
        if result and result != word:
            return result

    # Fallback: pick a random dictionary word when nothing else applies.
    try:
        from src.factories.gen_question.base import nltk_words
        return random.choice(nltk_words) if nltk_words else None
    except ImportError:
        return None
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def transform_preposition(word: str) -> Optional[str]:
    """
    Transform a preposition into another preposition that is likely to be
    incorrect in context.

    Args:
        word (str): The input word to check and transform.

    Returns:
        Optional[str]: A different preposition, or None if the input is not
        a preposition.
    """
    # Common prepositions and their common incorrect substitutions
    preposition_map = {
        'in': ['on', 'at', 'to'],
        'on': ['in', 'at', 'over'],
        'at': ['in', 'on', 'by'],
        'to': ['in', 'at', 'for'],
        'for': ['to', 'with', 'in'],
        'with': ['for', 'by', 'in'],
        'by': ['with', 'at', 'on'],
        'from': ['to', 'in', 'at'],
        'of': ['for', 'in', 'on'],
    }

    word_lower = word.lower()
    if word_lower in preposition_map:
        # random.choice is the idiomatic single-element pick;
        # random.sample(..., 1)[0] allocates a throwaway list.
        return random.choice(preposition_map[word_lower])
    return None
|
| 110 |
+
|
| 111 |
+
def transform_word_type(word: str) -> Optional[str]:
    """
    Transform a word by changing its part of speech (e.g., noun to verb).
    Uses WordNet to find related words with a different POS.
    """
    # Map each part of speech onto the POS we want to switch to.
    pos_swap = {
        'n': 'v',  # Noun to verb
        'v': 'n',  # Verb to noun
        'a': 'r',  # Adjective to adverb
        'r': 'a',  # Adverb to adjective
    }

    synsets = wn.synsets(word)
    if not synsets:
        return None

    # Base the swap on the POS of the word's first (most common) sense.
    target_pos = pos_swap.get(synsets[0].pos())
    if target_pos is None:
        return None

    # Return the first lemma of the first synset matching the target POS.
    for candidate in synsets:
        if candidate.pos() == target_pos:
            return candidate.lemmas()[0].name().replace('_', ' ')

    return None
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def transform_tense(word: str) -> Optional[str]:
    """
    Transform a verb by changing its tense (present to simple past).
    Uses a small irregular-verb table plus simple rules for regular verbs.

    Args:
        word (str): The verb to transform.

    Returns:
        Optional[str]: The past-tense form, or None if no rule applies.
    """
    # Irregular verbs must be checked FIRST: the suffix rules would
    # otherwise mangle them (originally 'see' -> 'seed', 'write' -> 'writed').
    irregular = {
        'run': 'ran',
        'go': 'went',
        'see': 'saw',
        'write': 'wrote',
        'is': 'was',
        'are': 'were',
    }
    if word in irregular:
        return irregular[word]

    # Guard: the suffix rules below inspect the last two characters;
    # the original raised IndexError on empty/one-letter input.
    if len(word) < 2:
        return None

    if word.endswith('e'):
        return word + 'd'             # e.g., love -> loved
    if word.endswith('y') and word[-2] not in 'aeiou':
        return word[:-1] + 'ied'      # e.g., study -> studied
    if word[-1] not in 'aeiou':
        return word + 'ed'            # e.g., walk -> walked, play -> played
    return None
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def transform_article(word: str) -> Optional[str]:
    """
    Transform a word related to articles (swap 'a' <-> 'an'). For nouns,
    return a different related noun that might cause article-related errors.

    Args:
        word (str): The word to transform.

    Returns:
        Optional[str]: The transformed word, or None if not applicable.
    """
    lowered = word.lower()
    if lowered in ('a', 'an'):
        return 'an' if lowered == 'a' else 'a'

    # For nouns, find another noun that might cause article confusion
    synsets = wn.synsets(word, pos='n')
    if not synsets:
        return None

    # Collect distinct candidates: the original list could contain the same
    # lemma many times (once per synset), skewing the random pick.
    seen = set()
    synonyms = []
    for synset in synsets:
        for lemma in synset.lemmas():
            synonym = lemma.name().replace('_', ' ')
            if synonym != word and synonym not in seen:
                seen.add(synonym)
                synonyms.append(synonym)

    return random.choice(synonyms) if synonyms else None
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def transform_meaning(word: str) -> Optional[str]:
    """
    Transform a word to another with a different meaning (e.g., an
    unrelated word sharing the same part of speech).

    Args:
        word (str): The word to transform.

    Returns:
        Optional[str]: A different word with the same POS, or None.
    """
    synsets = wn.synsets(word)
    if not synsets:
        return None

    current_pos = synsets[0].pos()

    # Collect every distinct candidate with the same POS. A set gives O(1)
    # de-duplication; the original `candidate not in different_words` on a
    # list made this loop quadratic over the whole WordNet vocabulary.
    different_words = set()
    for synset in wn.all_synsets(pos=current_pos):
        for lemma in synset.lemmas():
            candidate = lemma.name().replace('_', ' ')
            if candidate != word:
                different_words.add(candidate)

    return random.choice(tuple(different_words)) if different_words else None
|
| 209 |
+
|
| 210 |
|