Spaces:
Sleeping
Sleeping
gen question type incorrect word, antonym, synonym, stress
Browse files- src/enum/question.py +1 -0
- src/factories/gen_question/antonym_question.py +113 -0
- src/factories/gen_question/base.py +18 -3
- src/factories/gen_question/incorrect_word_question.py +73 -0
- src/factories/gen_question/pronunciation_question.py +141 -141
- src/factories/gen_question/question.py +15 -9
- src/factories/gen_question/stress_question.py +58 -43
- src/factories/gen_question/synonym_question.py +113 -0
- src/interfaces/question.py +19 -3
- src/routers/public/quesion.py +15 -2
- src/services/AI/sentence_generator.py +63 -0
- src/utils/word.py +175 -2
src/enum/question.py
CHANGED
|
@@ -5,3 +5,4 @@ class QuestionTypeEnum(str, Enum):
|
|
| 5 |
STRESS = "stress" # trong am
|
| 6 |
SYNONYM = "synonym" # tu dong nghia
|
| 7 |
ANTONYM = "antonym" # tu trai nghia
|
|
|
|
|
|
| 5 |
STRESS = "stress" # trong am
|
| 6 |
SYNONYM = "synonym" # tu dong nghia
|
| 7 |
ANTONYM = "antonym" # tu trai nghia
|
| 8 |
+
INCORRECT_WORD = "incorrect_word"
|
src/factories/gen_question/antonym_question.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
import random
|
| 3 |
+
|
| 4 |
+
from src.factories.gen_question.base import Question, nltk_words
|
| 5 |
+
from src.enum.question import QuestionTypeEnum
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class AntonymsQuestion(Question):
    """
    This class generates multiple-choice questions that ask the user
    to select an antonym for a given word.

    It uses dictionary data (from fetch_word_data) to retrieve
    meanings and antonyms. If the input list is empty or invalid,
    it falls back to randomly chosen words from a built-in word list (nltk_words).
    """

    def generate_questions(self, list_words: List[str] = None, num_question: int = 1, num_ans_per_question: int = 4):
        if list_words is None:
            list_words = []

        result = []
        list_unique_words = set(list_words)

        # Internal helper function to get a valid question/answer pair
        def get_question_and_answer():
            """
            Randomly selects a word and finds one of its antonyms.

            Returns:
                tuple(str, str): question_word, antonym_answer
            """
            # Try from provided list word first; each word is consumed.
            while list_unique_words:
                source_word = random.sample(list(list_unique_words), 1)[0]
                list_unique_words.remove(source_word)
                antonym_word = self.get_antonym(source_word)
                # Also consume the antonym so it cannot be reused as a prompt.
                if antonym_word in list_unique_words:
                    list_unique_words.remove(antonym_word)
                if antonym_word:
                    return source_word, antonym_word

            # Fallback: keep sampling the built-in corpus until a word with
            # an antonym is found.
            while True:
                source_word = random.choice(nltk_words)
                antonym_word = self.get_antonym(source_word)
                if antonym_word:
                    return source_word, antonym_word

        for _ in range(num_question):
            question_word, correct_answer = get_question_and_answer()

            choices = [correct_answer]
            distractor_set = set()

            while len(choices) < num_ans_per_question:
                distractor_word = random.choice(nltk_words)

                # Distractors must differ (case-insensitively) from both the
                # prompt and the answer, and from each other.
                if (distractor_word.lower() != correct_answer.lower() and
                        distractor_word.lower() != question_word.lower() and
                        distractor_word.lower() not in distractor_set):
                    # BUGFIX: store the lowercase form — the membership test
                    # above is lowercase, so adding the original case allowed
                    # duplicate choices differing only by case.
                    distractor_set.add(distractor_word.lower())
                    choices.append(distractor_word)

            random.shuffle(choices)

            result.append({
                "question": question_word,
                "type": QuestionTypeEnum.ANTONYM,
                "choices": choices,
                "answer": choices.index(correct_answer),
                "explain": [],
            })

        return result

    def get_antonym(self, word: str):
        """
        Retrieves a random antonym for the given word using dictionary API data.

        It checks both the 'meanings.antonyms' and 'meanings.definitions.antonyms' fields.

        Args:
            word (str): The input word to find an antonym for.

        Returns:
            str or None: An antonym if found, else None.
        """
        data = self.fetch_word_data(word)
        if not data:
            return None

        meanings = data.get("meanings", [])

        # Randomly search for antonyms in the meaning entries, discarding
        # each inspected meaning until one yields an antonym.
        while meanings:
            meaning = random.sample(meanings, 1)[0]

            # Try top-level antonyms
            antonyms = meaning.get("antonyms", [])

            # Also check antonyms inside definitions
            if not antonyms:
                definitions = meaning.get("definitions", [])
                for definition in definitions:
                    antonyms.extend(definition.get("antonyms", []))

            if antonyms:
                return random.choice(antonyms)

            meanings.remove(meaning)

        return None
|
src/factories/gen_question/base.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from abc import ABC, abstractmethod
|
| 2 |
-
from typing import Set
|
| 3 |
|
|
|
|
| 4 |
import nltk
|
| 5 |
|
| 6 |
nltk.download('words')
|
|
@@ -10,7 +11,7 @@ nltk_words = words.words()
|
|
| 10 |
|
| 11 |
class Question(ABC):
|
| 12 |
@abstractmethod
|
| 13 |
-
def generate_questions(self,
|
| 14 |
pass
|
| 15 |
|
| 16 |
@staticmethod
|
|
@@ -19,4 +20,18 @@ class Question(ABC):
|
|
| 19 |
num_questions: int = 1,
|
| 20 |
num_ans_per_question: int = 4
|
| 21 |
) -> int:
|
| 22 |
-
return min(len_list_words//num_questions, num_ans_per_question)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from abc import ABC, abstractmethod
|
| 2 |
+
from typing import Set, Optional
|
| 3 |
|
| 4 |
+
import requests
|
| 5 |
import nltk
|
| 6 |
|
| 7 |
nltk.download('words')
|
|
|
|
| 11 |
|
| 12 |
class Question(ABC):
    """Abstract base for all question generators.

    Subclasses implement ``generate_questions``; the static helpers below
    are shared utilities for sizing answer pools and fetching dictionary
    data for a word.
    """

    @abstractmethod
    def generate_questions(self, list_words: Set[str], num_questions: int = 1, num_ans_per_question: int = 4):
        """Generate ``num_questions`` question dicts drawn from ``list_words``."""
        pass

    @staticmethod
    def cal_num_word_in_list_available_per_question(
            len_list_words: int,
            num_questions: int = 1,
            num_ans_per_question: int = 4
    ) -> int:
        """Return how many words of the input list each question may consume.

        The per-question share of the list is capped by the number of
        answers per question.
        """
        return min(len_list_words // num_questions, num_ans_per_question)

    @staticmethod
    def fetch_word_data(word: str) -> Optional[dict]:
        """Fetch dictionary data for ``word`` from dictionaryapi.dev.

        Returns:
            The first entry of the API response, or None on any network,
            HTTP, or parsing failure (best-effort lookup).
        """
        try:
            base_url = "https://api.dictionaryapi.dev/api/v2/entries/en/"
            # Timeout prevents question generation from hanging forever on
            # a slow or unreachable endpoint.
            resp = requests.get(base_url + word, timeout=10)
            if resp.status_code == 200:
                data = resp.json()
                # The API returns a list of entries; guard against an
                # unexpectedly empty list before indexing.
                return data[0] if data else None
            else:
                return None
        except Exception:
            # Best-effort: treat any failure (network, JSON, ...) as "no data".
            return None
|
src/factories/gen_question/incorrect_word_question.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
import random
|
| 3 |
+
|
| 4 |
+
from src.enum.question import QuestionTypeEnum
|
| 5 |
+
from src.factories.gen_question.base import Question, nltk_words
|
| 6 |
+
from src.services.AI.sentence_generator import SentenceGeneratorModel
|
| 7 |
+
from src.utils.word import transform_word
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class IncorrectWordQuestion(Question):
    """
    This class generates multiple-choice questions that ask the user
    to find the incorrect word in a sentence.

    It selects a word from the list, generates a sentence (currently a
    hard-coded placeholder until the AI sentence generator is enabled),
    and injects a grammatically incorrect word into the sentence.
    """

    def generate_questions(self, list_words: List[str], num_question: int = 1, num_ans_per_question: int = 4):
        if list_words is None:
            list_words = []

        result = []
        list_unique_words = set(list_words)

        # sentence_generator = SentenceGeneratorModel()

        def choice_word_to_gen_sentence():
            """Pick 1-4 seed words for sentence generation, consuming the pool.

            Falls back to random nltk_words when the pool runs short.
            """
            number_choice_word = random.randint(1, 4)

            available_words = list(list_unique_words)
            if number_choice_word <= len(available_words):
                choice_word = random.sample(available_words, number_choice_word)
                for w in choice_word:
                    list_unique_words.remove(w)
            else:
                # Take all remaining words and top up from nltk_words.
                choice_word = available_words.copy()
                remaining = number_choice_word - len(choice_word)
                additional_words = random.sample(nltk_words, remaining)
                choice_word += additional_words
                list_unique_words.clear()

            return choice_word

        for _ in range(num_question):
            list_choice_word = choice_word_to_gen_sentence()

            # 1. Generate a simple sentence (placeholder until the sentence
            #    generator model is wired in).
            # sentence = sentence_generator.generate_sentence_from_words(list_choice_word)
            sentence = "The Conservatives and the Liberal Democrats are demanding the Labour government publish the evidence it submitted in the now-collapsed case against two people accused of spying for China."
            # 2. Randomly choose a word to make incorrect in the sentence
            sentence_words = sentence.strip(".").split()
            correct_word = random.sample(list(set(sentence_words)), 1)[0]
            sentence_words.remove(correct_word)

            # 3. Replace it with a grammatically incorrect word.
            # NOTE(review): str.replace matches substrings, so this may also
            # hit an occurrence embedded in a longer word — confirm acceptable.
            incorrect_word = transform_word(correct_word)
            modified_sentence = sentence.replace(correct_word, incorrect_word, 1)

            # 4. Create choices: distractors from the sentence plus the
            #    corrupted word. BUGFIX: the corrupted word must be wrapped
            #    in a list — concatenating a bare str to a list raises
            #    TypeError.
            choices = random.sample(list(set(sentence_words)), num_ans_per_question - 1) + [incorrect_word]

            random.shuffle(choices)
            result.append({
                "question": modified_sentence,
                "type": QuestionTypeEnum.INCORRECT_WORD,
                "choices": choices,
                "answer": choices.index(incorrect_word),
                # BUGFIX: the original literal "Correct: {sequence}" was not
                # an f-string and referenced an undefined name; show the
                # actual word that was replaced.
                "explain": [f"Correct: {correct_word}"],
            })

        return result
|
src/factories/gen_question/pronunciation_question.py
CHANGED
|
@@ -1,141 +1,141 @@
|
|
| 1 |
-
import random
|
| 2 |
-
from typing import Set, List
|
| 3 |
-
from src.factories.gen_question.base import Question
|
| 4 |
-
from src.enum.question import QuestionTypeEnum
|
| 5 |
-
from collections import defaultdict
|
| 6 |
-
import pronouncing
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
class PronunciationQuestion(Question):
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
| 1 |
+
# import random
|
| 2 |
+
# from typing import Set, List
|
| 3 |
+
# from src.factories.gen_question.base import Question
|
| 4 |
+
# from src.enum.question import QuestionTypeEnum
|
| 5 |
+
# from collections import defaultdict
|
| 6 |
+
# import pronouncing
|
| 7 |
+
#
|
| 8 |
+
#
|
| 9 |
+
# class PronunciationQuestion(Question):
|
| 10 |
+
# def generate_questions(self, list_words: List[str], num_questions: int = 1, num_ans_per_question: int = 4) :
|
| 11 |
+
# result = []
|
| 12 |
+
# list_unique_words = set(list_words)
|
| 13 |
+
#
|
| 14 |
+
# num_word_in_list_per_question = self.cal_num_word_in_list_available_per_question(len(list_words), num_questions, num_ans_per_question)
|
| 15 |
+
#
|
| 16 |
+
# for _ in range(num_questions) :
|
| 17 |
+
# main_word = None
|
| 18 |
+
# main_segment = None
|
| 19 |
+
# main_pron = None
|
| 20 |
+
# while main_word is None and len(list_words) > 0:
|
| 21 |
+
# main_word = random.choice(list_words)
|
| 22 |
+
# main_segment = self.extract_main_segment(main_word)
|
| 23 |
+
# main_pron, segment_pron = self.get_pronunciation_of_word_and_segment(main_word, main_segment)
|
| 24 |
+
# if main_pron is None or segment_pron is None :
|
| 25 |
+
# main_word = None
|
| 26 |
+
# list_words.remove(main_word)
|
| 27 |
+
#
|
| 28 |
+
# question = main_segment
|
| 29 |
+
# choices = [main_word]
|
| 30 |
+
# explain = [f'{main_word} : {main_pron}']
|
| 31 |
+
# similar_pron_words = []
|
| 32 |
+
# different_pron_word = None
|
| 33 |
+
#
|
| 34 |
+
# # random main_char trong main_word de lam tu so sanh phien am
|
| 35 |
+
# # tim trong list_words co tu nao chua main_char sao cho phiên âm của các từ được tìm thấy là 1 từ có phiên âm khác, còn các từ còn lại có phiên âm giống nhau
|
| 36 |
+
# # xoa cac tu duoc chon trong list_words
|
| 37 |
+
#
|
| 38 |
+
# result.append({
|
| 39 |
+
# "question": "",
|
| 40 |
+
# "type": QuestionTypeEnum.PRONUNCIATION,
|
| 41 |
+
# "choices": choices,
|
| 42 |
+
# "answer": `index_of_choice`,
|
| 43 |
+
# "explain":
|
| 44 |
+
# })
|
| 45 |
+
#
|
| 46 |
+
# def get_pronunciation_of_word_and_segment(self, word: str, segment: str):
|
| 47 |
+
# word_pron = None
|
| 48 |
+
# word_segment = None
|
| 49 |
+
# try:
|
| 50 |
+
# p = pronouncing.phones_for_word(word)
|
| 51 |
+
# if not p:
|
| 52 |
+
# return None, None
|
| 53 |
+
# word_pron = p[0]
|
| 54 |
+
# except Exception:
|
| 55 |
+
# return None, None
|
| 56 |
+
#
|
| 57 |
+
#
|
| 58 |
+
#
|
| 59 |
+
#
|
| 60 |
+
#
|
| 61 |
+
#
|
| 62 |
+
# def extract_main_segment(self, word: str) -> str:
|
| 63 |
+
# """
|
| 64 |
+
# Extracts a random phonetic segment (vowel, consonant, consonant cluster, diphthong, or common ending)
|
| 65 |
+
# from the word, excluding the last segment unless it's the only option.
|
| 66 |
+
# For example, 'pronunciation' can be segmented as:
|
| 67 |
+
# - Individual: ['p', 'r', 'o', 'n', 'u', 'n', 'c', 'i', 'a', 't']
|
| 68 |
+
# - Grouped: ['p', 'r', 'o', 'n', 'u', 'n', 'c', 'i', 'a', 'tion']
|
| 69 |
+
# For example, 'phone' can be segmented as:
|
| 70 |
+
# - Grouped: ['ph', 'o', 'n', 'e']
|
| 71 |
+
# """
|
| 72 |
+
# if not word or len(word) <= 2:
|
| 73 |
+
# return word
|
| 74 |
+
#
|
| 75 |
+
# word = word.lower()
|
| 76 |
+
#
|
| 77 |
+
# # Define phonetic components
|
| 78 |
+
# vowels = set('aeiou')
|
| 79 |
+
# consonant_clusters = ['th', 'ph', 'sh', 'ch', 'wh', 'gh', 'sch', 'tr', 'sh', 's', 't', 'p']
|
| 80 |
+
# diphthongs = ['ai', 'au', 'ei', 'eu', 'oi', 'ou', 'ui', 'ie', 'io', 'ea', 'ee', 'oa', 'oe']
|
| 81 |
+
# common_endings = ['tion', 'sion', 'ing', 'ed', 'es']
|
| 82 |
+
#
|
| 83 |
+
# # Step 1: Segment the word
|
| 84 |
+
# segments = []
|
| 85 |
+
# i = 0
|
| 86 |
+
# while i < len(word):
|
| 87 |
+
# # Check for common endings (e.g., 'tion')
|
| 88 |
+
# matched_ending = False
|
| 89 |
+
# for ending in common_endings:
|
| 90 |
+
# if word[i:].startswith(ending) and i + len(ending) <= len(word):
|
| 91 |
+
# segments.append(ending)
|
| 92 |
+
# i += len(ending)
|
| 93 |
+
# matched_ending = True
|
| 94 |
+
# break
|
| 95 |
+
# if matched_ending:
|
| 96 |
+
# continue
|
| 97 |
+
#
|
| 98 |
+
# # Check for consonant clusters (e.g., 'th', 'ph', 'sch')
|
| 99 |
+
# matched_cluster = False
|
| 100 |
+
# for cluster in consonant_clusters:
|
| 101 |
+
# if word[i:].startswith(cluster) and i + len(cluster) <= len(word):
|
| 102 |
+
# segments.append(cluster)
|
| 103 |
+
# i += len(cluster)
|
| 104 |
+
# matched_cluster = True
|
| 105 |
+
# break
|
| 106 |
+
# if matched_cluster:
|
| 107 |
+
# continue
|
| 108 |
+
#
|
| 109 |
+
# # Check for diphthongs (e.g., 'io')
|
| 110 |
+
# matched_diphthong = False
|
| 111 |
+
# for diph in diphthongs:
|
| 112 |
+
# if word[i:].startswith(diph) and i + len(diph) <= len(word):
|
| 113 |
+
# segments.append(diph)
|
| 114 |
+
# i += len(diph)
|
| 115 |
+
# matched_diphthong = True
|
| 116 |
+
# break
|
| 117 |
+
# if matched_diphthong:
|
| 118 |
+
# continue
|
| 119 |
+
#
|
| 120 |
+
# # Add single character (vowel or consonant)
|
| 121 |
+
# if word[i].isalpha():
|
| 122 |
+
# segments.append(word[i])
|
| 123 |
+
# i += 1
|
| 124 |
+
#
|
| 125 |
+
# # Step 2: Filter valid segments (exclude the last segment if possible)
|
| 126 |
+
# valid_segments = segments[:-1] if len(segments) > 1 else segments
|
| 127 |
+
#
|
| 128 |
+
# # Step 3: If no valid segments, fall back to single characters
|
| 129 |
+
# if not valid_segments:
|
| 130 |
+
# valid_positions = [i for i in range(len(word) - 1) if word[i].isalpha()]
|
| 131 |
+
# if not valid_positions:
|
| 132 |
+
# return ''
|
| 133 |
+
# pos = random.choice(valid_positions)
|
| 134 |
+
# return word[pos]
|
| 135 |
+
#
|
| 136 |
+
# # Step 4: Randomly choose a segment
|
| 137 |
+
# return random.choice(valid_segments)
|
| 138 |
+
#
|
| 139 |
+
# def cal_num_word_in_list_available_per_question(self, len_list_words: int, num_questions: int = 1, num_ans_per_question: int = 4) -> int:
|
| 140 |
+
# num_word_in_list_available_per_question = len_list_words // num_questions
|
| 141 |
+
# return num_word_in_list_available_per_question if num_word_in_list_available_per_question < num_ans_per_question else num_ans_per_question
|
src/factories/gen_question/question.py
CHANGED
|
@@ -1,15 +1,21 @@
|
|
| 1 |
from src.enum.question import QuestionTypeEnum
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from src.utils.exceptions import BadRequestException
|
| 3 |
|
| 4 |
|
| 5 |
-
def
|
| 6 |
-
if
|
| 7 |
-
return
|
| 8 |
-
elif
|
| 9 |
-
return
|
| 10 |
-
elif
|
| 11 |
-
return
|
| 12 |
-
elif
|
| 13 |
-
return
|
|
|
|
|
|
|
| 14 |
else:
|
| 15 |
raise BadRequestException('type_invalid')
|
|
|
|
| 1 |
from src.enum.question import QuestionTypeEnum
|
| 2 |
+
from src.factories.gen_question.antonym_question import AntonymsQuestion
|
| 3 |
+
from src.factories.gen_question.incorrect_word_question import IncorrectWordQuestion
|
| 4 |
+
from src.factories.gen_question.stress_question import StressQuestion
|
| 5 |
+
from src.factories.gen_question.synonym_question import SynonymsQuestion
|
| 6 |
from src.utils.exceptions import BadRequestException
|
| 7 |
|
| 8 |
|
| 9 |
+
def create_question_instance(question_type: QuestionTypeEnum):
    """Factory: return the question generator matching ``question_type``.

    PRONUNCIATION currently maps to StressQuestion as a stopgap while
    PronunciationQuestion is disabled.

    Raises:
        BadRequestException: when the type has no registered generator.
    """
    factories = {
        QuestionTypeEnum.PRONUNCIATION: StressQuestion,
        QuestionTypeEnum.STRESS: StressQuestion,
        QuestionTypeEnum.SYNONYM: SynonymsQuestion,
        QuestionTypeEnum.ANTONYM: AntonymsQuestion,
        QuestionTypeEnum.INCORRECT_WORD: IncorrectWordQuestion,
    }
    factory = factories.get(question_type)
    if factory is None:
        raise BadRequestException('type_invalid')
    return factory()
|
src/factories/gen_question/stress_question.py
CHANGED
|
@@ -1,18 +1,23 @@
|
|
| 1 |
-
import random
|
| 2 |
-
from collections import defaultdict
|
| 3 |
from typing import List
|
|
|
|
|
|
|
|
|
|
| 4 |
from src.factories.gen_question.base import Question, nltk_words
|
| 5 |
from src.enum.question import QuestionTypeEnum
|
| 6 |
-
|
| 7 |
from src.utils.number import rand_exclude
|
| 8 |
from src.utils.word import get_stress_pattern, convert_word_to_ipa
|
| 9 |
|
| 10 |
|
| 11 |
class StressQuestion(Question):
|
| 12 |
-
def generate_questions(self, list_words: List[str], num_question: int = 1, num_ans_per_question: int = 4):
|
|
|
|
|
|
|
|
|
|
| 13 |
result = []
|
| 14 |
|
| 15 |
-
#
|
|
|
|
|
|
|
| 16 |
stress_groups = defaultdict(list)
|
| 17 |
for word in list_words:
|
| 18 |
stress = get_stress_pattern(word)
|
|
@@ -20,75 +25,85 @@ class StressQuestion(Question):
|
|
| 20 |
if ipa is None or stress is None:
|
| 21 |
continue
|
| 22 |
stress_groups[stress].append({"word": word, "ipa": ipa})
|
| 23 |
-
num_word_in_list_per_question = self.cal_num_word_in_list_available_per_question(len(list_words), num_question, num_ans_per_question)
|
| 24 |
|
| 25 |
# create
|
| 26 |
-
def choice_random_words_in_stress_group(stress_group_key):
|
| 27 |
stress_group = stress_groups[stress_group_key]
|
| 28 |
item = random.choice(stress_group)
|
| 29 |
-
stress_group.remove(item)
|
| 30 |
return item["word"], item["ipa"]
|
| 31 |
|
| 32 |
-
for _ in range(num_question):
|
| 33 |
choices = []
|
| 34 |
explain = []
|
| 35 |
list_stress_group_keys = list(stress_groups.keys())
|
| 36 |
|
| 37 |
-
#
|
| 38 |
-
if
|
| 39 |
different_stress = random.choice(list_stress_group_keys)
|
| 40 |
list_stress_group_keys.remove(different_stress)
|
| 41 |
-
|
|
|
|
| 42 |
else:
|
| 43 |
different_stress = random.randint(1, 3)
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
choices.append(different_word)
|
| 47 |
-
explain.append(f'{different_word} ({different_ipa})')
|
| 48 |
|
| 49 |
-
#
|
| 50 |
-
if
|
| 51 |
-
# if in list word exist more two stresses, get choice in list word
|
| 52 |
common_stress = random.choice(list_stress_group_keys)
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
common_word, common_ipa = choice_random_words_in_stress_group(common_stress)
|
| 57 |
-
|
| 58 |
choices.append(common_word)
|
| 59 |
-
explain.append(f'{common_word} ({common_ipa})')
|
| 60 |
else:
|
| 61 |
common_stress = rand_exclude(1, 3, different_stress)
|
| 62 |
|
| 63 |
-
#
|
| 64 |
while len(choices) < num_ans_per_question:
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
| 67 |
choices.append(common_word)
|
| 68 |
-
explain.append(f'{common_word} ({
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
return result
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
word = random.choice(nltk_words)
|
| 86 |
word_ipa = convert_word_to_ipa(word)
|
| 87 |
word_stress = get_stress_pattern(word)
|
| 88 |
if word_ipa is None or word_stress is None:
|
| 89 |
-
|
| 90 |
continue
|
| 91 |
if word_stress == stress:
|
| 92 |
return word, word_ipa
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
| 1 |
from typing import List
|
| 2 |
+
from collections import defaultdict
|
| 3 |
+
import random
|
| 4 |
+
|
| 5 |
from src.factories.gen_question.base import Question, nltk_words
|
| 6 |
from src.enum.question import QuestionTypeEnum
|
|
|
|
| 7 |
from src.utils.number import rand_exclude
|
| 8 |
from src.utils.word import get_stress_pattern, convert_word_to_ipa
|
| 9 |
|
| 10 |
|
| 11 |
class StressQuestion(Question):
    """Generates 'which word has a different stress pattern?' questions.

    Words from the input list are grouped by stress pattern; one group
    supplies the odd-one-out answer and another supplies the common-stress
    distractors. Missing slots are filled from nltk_words.
    """

    def generate_questions(self, list_words: List[str] = None, num_question: int = 1, num_ans_per_question: int = 4):
        if list_words is None:
            list_words = []

        result = []

        # Process data: group words by stress pattern
        num_word_in_list_per_question = self.cal_num_word_in_list_available_per_question(len(list_words), num_question, num_ans_per_question)

        stress_groups = defaultdict(list)
        for word in list_words:
            stress = get_stress_pattern(word)
            ipa = convert_word_to_ipa(word)
            # Skip words we cannot transcribe or stress-analyse.
            if ipa is None or stress is None:
                continue
            stress_groups[stress].append({"word": word, "ipa": ipa})

        # create
        def choice_random_words_in_stress_group(stress_group_key: int):
            """Pop a random (word, ipa) pair out of the given stress group."""
            stress_group = stress_groups[stress_group_key]
            item = random.choice(stress_group)
            stress_group.remove(item)  # Remove to avoid reuse within the same question
            return item["word"], item["ipa"]

        for _ in range(num_question):
            choices = []
            explain = []
            list_stress_group_keys = list(stress_groups.keys())

            # Get word with different stress
            if list_stress_group_keys:
                different_stress = random.choice(list_stress_group_keys)
                list_stress_group_keys.remove(different_stress)
                different_word_ipa = choice_random_words_in_stress_group(different_stress)
                different_word, different_ipa = different_word_ipa
            else:
                different_stress = random.randint(1, 3)
                different_word_ipa = self.get_random_word_and_ipa_by_stress(different_stress)
                if different_word_ipa is None:
                    continue  # Skip this question if no valid word is found
                different_word, different_ipa = different_word_ipa

            choices.append(different_word)
            explain.append(f'{different_word} ({different_ipa}, stress pattern: {different_stress})')

            # Get words with common stress
            if list_stress_group_keys:
                common_stress = random.choice(list_stress_group_keys)
                while len(choices) < num_word_in_list_per_question and stress_groups[common_stress]:
                    common_word_ipa = choice_random_words_in_stress_group(common_stress)
                    common_word, common_ipa = common_word_ipa
                    choices.append(common_word)
                    explain.append(f'{common_word} ({common_ipa}, stress pattern: {common_stress})')
            else:
                common_stress = rand_exclude(1, 3, different_stress)

            # Fill remaining choices from nltk_words if needed
            while len(choices) < num_ans_per_question:
                common_word_ipa = self.get_random_word_and_ipa_by_stress(common_stress)
                if common_word_ipa is None:
                    break  # Skip adding if no valid word is found
                common_word, common_ipa = common_word_ipa
                choices.append(common_word)
                explain.append(f'{common_word} ({common_ipa}, stress pattern: {common_stress})')

            # Only add the question if we have enough choices.
            # (BUGFIX: removed leftover debug print() calls that wrote the
            # choices and final result to stdout in production.)
            if len(choices) == num_ans_per_question:
                random.shuffle(choices)
                result.append({
                    "question": "",
                    "type": QuestionTypeEnum.STRESS,
                    "choices": choices,
                    "answer": choices.index(different_word),
                    "explain": explain,
                })

        return result

    @staticmethod
    def get_random_word_and_ipa_by_stress(stress: int):
        """Search nltk_words at random for a word with the given stress pattern.

        Bounded by max_attempts so an unlucky search cannot loop forever.

        Returns:
            tuple(str, str) of (word, ipa), or None when no match is found.
        """
        max_attempts = 10000
        attempts = 0
        while attempts < max_attempts:
            if not nltk_words:
                return None
            word = random.choice(nltk_words)
            word_ipa = convert_word_to_ipa(word)
            word_stress = get_stress_pattern(word)
            if word_ipa is None or word_stress is None:
                attempts += 1
                continue
            if word_stress == stress:
                return word, word_ipa
            attempts += 1
        return None  # Return None if no word is found
|
src/factories/gen_question/synonym_question.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
import random
|
| 3 |
+
|
| 4 |
+
from src.factories.gen_question.base import Question, nltk_words
|
| 5 |
+
from src.enum.question import QuestionTypeEnum
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class SynonymsQuestion(Question):
|
| 9 |
+
"""
|
| 10 |
+
This class generates multiple-choice questions that ask the user
|
| 11 |
+
to select a synonym for a given word.
|
| 12 |
+
|
| 13 |
+
It uses dictionary data (from fetch_word_data) to retrieve
|
| 14 |
+
meanings and synonyms. If the input list is empty or invalid,
|
| 15 |
+
it falls back to randomly chosen words from a built-in word list (nltk_words).
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
def generate_questions(self, list_words: List[str] = None, num_question: int = 1, num_ans_per_question: int = 4):
|
| 19 |
+
if list_words is None:
|
| 20 |
+
list_words = []
|
| 21 |
+
|
| 22 |
+
result = []
|
| 23 |
+
list_unique_words = set(list_words)
|
| 24 |
+
|
| 25 |
+
# Internal helper function to get a valid question/answer pair
|
| 26 |
+
def get_question_and_answer():
|
| 27 |
+
"""
|
| 28 |
+
Randomly selects a word and finds one of its synonyms.
|
| 29 |
+
|
| 30 |
+
Returns:
|
| 31 |
+
tuple(str, str): question_word, synonym_answer
|
| 32 |
+
"""
|
| 33 |
+
# Try from provided list word
|
| 34 |
+
while list_unique_words:
|
| 35 |
+
source_word = random.sample(list(list_unique_words), 1)[0]
|
| 36 |
+
list_unique_words.remove(source_word)
|
| 37 |
+
synonym_word = self.get_synonym(source_word)
|
| 38 |
+
if synonym_word in list_unique_words:
|
| 39 |
+
list_unique_words.remove(source_word)
|
| 40 |
+
if synonym_word:
|
| 41 |
+
return source_word, synonym_word
|
| 42 |
+
|
| 43 |
+
# Fallback: use nltk_words
|
| 44 |
+
while True:
|
| 45 |
+
source_word = random.choice(nltk_words)
|
| 46 |
+
synonym_word = self.get_synonym(source_word)
|
| 47 |
+
if synonym_word:
|
| 48 |
+
return source_word, synonym_word
|
| 49 |
+
|
| 50 |
+
for _ in range(num_question):
|
| 51 |
+
question_word, correct_answer = get_question_and_answer()
|
| 52 |
+
|
| 53 |
+
choices = [correct_answer]
|
| 54 |
+
distractor_set = set()
|
| 55 |
+
|
| 56 |
+
while len(choices) < num_ans_per_question:
|
| 57 |
+
distractor_word = random.choice(nltk_words)
|
| 58 |
+
|
| 59 |
+
if (distractor_word.lower() != correct_answer.lower() and
|
| 60 |
+
distractor_word.lower() != question_word.lower() and
|
| 61 |
+
distractor_word.lower() not in distractor_set):
|
| 62 |
+
distractor_set.add(distractor_word)
|
| 63 |
+
choices.append(distractor_word)
|
| 64 |
+
|
| 65 |
+
random.shuffle(choices)
|
| 66 |
+
|
| 67 |
+
result.append({
|
| 68 |
+
"question": question_word,
|
| 69 |
+
"type": QuestionTypeEnum.SYNONYM,
|
| 70 |
+
"choices": choices,
|
| 71 |
+
"answer": choices.index(correct_answer),
|
| 72 |
+
"explain": [],
|
| 73 |
+
})
|
| 74 |
+
|
| 75 |
+
return result
|
| 76 |
+
|
| 77 |
+
def get_synonym(self, word: str):
    """
    Retrieve a random synonym for the given word using dictionary API data.

    Both the top-level 'meanings.synonyms' field and the nested
    'meanings.definitions.synonyms' field are searched.

    Args:
        word (str): The input word to find a synonym for.

    Returns:
        str or None: A synonym if found, else None.
    """
    data = self.fetch_word_data(word)
    if not data:
        return None

    # Work on a copy: the original code called meanings.remove(...) on the
    # list owned by `data`, mutating the (possibly cached) fetched payload.
    meanings = list(data.get("meanings", []))

    # Randomly search for synonyms in the meaning entries
    while meanings:
        meaning = random.choice(meanings)

        # Copy the top-level synonyms so extend() below cannot mutate `data`.
        synonyms = list(meaning.get("synonyms", []))

        # Also check synonyms inside definitions
        if not synonyms:
            for definition in meaning.get("definitions", []):
                synonyms.extend(definition.get("synonyms", []))

        if synonyms:
            return random.choice(synonyms)

        meanings.remove(meaning)

    return None
|
src/interfaces/question.py
CHANGED
|
@@ -1,5 +1,8 @@
|
|
| 1 |
-
from pydantic import BaseModel
|
| 2 |
-
from typing import Optional
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
class ModelInput(BaseModel):
|
| 5 |
"""General request model structure for flutter incoming req."""
|
|
@@ -9,4 +12,17 @@ class ModelInput(BaseModel):
|
|
| 9 |
|
| 10 |
class ICQuestion(BaseModel):
|
| 11 |
context: str
|
| 12 |
-
name: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field, field_validator
|
| 2 |
+
from typing import Optional, List
|
| 3 |
+
|
| 4 |
+
from src.enum.question import QuestionTypeEnum
|
| 5 |
+
|
| 6 |
|
| 7 |
class ModelInput(BaseModel):
|
| 8 |
"""General request model structure for flutter incoming req."""
|
|
|
|
| 12 |
|
| 13 |
class ICQuestion(BaseModel):
    """Request body: a block of source text (``context``) plus its ``name``."""

    context: str
    name: str
|
| 16 |
+
|
| 17 |
+
class ICreateQuestion(BaseModel):
    """
    Request body for generating vocabulary questions.

    Attributes:
        question_type: Kind of question to generate (stress, synonym, ...).
        list_words: Candidate vocabulary; each entry must be a single word.
        num_ans_per_question: Answer choices per question (2-10).
        num_question: Number of questions to generate (1-10).
    """

    question_type: QuestionTypeEnum
    list_words: List[str]
    num_ans_per_question: int = Field(..., ge=2, le=10)
    num_question: int = Field(..., ge=1, le=10)

    @field_validator('list_words')
    def check_single_word(cls, value):
        # Reject any whitespace, not only the space character: the old
        # `" " in word` check let tabs/newlines smuggle in multi-word entries.
        for word in value:
            if any(ch.isspace() for ch in word):
                raise ValueError("list_words_just_includes_single_word")
        return value
|
src/routers/public/quesion.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
from fastapi import APIRouter, Request
|
| 2 |
from fastapi.responses import JSONResponse
|
| 3 |
|
|
|
|
| 4 |
from src.utils.response import res_ok
|
| 5 |
from src.utils.text_process import vietnamese_to_english, english_to_vietnamese, get_all_summary, get_all_questions
|
| 6 |
-
from src.interfaces.question import ModelInput, ICQuestion
|
| 7 |
from src.services.AI.abstractive_summarizer import AbstractiveSummarizer
|
| 8 |
from src.services.AI.question_generator import QuestionGenerator
|
| 9 |
from src.services.AI.false_ans_generator import FalseAnswerGenerator
|
|
@@ -11,8 +12,20 @@ from src.services.AI.keyword_extractor import KeywordExtractor
|
|
| 11 |
|
| 12 |
route = APIRouter(prefix="/question", tags=["Question"])
|
| 13 |
print("Including question routes...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
@route.post('/sentence')
|
| 15 |
-
async def generate_questions_from_sentence(
|
| 16 |
"""Process user request
|
| 17 |
|
| 18 |
Args:
|
|
|
|
| 1 |
from fastapi import APIRouter, Request
|
| 2 |
from fastapi.responses import JSONResponse
|
| 3 |
|
| 4 |
+
from src.factories.gen_question.question import create_question_instance
|
| 5 |
from src.utils.response import res_ok
|
| 6 |
from src.utils.text_process import vietnamese_to_english, english_to_vietnamese, get_all_summary, get_all_questions
|
| 7 |
+
from src.interfaces.question import ModelInput, ICQuestion, ICreateQuestion
|
| 8 |
from src.services.AI.abstractive_summarizer import AbstractiveSummarizer
|
| 9 |
from src.services.AI.question_generator import QuestionGenerator
|
| 10 |
from src.services.AI.false_ans_generator import FalseAnswerGenerator
|
|
|
|
| 12 |
|
| 13 |
route = APIRouter(prefix="/question", tags=["Question"])
|
| 14 |
print("Including question routes...")
|
| 15 |
+
|
| 16 |
+
@route.post('/')
async def generate_question(body: ICreateQuestion):
    """Generate vocabulary questions of the requested type.

    Args:
        body: Validated request carrying the question type, the candidate
            word list, and the question/answer counts.

    Returns:
        JSONResponse: 200 response wrapping the generated question list.
    """
    question = create_question_instance(body.question_type)
    list_questions = question.generate_questions(
        list_words=body.list_words,
        num_question=body.num_question,
        num_ans_per_question=body.num_ans_per_question,
    )
    # Dropped the stray debug `print(list_questions)` left in the handler.
    return JSONResponse(status_code=200, content=res_ok(list_questions))
|
| 26 |
+
|
| 27 |
@route.post('/sentence')
|
| 28 |
+
async def generate_questions_from_sentence(body: ICQuestion, request: Request):
|
| 29 |
"""Process user request
|
| 30 |
|
| 31 |
Args:
|
src/services/AI/sentence_generator.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .base import Model
|
| 2 |
+
from typing import List
|
| 3 |
+
import random
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class SentenceGeneratorModel(Model):
    """
    Singleton wrapper around the base Model that produces English
    sentences containing a random selection of vocabulary words.
    """

    _instance = None

    def __new__(cls, model_name: str = "google/flan-t5-base"):
        # Lazily create the one shared instance; later calls reuse it.
        if cls._instance is None:
            instance = super(SentenceGeneratorModel, cls).__new__(cls)
            instance._initialized = False
            cls._instance = instance
        return cls._instance

    def __init__(self, model_name: str = "google/flan-t5-base"):
        # Guard so the shared instance is only initialised once.
        if self._initialized:
            return
        super().__init__(model_name)
        self._initialized = True

    def generate_sentence_from_words(
        self,
        vocab_list: List[str],
        min_words: int = 2,
        max_words: int = 5,
        model_max_length: int = 64,
        token_max_length: int = 64
    ) -> str:
        """
        Generate a sentence that uses some of the given vocabulary words.

        Args:
            vocab_list (List[str]): The list of available vocabulary words.
            min_words (int): Minimum number of words to include in sentence.
            max_words (int): Maximum number of words to include.
            model_max_length (int): Max length of generated sentence.
            token_max_length (int): Max length for tokenization.

        Returns:
            str: A generated sentence using selected words.

        Raises:
            ValueError: If ``vocab_list`` is empty.
        """
        if not vocab_list:
            raise ValueError("vocab_list cannot be empty.")

        # Pick between min_words and max_words entries, capped by pool size.
        sample_size = min(len(vocab_list), random.randint(min_words, max_words))
        chosen = random.sample(vocab_list, k=sample_size)

        prompt = f"Write an English sentence using the following words: {', '.join(chosen)}."

        return self.inference(
            model_max_length=model_max_length,
            token_max_length=token_max_length,
            task=prompt
        )
|
src/utils/word.py
CHANGED
|
@@ -27,11 +27,184 @@ def get_stress_pattern(word):
|
|
| 27 |
0 indicates no stress.
|
| 28 |
"""
|
| 29 |
list_pattern = pronouncing.stresses_for_word(word)
|
|
|
|
|
|
|
| 30 |
pattern = list_pattern[0]
|
|
|
|
|
|
|
| 31 |
try:
|
| 32 |
-
index = pattern.index('1')
|
| 33 |
return index
|
| 34 |
-
except ValueError
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
return None
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
|
|
|
| 27 |
0 indicates no stress.
|
| 28 |
"""
|
| 29 |
list_pattern = pronouncing.stresses_for_word(word)
|
| 30 |
+
if list_pattern is None or list_pattern == []:
|
| 31 |
+
return None
|
| 32 |
pattern = list_pattern[0]
|
| 33 |
+
if len(pattern) == 1:
|
| 34 |
+
return None
|
| 35 |
try:
|
| 36 |
+
index = pattern.index('1') + 1
|
| 37 |
return index
|
| 38 |
+
except ValueError:
|
| 39 |
+
return None
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
import random
|
| 43 |
+
from typing import Optional
|
| 44 |
+
import nltk
|
| 45 |
+
nltk.download('wordnet')
|
| 46 |
+
from nltk.corpus import wordnet as wn
|
| 47 |
+
|
| 48 |
+
def transform_word(word: str) -> Optional[str]:
    """
    Transform a word into another word by changing its type, tense,
    article-related form, or meaning, producing an incorrect option for a
    'find the wrong word' question.

    Args:
        word (str): The input word to transform.

    Returns:
        Optional[str]: The transformed word, or None if no transformation
        is possible.
    """
    # Candidate transformations, attempted in a random order.
    candidates = [
        transform_preposition,  # Handle prepositions
        transform_word_type,    # Change word type (e.g., noun to verb)
        transform_tense,        # Change verb tense
        transform_article,      # Change article-related form
        transform_meaning,      # Change to a word with different meaning
    ]
    random.shuffle(candidates)

    for transform in candidates:
        result = transform(word)
        if result and result != word:
            return result

    # Fallback: pick a random dictionary word when nothing else applies.
    try:
        from src.factories.gen_question.base import nltk_words
        return random.choice(nltk_words) if nltk_words else None
    except ImportError:
        return None
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def transform_preposition(word: str) -> Optional[str]:
    """
    Transform a preposition into another preposition that is likely to be
    incorrect in context.

    Args:
        word (str): The input word to check and transform.

    Returns:
        Optional[str]: A different preposition, or None if the input is not
        a preposition.
    """
    # Common prepositions and their common incorrect substitutions
    preposition_map = {
        'in': ['on', 'at', 'to'],
        'on': ['in', 'at', 'over'],
        'at': ['in', 'on', 'by'],
        'to': ['in', 'at', 'for'],
        'for': ['to', 'with', 'in'],
        'with': ['for', 'by', 'in'],
        'by': ['with', 'at', 'on'],
        'from': ['to', 'in', 'at'],
        'of': ['for', 'in', 'on'],
    }

    word_lower = word.lower()
    if word_lower in preposition_map:
        # random.choice is the idiomatic single-element pick;
        # random.sample(..., 1)[0] allocates a throwaway list.
        return random.choice(preposition_map[word_lower])
    return None
|
| 110 |
+
|
| 111 |
+
def transform_word_type(word: str) -> Optional[str]:
    """
    Transform a word by changing its part of speech (e.g., noun to verb).
    Uses WordNet to find related words with a different POS.
    """
    # Map each part of speech onto the POS we want to switch to.
    pos_swap = {
        'n': 'v',  # Noun to verb
        'v': 'n',  # Verb to noun
        'a': 'r',  # Adjective to adverb
        'r': 'a',  # Adverb to adjective
    }

    synsets = wn.synsets(word)
    if not synsets:
        return None

    # Base the swap on the POS of the word's first (most common) sense.
    target_pos = pos_swap.get(synsets[0].pos())
    if target_pos is None:
        return None

    # Return the first lemma of the first synset matching the target POS.
    for candidate in synsets:
        if candidate.pos() == target_pos:
            return candidate.lemmas()[0].name().replace('_', ' ')

    return None
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def transform_tense(word: str) -> Optional[str]:
    """
    Transform a verb by changing its tense (present to simple past).
    Uses a small irregular-verb table plus simple rules for regular verbs.

    Args:
        word (str): The verb to transform.

    Returns:
        Optional[str]: The past-tense form, or None if no rule applies.
    """
    # Irregular verbs must be checked FIRST: the suffix rules would
    # otherwise mangle them (originally 'see' -> 'seed', 'write' -> 'writed').
    irregular = {
        'run': 'ran',
        'go': 'went',
        'see': 'saw',
        'write': 'wrote',
        'is': 'was',
        'are': 'were',
    }
    if word in irregular:
        return irregular[word]

    # Guard: the suffix rules below inspect the last two characters;
    # the original raised IndexError on empty/one-letter input.
    if len(word) < 2:
        return None

    if word.endswith('e'):
        return word + 'd'             # e.g., love -> loved
    if word.endswith('y') and word[-2] not in 'aeiou':
        return word[:-1] + 'ied'      # e.g., study -> studied
    if word[-1] not in 'aeiou':
        return word + 'ed'            # e.g., walk -> walked, play -> played
    return None
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def transform_article(word: str) -> Optional[str]:
    """
    Transform a word related to articles (swap 'a' <-> 'an'). For nouns,
    return a different related noun that might cause article-related errors.

    Args:
        word (str): The word to transform.

    Returns:
        Optional[str]: The transformed word, or None if not applicable.
    """
    lowered = word.lower()
    if lowered in ('a', 'an'):
        return 'an' if lowered == 'a' else 'a'

    # For nouns, find another noun that might cause article confusion
    synsets = wn.synsets(word, pos='n')
    if not synsets:
        return None

    # Collect distinct candidates: the original list could contain the same
    # lemma many times (once per synset), skewing the random pick.
    seen = set()
    synonyms = []
    for synset in synsets:
        for lemma in synset.lemmas():
            synonym = lemma.name().replace('_', ' ')
            if synonym != word and synonym not in seen:
                seen.add(synonym)
                synonyms.append(synonym)

    return random.choice(synonyms) if synonyms else None
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def transform_meaning(word: str) -> Optional[str]:
    """
    Transform a word to another with a different meaning (e.g., an
    unrelated word sharing the same part of speech).

    Args:
        word (str): The word to transform.

    Returns:
        Optional[str]: A different word with the same POS, or None.
    """
    synsets = wn.synsets(word)
    if not synsets:
        return None

    current_pos = synsets[0].pos()

    # Collect every distinct candidate with the same POS. A set gives O(1)
    # de-duplication; the original `candidate not in different_words` on a
    # list made this loop quadratic over the whole WordNet vocabulary.
    different_words = set()
    for synset in wn.all_synsets(pos=current_pos):
        for lemma in synset.lemmas():
            candidate = lemma.name().replace('_', ' ')
            if candidate != word:
                different_words.add(candidate)

    return random.choice(tuple(different_words)) if different_words else None
|
| 209 |
+
|
| 210 |
|