PhamHelga commited on
Commit
342fe87
·
1 Parent(s): cdfdd8d

gen question type incorrect word, antonym, synonym, stress

Browse files
src/enum/question.py CHANGED
@@ -5,3 +5,4 @@ class QuestionTypeEnum(str, Enum):
5
  STRESS = "stress" # trong am
6
  SYNONYM = "synonym" # tu dong nghia
7
  ANTONYM = "antonym" # tu trai nghia
 
 
5
  STRESS = "stress" # trong am
6
  SYNONYM = "synonym" # tu dong nghia
7
  ANTONYM = "antonym" # tu trai nghia
8
+ INCORRECT_WORD = "incorrect_word"
src/factories/gen_question/antonym_question.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ import random
3
+
4
+ from src.factories.gen_question.base import Question, nltk_words
5
+ from src.enum.question import QuestionTypeEnum
6
+
7
+
8
class AntonymsQuestion(Question):
    """
    Generates multiple-choice questions that ask the user to select an
    antonym for a given word.

    Dictionary data (via ``fetch_word_data``) supplies meanings and
    antonyms. When the provided word list is empty or exhausted, source
    words are drawn at random from the built-in word list (``nltk_words``).
    """

    # Upper bound on fallback dictionary lookups so a run of words without
    # antonyms cannot loop (and hit the network) forever.
    MAX_FALLBACK_ATTEMPTS = 100

    def generate_questions(self, list_words: List[str] = None, num_question: int = 1, num_ans_per_question: int = 4):
        """
        Build ``num_question`` antonym questions.

        Args:
            list_words: Candidate source words; may be None or empty.
            num_question: Number of questions to generate.
            num_ans_per_question: Total number of choices per question.

        Returns:
            list[dict]: One dict per question with keys ``question``,
            ``type``, ``choices``, ``answer`` (index of the antonym) and
            ``explain``. May contain fewer than ``num_question`` items if
            no further antonyms could be found.
        """
        if list_words is None:
            list_words = []

        result = []
        unique_words = set(list_words)

        def get_question_and_answer():
            """Pick a word that has an antonym; return (word, antonym) or None."""
            # Prefer words from the caller-provided list.
            while unique_words:
                source_word = random.choice(list(unique_words))
                unique_words.remove(source_word)
                antonym_word = self.get_antonym(source_word)
                # Don't reuse the antonym as a later source word.
                if antonym_word in unique_words:
                    unique_words.remove(antonym_word)
                if antonym_word:
                    return source_word, antonym_word

            # Fallback: random dictionary words, bounded so we never loop
            # endlessly over network lookups that keep failing.
            for _ in range(self.MAX_FALLBACK_ATTEMPTS):
                source_word = random.choice(nltk_words)
                antonym_word = self.get_antonym(source_word)
                if antonym_word:
                    return source_word, antonym_word
            return None

        for _ in range(num_question):
            pair = get_question_and_answer()
            if pair is None:
                break  # No more antonyms available; return what we have.
            question_word, correct_answer = pair

            choices = [correct_answer]
            seen_distractors = set()

            # Fill remaining slots with distractors that are unique
            # (case-insensitively) and collide with neither the question
            # word nor the correct answer.
            while len(choices) < num_ans_per_question:
                distractor_word = random.choice(nltk_words)
                lowered = distractor_word.lower()
                if (lowered != correct_answer.lower() and
                        lowered != question_word.lower() and
                        lowered not in seen_distractors):
                    seen_distractors.add(lowered)
                    choices.append(distractor_word)

            random.shuffle(choices)

            result.append({
                "question": question_word,
                "type": QuestionTypeEnum.ANTONYM,
                "choices": choices,
                "answer": choices.index(correct_answer),
                "explain": [],
            })

        return result

    def get_antonym(self, word: str):
        """
        Retrieve a random antonym for ``word`` using dictionary API data.

        Checks both the ``meanings[].antonyms`` and
        ``meanings[].definitions[].antonyms`` fields.

        Args:
            word: The input word to find an antonym for.

        Returns:
            str or None: An antonym if found, else None.
        """
        data = self.fetch_word_data(word)
        if not data:
            return None

        # Copy so removing inspected entries does not mutate the payload.
        meanings = list(data.get("meanings", []))

        # Randomly search the meaning entries for antonyms.
        while meanings:
            meaning = random.choice(meanings)

            # Copy: never extend a list that belongs to the API payload.
            antonyms = list(meaning.get("antonyms", []))

            # Also check antonyms nested inside definitions.
            if not antonyms:
                for definition in meaning.get("definitions", []):
                    antonyms.extend(definition.get("antonyms", []))

            if antonyms:
                return random.choice(antonyms)

            meanings.remove(meaning)

        return None
src/factories/gen_question/base.py CHANGED
@@ -1,6 +1,7 @@
1
  from abc import ABC, abstractmethod
2
- from typing import Set
3
 
 
4
  import nltk
5
 
6
  nltk.download('words')
@@ -10,7 +11,7 @@ nltk_words = words.words()
10
 
11
  class Question(ABC):
12
  @abstractmethod
13
- def generate_questions(self, words: Set[str], num_questions: int = 1, num_ans_per_question: int = 4):
14
  pass
15
 
16
  @staticmethod
@@ -19,4 +20,18 @@ class Question(ABC):
19
  num_questions: int = 1,
20
  num_ans_per_question: int = 4
21
  ) -> int:
22
- return min(len_list_words//num_questions, num_ans_per_question)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from abc import ABC, abstractmethod
2
+ from typing import Set, Optional
3
 
4
+ import requests
5
  import nltk
6
 
7
  nltk.download('words')
 
11
 
12
  class Question(ABC):
13
  @abstractmethod
14
+ def generate_questions(self, list_words: Set[str], num_questions: int = 1, num_ans_per_question: int = 4):
15
  pass
16
 
17
  @staticmethod
 
20
  num_questions: int = 1,
21
  num_ans_per_question: int = 4
22
  ) -> int:
23
+ return min(len_list_words//num_questions, num_ans_per_question)
24
+
25
+ @staticmethod
26
+ def fetch_word_data(word: str) -> Optional[dict]:
27
+ """API get data of word"""
28
+ try:
29
+ base_url = "https://api.dictionaryapi.dev/api/v2/entries/en/"
30
+ resp = requests.get(base_url + word)
31
+ if resp.status_code == 200:
32
+ data = resp.json()
33
+ return data[0]
34
+ else:
35
+ return None
36
+ except Exception as e:
37
+ return None
src/factories/gen_question/incorrect_word_question.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ import random
3
+
4
+ from src.enum.question import QuestionTypeEnum
5
+ from src.factories.gen_question.base import Question, nltk_words
6
+ from src.services.AI.sentence_generator import SentenceGeneratorModel
7
+ from src.utils.word import transform_word
8
+
9
+
10
class IncorrectWordQuestion(Question):
    """
    Generates multiple-choice questions that ask the user to find the
    incorrect word in a sentence.

    A word from the sentence is replaced with a grammatically incorrect
    form (via ``transform_word``); the user must spot it among choices
    drawn from the remaining sentence words.
    """

    def generate_questions(self, list_words: List[str], num_question: int = 1, num_ans_per_question: int = 4):
        """
        Build ``num_question`` incorrect-word questions.

        Args:
            list_words: Candidate seed words; may be None or empty.
            num_question: Number of questions to generate.
            num_ans_per_question: Total number of choices per question.

        Returns:
            list[dict]: One dict per question with keys ``question``,
            ``type``, ``choices``, ``answer`` and ``explain``.
        """
        if list_words is None:
            list_words = []

        result = []
        unique_words = set(list_words)

        # sentence_generator = SentenceGeneratorModel()

        def choice_word_to_gen_sentence():
            """Pick 1-4 seed words, preferring the caller's list and
            topping up from nltk_words once it runs out."""
            wanted = random.randint(1, 4)
            available = list(unique_words)
            if wanted <= len(available):
                chosen = random.sample(available, wanted)
                for w in chosen:
                    unique_words.remove(w)
            else:
                # Take every remaining word and pad with random nltk words.
                chosen = available.copy()
                chosen += random.sample(nltk_words, wanted - len(chosen))
                unique_words.clear()
            return chosen

        for _ in range(num_question):
            seed_words = choice_word_to_gen_sentence()

            # 1. Generate a sentence. The model call is currently stubbed
            #    out with a fixed sentence -- TODO: wire up the generator.
            # sentence = sentence_generator.generate_sentence_from_words(seed_words)
            sentence = "The Conservatives and the Liberal Democrats are demanding the Labour government publish the evidence it submitted in the now-collapsed case against two people accused of spying for China."

            # 2. Randomly choose the word that will be made incorrect.
            sentence_words = sentence.strip(".").split()
            correct_word = random.choice(list(set(sentence_words)))
            sentence_words.remove(correct_word)

            # 3. Replace its first occurrence with an incorrect form.
            #    NOTE(review): str.replace matches substrings, so this could
            #    also hit a longer word containing correct_word -- confirm
            #    against real generated sentences.
            incorrect_word = transform_word(correct_word)
            modified_sentence = sentence.replace(correct_word, incorrect_word, 1)

            # 4. Build the choices: the incorrect word plus distractors from
            #    the rest of the sentence. Bug fixes: the incorrect word must
            #    be wrapped in a list before concatenation (list + str raised
            #    TypeError), and the sample size is clamped to the pool size
            #    so random.sample cannot raise ValueError.
            distractor_pool = list(set(sentence_words))
            num_distractors = min(num_ans_per_question - 1, len(distractor_pool))
            choices = random.sample(distractor_pool, num_distractors) + [incorrect_word]

            random.shuffle(choices)
            result.append({
                "question": modified_sentence,
                "type": QuestionTypeEnum.INCORRECT_WORD,
                "choices": choices,
                "answer": choices.index(incorrect_word),
                # Bug fix: was the literal string "Correct: {sequence}"
                # (no f-prefix, undefined name); show the replaced word.
                "explain": [f"Correct: {correct_word}"],
            })

        return result
src/factories/gen_question/pronunciation_question.py CHANGED
@@ -1,141 +1,141 @@
1
- import random
2
- from typing import Set, List
3
- from src.factories.gen_question.base import Question
4
- from src.enum.question import QuestionTypeEnum
5
- from collections import defaultdict
6
- import pronouncing
7
-
8
-
9
- class PronunciationQuestion(Question):
10
- def generate_questions(self, list_words: List[str], num_questions: int = 1, num_ans_per_question: int = 4) :
11
- result = []
12
- list_unique_words = set(list_words)
13
-
14
- num_word_in_list_per_question = self.cal_num_word_in_list_available_per_question(len(list_words), num_questions, num_ans_per_question)
15
-
16
- for _ in range(num_questions) :
17
- main_word = None
18
- main_segment = None
19
- main_pron = None
20
- while main_word is None and len(list_words) > 0:
21
- main_word = random.choice(list_words)
22
- main_segment = self.extract_main_segment(main_word)
23
- main_pron, segment_pron = self.get_pronunciation_of_word_and_segment(main_word, main_segment)
24
- if main_pron is None or segment_pron is None :
25
- main_word = None
26
- list_words.remove(main_word)
27
-
28
- question = main_segment
29
- choices = [main_word]
30
- explain = [f'{main_word} : {main_pron}']
31
- similar_pron_words = []
32
- different_pron_word = None
33
-
34
- # random main_char trong main_word de lam tu so sanh phien am
35
- # tim trong list_words co tu nao chua main_char sao cho phiên âm của các từ được tìm thấy là 1 từ có phiên âm khác, còn các từ còn lại có phiên âm giống nhau
36
- # xoa cac tu duoc chon trong list_words
37
-
38
- result.append({
39
- "question": "",
40
- "type": QuestionTypeEnum.PRONUNCIATION,
41
- "choices": choices,
42
- "answer": `index_of_choice`,
43
- "explain":
44
- })
45
-
46
- def get_pronunciation_of_word_and_segment(self, word: str, segment: str):
47
- word_pron = None
48
- word_segment = None
49
- try:
50
- p = pronouncing.phones_for_word(word)
51
- if not p:
52
- return None, None
53
- word_pron = p[0]
54
- except Exception:
55
- return None, None
56
-
57
-
58
-
59
-
60
-
61
-
62
- def extract_main_segment(self, word: str) -> str:
63
- """
64
- Extracts a random phonetic segment (vowel, consonant, consonant cluster, diphthong, or common ending)
65
- from the word, excluding the last segment unless it's the only option.
66
- For example, 'pronunciation' can be segmented as:
67
- - Individual: ['p', 'r', 'o', 'n', 'u', 'n', 'c', 'i', 'a', 't']
68
- - Grouped: ['p', 'r', 'o', 'n', 'u', 'n', 'c', 'i', 'a', 'tion']
69
- For example, 'phone' can be segmented as:
70
- - Grouped: ['ph', 'o', 'n', 'e']
71
- """
72
- if not word or len(word) <= 2:
73
- return word
74
-
75
- word = word.lower()
76
-
77
- # Define phonetic components
78
- vowels = set('aeiou')
79
- consonant_clusters = ['th', 'ph', 'sh', 'ch', 'wh', 'gh', 'sch', 'tr', 'sh', 's', 't', 'p']
80
- diphthongs = ['ai', 'au', 'ei', 'eu', 'oi', 'ou', 'ui', 'ie', 'io', 'ea', 'ee', 'oa', 'oe']
81
- common_endings = ['tion', 'sion', 'ing', 'ed', 'es']
82
-
83
- # Step 1: Segment the word
84
- segments = []
85
- i = 0
86
- while i < len(word):
87
- # Check for common endings (e.g., 'tion')
88
- matched_ending = False
89
- for ending in common_endings:
90
- if word[i:].startswith(ending) and i + len(ending) <= len(word):
91
- segments.append(ending)
92
- i += len(ending)
93
- matched_ending = True
94
- break
95
- if matched_ending:
96
- continue
97
-
98
- # Check for consonant clusters (e.g., 'th', 'ph', 'sch')
99
- matched_cluster = False
100
- for cluster in consonant_clusters:
101
- if word[i:].startswith(cluster) and i + len(cluster) <= len(word):
102
- segments.append(cluster)
103
- i += len(cluster)
104
- matched_cluster = True
105
- break
106
- if matched_cluster:
107
- continue
108
-
109
- # Check for diphthongs (e.g., 'io')
110
- matched_diphthong = False
111
- for diph in diphthongs:
112
- if word[i:].startswith(diph) and i + len(diph) <= len(word):
113
- segments.append(diph)
114
- i += len(diph)
115
- matched_diphthong = True
116
- break
117
- if matched_diphthong:
118
- continue
119
-
120
- # Add single character (vowel or consonant)
121
- if word[i].isalpha():
122
- segments.append(word[i])
123
- i += 1
124
-
125
- # Step 2: Filter valid segments (exclude the last segment if possible)
126
- valid_segments = segments[:-1] if len(segments) > 1 else segments
127
-
128
- # Step 3: If no valid segments, fall back to single characters
129
- if not valid_segments:
130
- valid_positions = [i for i in range(len(word) - 1) if word[i].isalpha()]
131
- if not valid_positions:
132
- return ''
133
- pos = random.choice(valid_positions)
134
- return word[pos]
135
-
136
- # Step 4: Randomly choose a segment
137
- return random.choice(valid_segments)
138
-
139
- def cal_num_word_in_list_available_per_question(self, len_list_words: int, num_questions: int = 1, num_ans_per_question: int = 4) -> int:
140
- num_word_in_list_available_per_question = len_list_words // num_questions
141
- return num_word_in_list_available_per_question if num_word_in_list_available_per_question < num_ans_per_question else num_ans_per_question
 
1
+ # import random
2
+ # from typing import Set, List
3
+ # from src.factories.gen_question.base import Question
4
+ # from src.enum.question import QuestionTypeEnum
5
+ # from collections import defaultdict
6
+ # import pronouncing
7
+ #
8
+ #
9
+ # class PronunciationQuestion(Question):
10
+ # def generate_questions(self, list_words: List[str], num_questions: int = 1, num_ans_per_question: int = 4) :
11
+ # result = []
12
+ # list_unique_words = set(list_words)
13
+ #
14
+ # num_word_in_list_per_question = self.cal_num_word_in_list_available_per_question(len(list_words), num_questions, num_ans_per_question)
15
+ #
16
+ # for _ in range(num_questions) :
17
+ # main_word = None
18
+ # main_segment = None
19
+ # main_pron = None
20
+ # while main_word is None and len(list_words) > 0:
21
+ # main_word = random.choice(list_words)
22
+ # main_segment = self.extract_main_segment(main_word)
23
+ # main_pron, segment_pron = self.get_pronunciation_of_word_and_segment(main_word, main_segment)
24
+ # if main_pron is None or segment_pron is None :
25
+ # main_word = None
26
+ # list_words.remove(main_word)
27
+ #
28
+ # question = main_segment
29
+ # choices = [main_word]
30
+ # explain = [f'{main_word} : {main_pron}']
31
+ # similar_pron_words = []
32
+ # different_pron_word = None
33
+ #
34
+ # # random main_char trong main_word de lam tu so sanh phien am
35
+ # # tim trong list_words co tu nao chua main_char sao cho phiên âm của các từ được tìm thấy là 1 từ có phiên âm khác, còn các từ còn lại có phiên âm giống nhau
36
+ # # xoa cac tu duoc chon trong list_words
37
+ #
38
+ # result.append({
39
+ # "question": "",
40
+ # "type": QuestionTypeEnum.PRONUNCIATION,
41
+ # "choices": choices,
42
+ # "answer": `index_of_choice`,
43
+ # "explain":
44
+ # })
45
+ #
46
+ # def get_pronunciation_of_word_and_segment(self, word: str, segment: str):
47
+ # word_pron = None
48
+ # word_segment = None
49
+ # try:
50
+ # p = pronouncing.phones_for_word(word)
51
+ # if not p:
52
+ # return None, None
53
+ # word_pron = p[0]
54
+ # except Exception:
55
+ # return None, None
56
+ #
57
+ #
58
+ #
59
+ #
60
+ #
61
+ #
62
+ # def extract_main_segment(self, word: str) -> str:
63
+ # """
64
+ # Extracts a random phonetic segment (vowel, consonant, consonant cluster, diphthong, or common ending)
65
+ # from the word, excluding the last segment unless it's the only option.
66
+ # For example, 'pronunciation' can be segmented as:
67
+ # - Individual: ['p', 'r', 'o', 'n', 'u', 'n', 'c', 'i', 'a', 't']
68
+ # - Grouped: ['p', 'r', 'o', 'n', 'u', 'n', 'c', 'i', 'a', 'tion']
69
+ # For example, 'phone' can be segmented as:
70
+ # - Grouped: ['ph', 'o', 'n', 'e']
71
+ # """
72
+ # if not word or len(word) <= 2:
73
+ # return word
74
+ #
75
+ # word = word.lower()
76
+ #
77
+ # # Define phonetic components
78
+ # vowels = set('aeiou')
79
+ # consonant_clusters = ['th', 'ph', 'sh', 'ch', 'wh', 'gh', 'sch', 'tr', 'sh', 's', 't', 'p']
80
+ # diphthongs = ['ai', 'au', 'ei', 'eu', 'oi', 'ou', 'ui', 'ie', 'io', 'ea', 'ee', 'oa', 'oe']
81
+ # common_endings = ['tion', 'sion', 'ing', 'ed', 'es']
82
+ #
83
+ # # Step 1: Segment the word
84
+ # segments = []
85
+ # i = 0
86
+ # while i < len(word):
87
+ # # Check for common endings (e.g., 'tion')
88
+ # matched_ending = False
89
+ # for ending in common_endings:
90
+ # if word[i:].startswith(ending) and i + len(ending) <= len(word):
91
+ # segments.append(ending)
92
+ # i += len(ending)
93
+ # matched_ending = True
94
+ # break
95
+ # if matched_ending:
96
+ # continue
97
+ #
98
+ # # Check for consonant clusters (e.g., 'th', 'ph', 'sch')
99
+ # matched_cluster = False
100
+ # for cluster in consonant_clusters:
101
+ # if word[i:].startswith(cluster) and i + len(cluster) <= len(word):
102
+ # segments.append(cluster)
103
+ # i += len(cluster)
104
+ # matched_cluster = True
105
+ # break
106
+ # if matched_cluster:
107
+ # continue
108
+ #
109
+ # # Check for diphthongs (e.g., 'io')
110
+ # matched_diphthong = False
111
+ # for diph in diphthongs:
112
+ # if word[i:].startswith(diph) and i + len(diph) <= len(word):
113
+ # segments.append(diph)
114
+ # i += len(diph)
115
+ # matched_diphthong = True
116
+ # break
117
+ # if matched_diphthong:
118
+ # continue
119
+ #
120
+ # # Add single character (vowel or consonant)
121
+ # if word[i].isalpha():
122
+ # segments.append(word[i])
123
+ # i += 1
124
+ #
125
+ # # Step 2: Filter valid segments (exclude the last segment if possible)
126
+ # valid_segments = segments[:-1] if len(segments) > 1 else segments
127
+ #
128
+ # # Step 3: If no valid segments, fall back to single characters
129
+ # if not valid_segments:
130
+ # valid_positions = [i for i in range(len(word) - 1) if word[i].isalpha()]
131
+ # if not valid_positions:
132
+ # return ''
133
+ # pos = random.choice(valid_positions)
134
+ # return word[pos]
135
+ #
136
+ # # Step 4: Randomly choose a segment
137
+ # return random.choice(valid_segments)
138
+ #
139
+ # def cal_num_word_in_list_available_per_question(self, len_list_words: int, num_questions: int = 1, num_ans_per_question: int = 4) -> int:
140
+ # num_word_in_list_available_per_question = len_list_words // num_questions
141
+ # return num_word_in_list_available_per_question if num_word_in_list_available_per_question < num_ans_per_question else num_ans_per_question
src/factories/gen_question/question.py CHANGED
@@ -1,15 +1,21 @@
1
  from src.enum.question import QuestionTypeEnum
 
 
 
 
2
  from src.utils.exceptions import BadRequestException
3
 
4
 
5
- def get_question_type(question_type: QuestionTypeEnum) :
6
- if type == QuestionTypeEnum.SYLLABLE :
7
- return
8
- elif type == QuestionTypeEnum.STRESS :
9
- return
10
- elif type == QuestionTypeEnum.SYNONYM :
11
- return
12
- elif type == QuestionTypeEnum.ANTONYM :
13
- return
 
 
14
  else:
15
  raise BadRequestException('type_invalid')
 
1
  from src.enum.question import QuestionTypeEnum
2
+ from src.factories.gen_question.antonym_question import AntonymsQuestion
3
+ from src.factories.gen_question.incorrect_word_question import IncorrectWordQuestion
4
+ from src.factories.gen_question.stress_question import StressQuestion
5
+ from src.factories.gen_question.synonym_question import SynonymsQuestion
6
  from src.utils.exceptions import BadRequestException
7
 
8
 
9
def create_question_instance(question_type: QuestionTypeEnum):
    """
    Factory returning a question-generator instance for ``question_type``.

    PRONUNCIATION currently maps to ``StressQuestion`` as well, since the
    dedicated pronunciation generator is disabled.

    Raises:
        BadRequestException: if no generator is registered for the type.
    """
    generator_by_type = {
        QuestionTypeEnum.PRONUNCIATION: StressQuestion,
        QuestionTypeEnum.STRESS: StressQuestion,
        QuestionTypeEnum.SYNONYM: SynonymsQuestion,
        QuestionTypeEnum.ANTONYM: AntonymsQuestion,
        QuestionTypeEnum.INCORRECT_WORD: IncorrectWordQuestion,
    }
    generator_cls = generator_by_type.get(question_type)
    if generator_cls is None:
        raise BadRequestException('type_invalid')
    return generator_cls()
src/factories/gen_question/stress_question.py CHANGED
@@ -1,18 +1,23 @@
1
- import random
2
- from collections import defaultdict
3
  from typing import List
 
 
 
4
  from src.factories.gen_question.base import Question, nltk_words
5
  from src.enum.question import QuestionTypeEnum
6
-
7
  from src.utils.number import rand_exclude
8
  from src.utils.word import get_stress_pattern, convert_word_to_ipa
9
 
10
 
11
  class StressQuestion(Question):
12
- def generate_questions(self, list_words: List[str], num_question: int = 1, num_ans_per_question: int = 4):
 
 
 
13
  result = []
14
 
15
- # process data:
 
 
16
  stress_groups = defaultdict(list)
17
  for word in list_words:
18
  stress = get_stress_pattern(word)
@@ -20,75 +25,85 @@ class StressQuestion(Question):
20
  if ipa is None or stress is None:
21
  continue
22
  stress_groups[stress].append({"word": word, "ipa": ipa})
23
- num_word_in_list_per_question = self.cal_num_word_in_list_available_per_question(len(list_words), num_question, num_ans_per_question)
24
 
25
  # create
26
- def choice_random_words_in_stress_group(stress_group_key):
27
  stress_group = stress_groups[stress_group_key]
28
  item = random.choice(stress_group)
29
- stress_group.remove(item)
30
  return item["word"], item["ipa"]
31
 
32
- for _ in range(num_question): # type: ignore
33
  choices = []
34
  explain = []
35
  list_stress_group_keys = list(stress_groups.keys())
36
 
37
- # get different stress
38
- if len(list_stress_group_keys) != 0:
39
  different_stress = random.choice(list_stress_group_keys)
40
  list_stress_group_keys.remove(different_stress)
41
- different_word, different_ipa = choice_random_words_in_stress_group(different_stress)
 
42
  else:
43
  different_stress = random.randint(1, 3)
44
- different_word, different_ipa = self.get_random_word_and_ipa_by_stress(different_stress)
 
 
 
45
 
46
  choices.append(different_word)
47
- explain.append(f'{different_word} ({different_ipa})')
48
 
49
- # get common stress
50
- if len(list_stress_group_keys) != 0:
51
- # if in list word exist more two stresses, get choice in list word
52
  common_stress = random.choice(list_stress_group_keys)
53
- list_stress_group_keys.remove(common_stress)
54
- # number of choice must be lesster number of list word slipt number of question and in stress group must exist item
55
- while len(choices) < num_word_in_list_per_question and len(stress_groups[common_stress]) > 0:
56
- common_word, common_ipa = choice_random_words_in_stress_group(common_stress)
57
-
58
  choices.append(common_word)
59
- explain.append(f'{common_word} ({common_ipa})')
60
  else:
61
  common_stress = rand_exclude(1, 3, different_stress)
62
 
63
- # maybe after get choice in list word, number of choice is not enough, so get choice in local data
64
  while len(choices) < num_ans_per_question:
65
- common_word, common_word_ipa = self.get_random_word_and_ipa_by_stress(common_stress)
66
-
 
 
67
  choices.append(common_word)
68
- explain.append(f'{common_word} ({common_word_ipa})')
69
- continue
70
-
71
- random.shuffle(choices)
72
-
73
- result.append({
74
- "question": "",
75
- "type": QuestionTypeEnum.STRESS,
76
- "choices": choices,
77
- "answer": choices.index(different_word),
78
- "explain": explain,
79
- })
 
 
 
80
 
81
  return result
82
 
83
- def get_random_word_and_ipa_by_stress(self, stress: int):
84
- while True:
 
 
 
 
 
85
  word = random.choice(nltk_words)
86
  word_ipa = convert_word_to_ipa(word)
87
  word_stress = get_stress_pattern(word)
88
  if word_ipa is None or word_stress is None:
89
- nltk_words.remove(word)
90
  continue
91
  if word_stress == stress:
92
  return word, word_ipa
93
-
94
-
 
 
 
1
  from typing import List
2
+ from collections import defaultdict
3
+ import random
4
+
5
  from src.factories.gen_question.base import Question, nltk_words
6
  from src.enum.question import QuestionTypeEnum
 
7
  from src.utils.number import rand_exclude
8
  from src.utils.word import get_stress_pattern, convert_word_to_ipa
9
 
10
 
11
  class StressQuestion(Question):
12
+ def generate_questions(self, list_words: List[str] = None, num_question: int = 1, num_ans_per_question: int = 4):
13
+ if list_words is None:
14
+ list_words = []
15
+
16
  result = []
17
 
18
+ # Process data: group words by stress pattern
19
+ num_word_in_list_per_question = self.cal_num_word_in_list_available_per_question(len(list_words), num_question, num_ans_per_question)
20
+
21
  stress_groups = defaultdict(list)
22
  for word in list_words:
23
  stress = get_stress_pattern(word)
 
25
  if ipa is None or stress is None:
26
  continue
27
  stress_groups[stress].append({"word": word, "ipa": ipa})
 
28
 
29
  # create
30
+ def choice_random_words_in_stress_group(stress_group_key: int):
31
  stress_group = stress_groups[stress_group_key]
32
  item = random.choice(stress_group)
33
+ stress_group.remove(item) # Remove to avoid reuse within the same question
34
  return item["word"], item["ipa"]
35
 
36
+ for _ in range(num_question):
37
  choices = []
38
  explain = []
39
  list_stress_group_keys = list(stress_groups.keys())
40
 
41
+ # Get word with different stress
42
+ if list_stress_group_keys:
43
  different_stress = random.choice(list_stress_group_keys)
44
  list_stress_group_keys.remove(different_stress)
45
+ different_word_ipa = choice_random_words_in_stress_group(different_stress)
46
+ different_word, different_ipa = different_word_ipa
47
  else:
48
  different_stress = random.randint(1, 3)
49
+ different_word_ipa = self.get_random_word_and_ipa_by_stress(different_stress)
50
+ if different_word_ipa is None:
51
+ continue # Skip this question if no valid word is found
52
+ different_word, different_ipa = different_word_ipa
53
 
54
  choices.append(different_word)
55
+ explain.append(f'{different_word} ({different_ipa}, stress pattern: {different_stress})')
56
 
57
+ # Get words with common stress
58
+ if list_stress_group_keys:
 
59
  common_stress = random.choice(list_stress_group_keys)
60
+ while len(choices) < num_word_in_list_per_question and stress_groups[common_stress]:
61
+ common_word_ipa = choice_random_words_in_stress_group(common_stress)
62
+ common_word, common_ipa = common_word_ipa
 
 
63
  choices.append(common_word)
64
+ explain.append(f'{common_word} ({common_ipa}, stress pattern: {common_stress})')
65
  else:
66
  common_stress = rand_exclude(1, 3, different_stress)
67
 
68
+ # Fill remaining choices from nltk_words if needed
69
  while len(choices) < num_ans_per_question:
70
+ common_word_ipa = self.get_random_word_and_ipa_by_stress(common_stress)
71
+ if common_word_ipa is None:
72
+ break # Skip adding if no valid word is found
73
+ common_word, common_ipa = common_word_ipa
74
  choices.append(common_word)
75
+ explain.append(f'{common_word} ({common_ipa}, stress pattern: {common_stress})')
76
+
77
+ # Only add the question if we have enough choices
78
+ print(choices, len(choices))
79
+ if len(choices) == num_ans_per_question:
80
+ random.shuffle(choices)
81
+ result.append({
82
+ "question": "",
83
+ "type": QuestionTypeEnum.STRESS,
84
+ "choices": choices,
85
+ "answer": choices.index(different_word),
86
+ "explain": explain,
87
+ })
88
+
89
+ print(result)
90
 
91
  return result
92
 
93
+ @staticmethod
94
+ def get_random_word_and_ipa_by_stress(stress: int):
95
+ max_attempts = 10000
96
+ attempts = 0
97
+ while attempts < max_attempts:
98
+ if not nltk_words:
99
+ return None
100
  word = random.choice(nltk_words)
101
  word_ipa = convert_word_to_ipa(word)
102
  word_stress = get_stress_pattern(word)
103
  if word_ipa is None or word_stress is None:
104
+ attempts += 1
105
  continue
106
  if word_stress == stress:
107
  return word, word_ipa
108
+ attempts += 1
109
+ return None # Return None if no word is found
src/factories/gen_question/synonym_question.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ import random
3
+
4
+ from src.factories.gen_question.base import Question, nltk_words
5
+ from src.enum.question import QuestionTypeEnum
6
+
7
+
8
class SynonymsQuestion(Question):
    """
    Generates multiple-choice questions that ask the user to select a
    synonym for a given word.

    Dictionary data (via ``fetch_word_data``) supplies meanings and
    synonyms. When the provided word list is empty or exhausted, source
    words are drawn at random from the built-in word list (``nltk_words``).
    """

    # Upper bound on fallback dictionary lookups so a run of words without
    # synonyms cannot loop (and hit the network) forever.
    MAX_FALLBACK_ATTEMPTS = 100

    def generate_questions(self, list_words: List[str] = None, num_question: int = 1, num_ans_per_question: int = 4):
        """
        Build ``num_question`` synonym questions.

        Args:
            list_words: Candidate source words; may be None or empty.
            num_question: Number of questions to generate.
            num_ans_per_question: Total number of choices per question.

        Returns:
            list[dict]: One dict per question with keys ``question``,
            ``type``, ``choices``, ``answer`` (index of the synonym) and
            ``explain``. May contain fewer than ``num_question`` items if
            no further synonyms could be found.
        """
        if list_words is None:
            list_words = []

        result = []
        unique_words = set(list_words)

        def get_question_and_answer():
            """Pick a word that has a synonym; return (word, synonym) or None."""
            # Prefer words from the caller-provided list.
            while unique_words:
                source_word = random.choice(list(unique_words))
                unique_words.remove(source_word)
                synonym_word = self.get_synonym(source_word)
                # Bug fix: remove the *synonym* so it is not reused as a
                # later source word (previously removed source_word again,
                # which raised KeyError because it was already gone).
                if synonym_word in unique_words:
                    unique_words.remove(synonym_word)
                if synonym_word:
                    return source_word, synonym_word

            # Fallback: random dictionary words, bounded so we never loop
            # endlessly over network lookups that keep failing.
            for _ in range(self.MAX_FALLBACK_ATTEMPTS):
                source_word = random.choice(nltk_words)
                synonym_word = self.get_synonym(source_word)
                if synonym_word:
                    return source_word, synonym_word
            return None

        for _ in range(num_question):
            pair = get_question_and_answer()
            if pair is None:
                break  # No more synonyms available; return what we have.
            question_word, correct_answer = pair

            choices = [correct_answer]
            seen_distractors = set()

            # Fill remaining slots with distractors that are unique
            # (case-insensitively) and collide with neither the question
            # word nor the correct answer.
            while len(choices) < num_ans_per_question:
                distractor_word = random.choice(nltk_words)
                lowered = distractor_word.lower()
                if (lowered != correct_answer.lower() and
                        lowered != question_word.lower() and
                        lowered not in seen_distractors):
                    seen_distractors.add(lowered)
                    choices.append(distractor_word)

            random.shuffle(choices)

            result.append({
                "question": question_word,
                "type": QuestionTypeEnum.SYNONYM,
                "choices": choices,
                "answer": choices.index(correct_answer),
                "explain": [],
            })

        return result

    def get_synonym(self, word: str):
        """
        Retrieve a random synonym for ``word`` using dictionary API data.

        Checks both the ``meanings[].synonyms`` and
        ``meanings[].definitions[].synonyms`` fields.

        Args:
            word: The input word to find a synonym for.

        Returns:
            str or None: A synonym if found, else None.
        """
        data = self.fetch_word_data(word)
        if not data:
            return None

        # Copy so removing inspected entries does not mutate the payload.
        meanings = list(data.get("meanings", []))

        # Randomly search the meaning entries for synonyms.
        while meanings:
            meaning = random.choice(meanings)

            # Copy: never extend a list that belongs to the API payload.
            synonyms = list(meaning.get("synonyms", []))

            # Also check synonyms nested inside definitions.
            if not synonyms:
                for definition in meaning.get("definitions", []):
                    synonyms.extend(definition.get("synonyms", []))

            if synonyms:
                return random.choice(synonyms)

            meanings.remove(meaning)

        return None
src/interfaces/question.py CHANGED
@@ -1,5 +1,8 @@
1
- from pydantic import BaseModel
2
- from typing import Optional
 
 
 
3
 
4
  class ModelInput(BaseModel):
5
  """General request model structure for flutter incoming req."""
@@ -9,4 +12,17 @@ class ModelInput(BaseModel):
9
 
10
  class ICQuestion(BaseModel):
11
  context: str
12
- name: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field, field_validator
2
+ from typing import Optional, List
3
+
4
+ from src.enum.question import QuestionTypeEnum
5
+
6
 
7
  class ModelInput(BaseModel):
8
  """General request model structure for flutter incoming req."""
 
12
 
13
  class ICQuestion(BaseModel):
14
  context: str
15
+ name: str
16
+
17
class ICreateQuestion(BaseModel):
    """Request body for generating vocabulary questions.

    Attributes:
        question_type: Which kind of question to generate.
        list_words: Source vocabulary; every entry must be a single word.
        num_ans_per_question: Choices per question (2-10 inclusive).
        num_question: Number of questions to generate (1-10 inclusive).
    """
    question_type: QuestionTypeEnum
    list_words: List[str]
    num_ans_per_question: int = Field(..., ge=2, le=10)
    num_question: int = Field(..., ge=1, le=10)

    @field_validator('list_words')
    @classmethod
    def check_single_word(cls, value: List[str]) -> List[str]:
        """Reject multi-word entries; generators expect single tokens."""
        if any(" " in word for word in value):
            raise ValueError("list_words_just_includes_single_word")
        return value
src/routers/public/quesion.py CHANGED
@@ -1,9 +1,10 @@
1
  from fastapi import APIRouter, Request
2
  from fastapi.responses import JSONResponse
3
 
 
4
  from src.utils.response import res_ok
5
  from src.utils.text_process import vietnamese_to_english, english_to_vietnamese, get_all_summary, get_all_questions
6
- from src.interfaces.question import ModelInput, ICQuestion
7
  from src.services.AI.abstractive_summarizer import AbstractiveSummarizer
8
  from src.services.AI.question_generator import QuestionGenerator
9
  from src.services.AI.false_ans_generator import FalseAnswerGenerator
@@ -11,8 +12,20 @@ from src.services.AI.keyword_extractor import KeywordExtractor
11
 
12
  route = APIRouter(prefix="/question", tags=["Question"])
13
  print("Including question routes...")
 
 
 
 
 
 
 
 
 
 
 
 
14
  @route.post('/sentence')
15
- async def generate_questions_from_sentence(body: ICQuestion, request: Request):
16
  """Process user request
17
 
18
  Args:
 
1
  from fastapi import APIRouter, Request
2
  from fastapi.responses import JSONResponse
3
 
4
+ from src.factories.gen_question.question import create_question_instance
5
  from src.utils.response import res_ok
6
  from src.utils.text_process import vietnamese_to_english, english_to_vietnamese, get_all_summary, get_all_questions
7
+ from src.interfaces.question import ModelInput, ICQuestion, ICreateQuestion
8
  from src.services.AI.abstractive_summarizer import AbstractiveSummarizer
9
  from src.services.AI.question_generator import QuestionGenerator
10
  from src.services.AI.false_ans_generator import FalseAnswerGenerator
 
12
 
13
  route = APIRouter(prefix="/question", tags=["Question"])
14
  print("Including question routes...")
15
+
16
@route.post('/')
async def generate_question(body: ICreateQuestion):
    """Generate vocabulary questions of the requested type.

    Args:
        body: Validated request carrying the question type, the source word
            list, and the desired number of questions/answers per question.

    Returns:
        JSONResponse: 200 response wrapping the generated question list.
    """
    question = create_question_instance(body.question_type)
    list_questions = question.generate_questions(
        list_words=body.list_words,
        num_question=body.num_question,
        num_ans_per_question=body.num_ans_per_question,
    )
    # Debug print removed — the payload is returned to the client anyway.
    return JSONResponse(status_code=200, content=res_ok(list_questions))
26
+
27
  @route.post('/sentence')
28
+ async def generate_questions_from_sentence(body: ICQuestion, request: Request):
29
  """Process user request
30
 
31
  Args:
src/services/AI/sentence_generator.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .base import Model
2
+ from typing import List
3
+ import random
4
+
5
+
6
class SentenceGeneratorModel(Model):
    """
    Singleton wrapper around the base Model class that generates English
    sentences containing given vocabulary words.
    """

    # Class-level holder for the single shared instance.
    _instance = None

    def __new__(cls, model_name: str = "google/flan-t5-base"):
        # NOTE(review): model_name is ignored after the first construction
        # because the singleton is reused — confirm this is intended.
        if cls._instance is None:
            cls._instance = super(SentenceGeneratorModel, cls).__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self, model_name: str = "google/flan-t5-base"):
        # Guard against re-running the expensive base init on reuse.
        if self._initialized:
            return
        super().__init__(model_name)
        self._initialized = True

    def generate_sentence_from_words(
        self,
        vocab_list: List[str],
        min_words: int = 2,
        max_words: int = 5,
        model_max_length: int = 64,
        token_max_length: int = 64
    ) -> str:
        """
        Generate a sentence that uses a random subset of the given words.

        Args:
            vocab_list (List[str]): The list of available vocabulary words.
            min_words (int): Minimum number of words to include in sentence.
            max_words (int): Maximum number of words to include.
            model_max_length (int): Max length of generated sentence.
            token_max_length (int): Max length for tokenization.

        Returns:
            str: A generated sentence using the selected words.

        Raises:
            ValueError: If vocab_list is empty or min_words > max_words.
        """
        if not vocab_list:
            raise ValueError("vocab_list cannot be empty.")
        if min_words > max_words:
            # random.randint would otherwise raise a cryptic ValueError.
            raise ValueError("min_words must not exceed max_words.")

        sample_size = min(len(vocab_list), random.randint(min_words, max_words))
        selected_words = random.sample(vocab_list, k=sample_size)

        prompt = f"Write an English sentence using the following words: {', '.join(selected_words)}."

        return self.inference(
            model_max_length=model_max_length,
            token_max_length=token_max_length,
            task=prompt
        )
src/utils/word.py CHANGED
@@ -27,11 +27,184 @@ def get_stress_pattern(word):
27
  0 indicates no stress.
28
  """
29
  list_pattern = pronouncing.stresses_for_word(word)
 
 
30
  pattern = list_pattern[0]
 
 
31
  try:
32
- index = pattern.index('1')
33
  return index
34
- except ValueError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  return None
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
 
27
  0 indicates no stress.
28
  """
29
  list_pattern = pronouncing.stresses_for_word(word)
30
+ if list_pattern is None or list_pattern == []:
31
+ return None
32
  pattern = list_pattern[0]
33
+ if len(pattern) == 1:
34
+ return None
35
  try:
36
+ index = pattern.index('1') + 1
37
  return index
38
+ except ValueError:
39
+ return None
40
+
41
+
42
+ import random
43
+ from typing import Optional
44
+ import nltk
45
+ nltk.download('wordnet')
46
+ from nltk.corpus import wordnet as wn
47
+
48
def transform_word(word: str) -> Optional[str]:
    """
    Produce a deliberately wrong variant of *word* for a
    'find the incorrect word' question.

    Applies the available transformations (preposition swap, POS change,
    tense change, article change, meaning change) in random order and
    returns the first result that differs from the input. Falls back to a
    random word from the built-in word list when nothing applies.

    Args:
        word (str): The word to distort.

    Returns:
        Optional[str]: The distorted word, or None if no candidate exists.
    """
    candidate_makers = [
        transform_preposition,  # Handle prepositions
        transform_word_type,    # Change word type (e.g., noun to verb)
        transform_tense,        # Change verb tense
        transform_article,      # Change article-related form
        transform_meaning,      # Change to a word with different meaning
    ]
    random.shuffle(candidate_makers)

    for make_candidate in candidate_makers:
        candidate = make_candidate(word)
        if candidate and candidate != word:
            return candidate

    # Last resort: any random word from the bundled word list.
    try:
        from src.factories.gen_question.base import nltk_words
    except ImportError:
        return None
    return random.choice(nltk_words) if nltk_words else None
81
+
82
+
83
def transform_preposition(word: str) -> Optional[str]:
    """
    Transform a preposition into another preposition that is likely to be
    incorrect in context.

    Args:
        word (str): The input word to check and transform.

    Returns:
        Optional[str]: A different preposition, or None if the input is not
        a recognized preposition.
    """
    # Common prepositions mapped to their common incorrect substitutions.
    preposition_map = {
        'in': ['on', 'at', 'to'],
        'on': ['in', 'at', 'over'],
        'at': ['in', 'on', 'by'],
        'to': ['in', 'at', 'for'],
        'for': ['to', 'with', 'in'],
        'with': ['for', 'by', 'in'],
        'by': ['with', 'at', 'on'],
        'from': ['to', 'in', 'at'],
        'of': ['for', 'in', 'on']
    }

    # random.choice is the idiomatic (and equivalent) form of
    # random.sample(xs, 1)[0].
    substitutes = preposition_map.get(word.lower())
    return random.choice(substitutes) if substitutes else None
110
+
111
def transform_word_type(word: str) -> Optional[str]:
    """
    Transform a word by changing its part of speech (e.g., noun to verb).
    Uses WordNet to find a related lemma with a different POS.

    Args:
        word (str): The input word.

    Returns:
        Optional[str]: A lemma of the mapped POS, or None when the word is
        unknown to WordNet or no mapped-POS synset exists.
    """
    pos_map = {
        'n': 'v',  # Noun to verb
        'v': 'n',  # Verb to noun
        'a': 'r',  # Adjective to adverb
        'r': 'a'   # Adverb to adjective
    }

    # Fetch the synsets once and reuse them (the original queried WordNet
    # a second time for the same word).
    synsets = wn.synsets(word)
    if not synsets:
        return None

    # POS of the first (most common) synset decides the target POS.
    target_pos = pos_map.get(synsets[0].pos())
    if not target_pos:
        return None

    # First lemma of the first synset matching the target POS.
    for synset in synsets:
        if synset.pos() == target_pos:
            return synset.lemmas()[0].name().replace('_', ' ')

    return None
139
+
140
+
141
def transform_tense(word: str) -> Optional[str]:
    """
    Transform a verb into its simple past tense.

    Irregular verbs are resolved first from a small hardcoded table; the
    original checked them last, so 'see' became 'seed' and 'write' became
    'writed'. Regular verbs follow simple suffix rules.

    Args:
        word (str): The verb to transform.

    Returns:
        Optional[str]: The past-tense form, or None if no rule applies.
    """
    # Irregular verbs must win over the suffix rules.
    irregular = {
        'run': 'ran',
        'go': 'went',
        'see': 'saw',
        'write': 'wrote',
        'is': 'was',
        'are': 'were'
    }
    if word in irregular:
        return irregular[word]

    # Guard: the suffix rules index word[-2], which crashes on short input.
    if len(word) < 2:
        return None

    if word.endswith('e'):
        return word + 'd'        # e.g., love -> loved
    if word.endswith('y') and word[-2] not in 'aeiou':
        return word[:-1] + 'ied'  # e.g., study -> studied
    if word[-1] not in 'aeiou':
        return word + 'ed'        # e.g., walk -> walked, play -> played
    return None
164
+
165
+
166
def transform_article(word: str) -> Optional[str]:
    """
    Transform a word related to articles.

    For 'a'/'an' the opposite article is returned. For other words, WordNet
    noun synonyms are collected and one is returned at random, to create a
    plausible article-confusing substitute.

    Args:
        word (str): The input word.

    Returns:
        Optional[str]: The transformed word, or None if nothing applies.
    """
    lowered = word.lower()
    if lowered in ('a', 'an'):
        return 'an' if lowered == 'a' else 'a'

    # Nouns only: look for related nouns via WordNet.
    noun_synsets = wn.synsets(word, pos='n')
    if not noun_synsets:
        return None

    # Gather every lemma (duplicates kept, matching the original weighting)
    # except the word itself.
    candidates = [
        lemma.name().replace('_', ' ')
        for synset in noun_synsets
        for lemma in synset.lemmas()
        if lemma.name().replace('_', ' ') != word
    ]

    return random.choice(candidates) if candidates else None
188
+
189
+
190
def transform_meaning(word: str) -> Optional[str]:
    """
    Transform a word to another with a different meaning but the same POS.

    Iterates every WordNet synset of the word's POS; deduplication uses a
    set instead of the original `candidate not in list`, which was O(n^2)
    over the whole lemma inventory.

    Args:
        word (str): The input word.

    Returns:
        Optional[str]: A different word of the same POS, or None if the
        word is unknown to WordNet.
    """
    synsets = wn.synsets(word)
    if not synsets:
        return None

    current_pos = synsets[0].pos()

    # Collect unique candidates; the list preserves first-seen order so the
    # uniform random pick matches the original distribution.
    seen = set()
    candidates = []
    for synset in wn.all_synsets(pos=current_pos):
        for lemma in synset.lemmas():
            candidate = lemma.name().replace('_', ' ')
            if candidate != word and candidate not in seen:
                seen.add(candidate)
                candidates.append(candidate)

    return random.choice(candidates) if candidates else None
209
+
210