Spaces:
Sleeping
Sleeping
File size: 6,435 Bytes
cdfdd8d 342fe87 cdfdd8d 342fe87 cdfdd8d 342fe87 cdfdd8d 342fe87 78dd479 342fe87 78dd479 342fe87 cdfdd8d 342fe87 cdfdd8d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
import eng_to_ipa
import pronouncing
# US English pronunciation helpers (eng_to_ipa and pronouncing both use the CMU Pronouncing Dictionary)
def convert_word_to_ipa(word):
    """
    Look up the IPA transcription of a word with eng_to_ipa.

    Args:
        word: the word to transcribe
    Returns:
        The transcription returned by eng_to_ipa.convert, or None when that
        result equals the input, contains '*', or the conversion raises.
    """
    try:
        ipa = eng_to_ipa.convert(word)
        # An unchanged result or a '*' in the output is treated as
        # "no usable transcription".
        unresolved = ipa == word or '*' in ipa
        return None if unresolved else ipa
    except Exception:
        # Best effort: any failure during lookup is reported as "not found".
        return None
def get_stress_pattern(word):
    """
    Find where the primary stress falls in a word.

    Args:
        word: str
    Returns:
        The 1-based position of the first '1' in the word's first stress
        pattern, or None when no pattern is available, the pattern has a
        single entry, or it contains no '1'.
    Explain:
        pronouncing.stresses_for_word returns a list of stress patterns where:
        1 indicates primary stress (main stress),
        2 indicates secondary stress,
        0 indicates no stress.
        Only the first pattern in that list is inspected.
    """
    stress_patterns = pronouncing.stresses_for_word(word)
    if not stress_patterns:
        return None

    first_pattern = stress_patterns[0]
    # A one-entry pattern has no stress position worth asking about, and a
    # pattern without '1' has no primary stress at all.
    if len(first_pattern) == 1 or '1' not in first_pattern:
        return None
    return first_pattern.index('1') + 1
# Imports for the word-transformation helpers defined below.
import random
from typing import Optional
import nltk
# Side effect at import time: asks NLTK to fetch the 'wordnet' corpus, which
# the `wn` corpus reader imported on the next line reads from.
nltk.download('wordnet')
from nltk.corpus import wordnet as wn
def ç(word: str) -> Optional[str]:
    """
    Transform a word into another word by changing its type, tense, article-related form,
    or meaning to create an incorrect answer for a 'find the wrong word' question.

    The transformation strategies are tried in a random order and the first
    one that yields a non-empty result different from ``word`` wins. If none
    succeeds, a random entry of the project's ``nltk_words`` list (other than
    ``word`` itself) is returned instead.

    Args:
        word (str): The input word to transform.
    Returns:
        Optional[str]: The transformed word, or None if no transformation is
        possible and the fallback word list is unavailable or offers no
        other word.
    """
    strategies = [
        transform_preposition,  # Handle prepositions
        transform_word_type,    # Change word type (e.g., noun to verb)
        transform_tense,        # Change verb tense
        transform_article,      # Change article-related form
        transform_meaning,      # Change to a word with different meaning
    ]
    # Try every strategy, in random order, so none is systematically preferred.
    random.shuffle(strategies)
    for strategy in strategies:
        candidate = strategy(word)
        if candidate and candidate != word:
            return candidate

    # Fallback: a random word from nltk_words if no transformation works.
    # The import is local and optional so this module still loads when the
    # project package is not importable.
    try:
        from src.factories.gen_question.types.base import nltk_words
    except ImportError:
        return None
    if not nltk_words:
        return None
    # Never hand back the input itself: an unchanged word is not a wrong
    # answer, which is the same rule the strategy loop above enforces.
    alternatives = [entry for entry in nltk_words if entry != word]
    return random.choice(alternatives) if alternatives else None
def transform_preposition(word: str) -> Optional[str]:
    """
    Swap a preposition for a different one that is likely to be wrong in context.

    Args:
        word (str): The input word to check and transform (matched case-insensitively).
    Returns:
        Optional[str]: One of the listed substitutes, chosen at random, or
        None if the input is not one of the known prepositions.
    """
    # Each known preposition with the substitutes it is commonly confused with.
    confusable = {
        'in': ['on', 'at', 'to'],
        'on': ['in', 'at', 'over'],
        'at': ['in', 'on', 'by'],
        'to': ['in', 'at', 'for'],
        'for': ['to', 'with', 'in'],
        'with': ['for', 'by', 'in'],
        'by': ['with', 'at', 'on'],
        'from': ['to', 'in', 'at'],
        'of': ['for', 'in', 'on'],
    }
    substitutes = confusable.get(word.lower())
    if substitutes is None:
        return None
    picked = random.sample(substitutes, 1)
    return picked[0]
def transform_word_type(word: str) -> Optional[str]:
    """
    Transform a word by changing its part of speech (e.g., noun to verb).
    Uses WordNet to find related words with different POS.

    The part of speech of the word's first synset decides the target part of
    speech (noun <-> verb, adjective <-> adverb). The word's own synsets are
    then searched for one with the target part of speech, and the first of
    its lemma names that differs from ``word`` is returned.

    Args:
        word (str): The input word to transform.
    Returns:
        Optional[str]: A lemma name (underscores replaced by spaces) with the
        target part of speech, or None if WordNet does not know the word, its
        part of speech has no mapping, or no differing lemma exists.
    """
    # Source POS tag -> acceptable target POS tags. 's' is WordNet's tag for
    # satellite adjectives, so it is treated the same as 'a' on both sides.
    pos_map = {
        'n': ('v',),      # Noun to verb
        'v': ('n',),      # Verb to noun
        'a': ('r',),      # Adjective to adverb
        's': ('r',),      # Satellite adjective to adverb
        'r': ('a', 's'),  # Adverb to adjective
    }
    synsets = wn.synsets(word)
    if not synsets:
        return None

    target_tags = pos_map.get(synsets[0].pos())  # first synset decides the POS
    if not target_tags:
        return None

    word_lower = word.lower()
    for synset in synsets:
        if synset.pos() not in target_tags:
            continue
        for lemma in synset.lemmas():
            candidate = lemma.name().replace('_', ' ')
            # These synsets were looked up by `word`, so their lemmas often
            # include `word` itself; skip it, since returning the input
            # unchanged is not a transformation.
            if candidate.lower() != word_lower:
                return candidate
    return None
def transform_tense(word: str) -> Optional[str]:
    """
    Transform a verb by changing its tense (e.g., present to past).
    Uses a small table of irregular verbs plus simple rules for regular ones.

    Args:
        word (str): The input word; it is not checked to actually be a verb.
    Returns:
        Optional[str]: The past-tense form, or None when no rule applies
        (including empty and one-letter inputs other than 'e').
    """
    vowels = 'aeiou'
    # Irregular verbs (small hardcoded list for simplicity)
    irregular = {
        'run': 'ran',
        'go': 'went',
        'see': 'saw',
        'write': 'wrote',
        'is': 'was',
        'are': 'were',
    }
    # Irregular forms must be checked first: 'see', 'write' and 'are' end in
    # 'e' and would otherwise be caught by the regular "+d" rule below.
    if word in irregular:
        return irregular[word]

    if word.endswith('e'):
        return word + 'd'  # e.g., love -> loved

    # The remaining rules inspect the last two letters.
    if len(word) < 2:
        return None
    before_last, last = word[-2], word[-1]
    # Both remaining rules require a non-vowel before the final letter.
    if before_last in vowels:
        return None
    if last == 'y':
        return word[:-1] + 'ied'  # e.g., study -> studied
    if last not in vowels:
        return word + 'ed'  # e.g., walk -> walked
    return None
def transform_article(word: str) -> Optional[str]:
    """
    Produce an article-related variation of a word.

    The articles 'a' and 'an' (matched case-insensitively) are swapped for
    each other. Any other word is looked up as a noun in WordNet and replaced
    by a randomly chosen lemma name from its noun synsets.

    Args:
        word (str): The input word to transform.
    Returns:
        Optional[str]: The swapped article, a lemma name (underscores replaced
        by spaces) different from ``word``, or None if WordNet has no noun
        synsets or no differing lemma name for it.
    """
    lowered = word.lower()
    if lowered == 'a':
        return 'an'
    if lowered == 'an':
        return 'a'

    # For nouns, collect every lemma name across the word's noun synsets.
    noun_senses = wn.synsets(word, pos='n')
    if not noun_senses:
        return None
    lemma_names = (
        lemma.name().replace('_', ' ')
        for sense in noun_senses
        for lemma in sense.lemmas()
    )
    alternatives = [name for name in lemma_names if name != word]
    if not alternatives:
        return None
    return random.sample(alternatives, 1)[0]
def transform_meaning(word: str) -> Optional[str]:
    """
    Transform a word to another word with the same part of speech.

    The part of speech is taken from the word's first WordNet synset. The
    replacement is drawn at random from every lemma name WordNet lists for
    that part of speech, excluding ``word`` itself. Nothing here checks that
    the chosen word's meaning actually differs, so it may occasionally be a
    synonym of the input.

    Args:
        word (str): The input word to transform.
    Returns:
        Optional[str]: A randomly chosen lemma name (underscores replaced by
        spaces), or None if WordNet does not know the word or no other
        candidate exists.
    """
    synsets = wn.synsets(word)
    if not synsets:
        return None
    current_pos = synsets[0].pos()

    # Every lemma name for this part of speech, lazily generated.
    lemma_names = (
        lemma.name().replace('_', ' ')
        for synset in wn.all_synsets(pos=current_pos)
        for lemma in synset.lemmas()
    )
    # dict.fromkeys de-duplicates in constant time per name while keeping
    # first-seen order; a `not in list` check here would be quadratic over
    # the whole vocabulary for the part of speech.
    different_words = [name for name in dict.fromkeys(lemma_names) if name != word]
    # random.sample is kept (rather than random.choice) so the pick for a
    # given random seed stays what it was before.
    return random.sample(different_words, 1)[0] if different_words else None
|