add extra regex check to detect plurals
Browse files
utils.py
CHANGED
|
@@ -836,6 +836,11 @@ PLURAL_TO_SINGULAR_EXCLUSIONS = [
|
|
| 836 |
|
| 837 |
p = inflect.engine()
|
| 838 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 839 |
def is_plural_wordnet(word):
|
| 840 |
# Check if WordNet has both singular and plural forms
|
| 841 |
singular_synsets = wordnet.synsets(word, pos=wordnet.NOUN)
|
|
@@ -860,6 +865,8 @@ def plural_to_singular(word):
|
|
| 860 |
"""Convert plural word to singular using inflect."""
|
| 861 |
if is_plural(word):
|
| 862 |
return p.singular_noun(word) or word
|
|
|
|
|
|
|
| 863 |
if is_plural_wordnet(word):
|
| 864 |
return p.singular_noun(word) or word
|
| 865 |
return word
|
|
|
|
| 836 |
|
| 837 |
p = inflect.engine()
|
| 838 |
|
| 839 |
+
def is_plural_regex(word):
    """Return True if *word* looks plural according to a simple suffix rule.

    The check is case-insensitive: a word is reported as plural when it
    ends in "s" (which also covers the "-es" and "-ies" endings) but does
    not end in "ss" (e.g. "class", "glass").

    This is a purely orthographic heuristic, so singular words that merely
    end in a single "s" (e.g. "bus") are reported as plural as well.

    Args:
        word: The word to test (a string).

    Returns:
        bool: True when the suffix rule matches, False otherwise.
    """
    lowered = word.lower()
    # "s$" subsumes the "es$" and "ies$" alternatives, so one pattern is
    # enough. bool() ensures callers always receive a real boolean instead
    # of None or a Match object.
    return bool(re.search(r's$', lowered)) and not re.search(r'ss$', lowered)
|
| 843 |
+
|
| 844 |
def is_plural_wordnet(word):
|
| 845 |
# Check if WordNet has both singular and plural forms
|
| 846 |
singular_synsets = wordnet.synsets(word, pos=wordnet.NOUN)
|
|
|
|
| 865 |
"""Convert plural word to singular using inflect."""
|
| 866 |
if is_plural(word):
|
| 867 |
return p.singular_noun(word) or word
|
| 868 |
+
if is_plural_regex(word):
|
| 869 |
+
return p.singular_noun(word) or word
|
| 870 |
if is_plural_wordnet(word):
|
| 871 |
return p.singular_noun(word) or word
|
| 872 |
return word
|