Bayan AI committed on
Commit ·
100db1e
1
Parent(s): 19a801d
Fix: Add missing dual and plural IVtoOOV bypass rules in app.py
Browse files- src/app.py +37 -34
src/app.py
CHANGED
|
@@ -2009,58 +2009,61 @@ def analyze_text():
|
|
| 2009 |
# Evaluate grammar patterns early to bypass heuristic blocks.
|
| 2010 |
_is_grammar_pattern = False
|
| 2011 |
if orig_text and corr_text:
|
| 2012 |
-
|
| 2013 |
-
|
| 2014 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2015 |
_is_grammar_pattern = True
|
| 2016 |
# Nasb/Jazm: ون → وا (verb mood)
|
| 2017 |
-
elif (
|
| 2018 |
-
_o_cl = orig_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…')
|
| 2019 |
-
_c_cl = corr_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…')
|
| 2020 |
_o_stem = _o_cl[:-2]
|
| 2021 |
_c_stem = _c_cl[:-2]
|
| 2022 |
if _o_stem == _c_stem or (len(_o_stem) > 1 and _o_stem[1:] == _c_stem[1:] and _o_stem[0] in 'يت' and _c_stem[0] in 'يت'):
|
| 2023 |
_is_grammar_pattern = True
|
| 2024 |
-
# Five nouns: وك → اك/يك
|
| 2025 |
-
elif (len(
|
| 2026 |
-
orig_text[-2:] in ('وك', 'وه') and
|
| 2027 |
-
corr_text[-2:] in ('اك', 'يك', 'اه', 'يه')):
|
| 2028 |
-
_is_grammar_pattern = True
|
| 2029 |
-
# Dual: ان → ين (dual oblique)
|
| 2030 |
-
elif (orig_text.endswith('ان') and corr_text.endswith('ين') and
|
| 2031 |
-
orig_text[:-2] == corr_text[:-2] and len(orig_text) >= 4):
|
| 2032 |
_is_grammar_pattern = True
|
| 2033 |
# Demonstrative: هذان→هاتان, هاتان→هذان
|
| 2034 |
-
elif ({
|
| 2035 |
_is_grammar_pattern = True
|
| 2036 |
-
#
|
| 2037 |
-
|
| 2038 |
-
elif (corr_text.endswith('وا') and corr_text[:-2] == orig_text
|
| 2039 |
-
and len(orig_text) >= 3):
|
| 2040 |
_is_grammar_pattern = True
|
| 2041 |
-
# Past tense fem plural: verb→verb+ن
|
| 2042 |
-
elif (
|
| 2043 |
-
and len(orig_text) >= 3):
|
| 2044 |
_is_grammar_pattern = True
|
| 2045 |
-
# Present tense fem plural: ون → ن
|
| 2046 |
-
elif (
|
| 2047 |
-
_o_cl = orig_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…')
|
| 2048 |
-
_c_cl = corr_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…')
|
| 2049 |
_o_stem = _o_cl[:-2]
|
| 2050 |
_c_stem = _c_cl[:-1]
|
| 2051 |
if _o_stem == _c_stem or (len(_o_stem) > 1 and _o_stem[1:] == _c_stem[1:] and _o_stem[0] in 'يت' and _c_stem[0] in 'يت'):
|
| 2052 |
_is_grammar_pattern = True
|
| 2053 |
-
#
|
| 2054 |
-
elif (
|
| 2055 |
-
and len(orig_text) >= 3):
|
| 2056 |
_is_grammar_pattern = True
|
| 2057 |
-
#
|
| 2058 |
-
elif (
|
| 2059 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2060 |
_is_grammar_pattern = True
|
| 2061 |
# Gender with ي: ذكي→ذكية
|
| 2062 |
-
elif (
|
| 2063 |
-
and orig_text.endswith('ي') and len(orig_text) >= 3):
|
| 2064 |
_is_grammar_pattern = True
|
| 2065 |
|
| 2066 |
|
|
|
|
| 2009 |
# Evaluate grammar patterns early to bypass heuristic blocks.
|
| 2010 |
_is_grammar_pattern = False
|
| 2011 |
if orig_text and corr_text:
|
| 2012 |
+
_o_cl = orig_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…')
|
| 2013 |
+
_c_cl = corr_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…')
|
| 2014 |
+
|
| 2015 |
+
# Case: ون/ان → ين (sound masculine plural / dual case change)
|
| 2016 |
+
if (_o_cl.endswith('ون') and _c_cl.endswith('ين') and _o_cl[:-2] == _c_cl[:-2]):
|
| 2017 |
+
_is_grammar_pattern = True
|
| 2018 |
+
elif (_o_cl.endswith('ان') and _c_cl.endswith('ين') and _o_cl[:-2] == _c_cl[:-2] and len(_o_cl) >= 4):
|
| 2019 |
_is_grammar_pattern = True
|
| 2020 |
# Nasb/Jazm: ون → وا (verb mood)
|
| 2021 |
+
elif (_o_cl.endswith('ون') and _c_cl.endswith('وا') and len(_o_cl) >= 3):
|
|
|
|
|
|
|
| 2022 |
_o_stem = _o_cl[:-2]
|
| 2023 |
_c_stem = _c_cl[:-2]
|
| 2024 |
if _o_stem == _c_stem or (len(_o_stem) > 1 and _o_stem[1:] == _c_stem[1:] and _o_stem[0] in 'يت' and _c_stem[0] in 'يت'):
|
| 2025 |
_is_grammar_pattern = True
|
| 2026 |
+
# Five nouns: وك → اك/يك
|
| 2027 |
+
elif (len(_o_cl) >= 3 and len(_c_cl) >= 3 and _o_cl[-2:] in ('وك', 'وه') and _c_cl[-2:] in ('اك', 'يك', 'اه', 'يه')):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2028 |
_is_grammar_pattern = True
|
| 2029 |
# Demonstrative: هذان→هاتان, هاتان→هذان
|
| 2030 |
+
elif ({_o_cl, _c_cl} <= {'هذان', 'هاتان'}):
|
| 2031 |
_is_grammar_pattern = True
|
| 2032 |
+
# Past tense masc plural: verb→verb+وا
|
| 2033 |
+
elif (_c_cl.endswith('وا') and _c_cl[:-2] == _o_cl and len(_o_cl) >= 3):
|
|
|
|
|
|
|
| 2034 |
_is_grammar_pattern = True
|
| 2035 |
+
# Past tense fem plural: verb→verb+ن
|
| 2036 |
+
elif (_c_cl.endswith('ن') and _c_cl[:-1] == _o_cl and len(_o_cl) >= 3):
|
|
|
|
| 2037 |
_is_grammar_pattern = True
|
| 2038 |
+
# Present tense fem plural: ون → ن
|
| 2039 |
+
elif (_o_cl.endswith('ون') and _c_cl.endswith('ن') and len(_o_cl) >= 3):
|
|
|
|
|
|
|
| 2040 |
_o_stem = _o_cl[:-2]
|
| 2041 |
_c_stem = _c_cl[:-1]
|
| 2042 |
if _o_stem == _c_stem or (len(_o_stem) > 1 and _o_stem[1:] == _c_stem[1:] and _o_stem[0] in 'يت' and _c_stem[0] in 'يت'):
|
| 2043 |
_is_grammar_pattern = True
|
| 2044 |
+
# Masc Plural Addition: +ون
|
| 2045 |
+
elif (_c_cl.endswith('ون') and _c_cl[:-2] == _o_cl and len(_o_cl) >= 3):
|
|
|
|
| 2046 |
_is_grammar_pattern = True
|
| 2047 |
+
# Dual Addition: +ان or +ين
|
| 2048 |
+
elif ((_c_cl.endswith('ان') or _c_cl.endswith('ين')) and _c_cl[:-2] == _o_cl and len(_o_cl) >= 3):
|
| 2049 |
+
_is_grammar_pattern = True
|
| 2050 |
+
# Feminine Dual Addition: +تان / +تين
|
| 2051 |
+
elif (_c_cl.endswith('تان') or _c_cl.endswith('تين')):
|
| 2052 |
+
if _o_cl.endswith('ة') and _c_cl[:-3] == _o_cl[:-1] and len(_o_cl) >= 3:
|
| 2053 |
+
_is_grammar_pattern = True
|
| 2054 |
+
elif _c_cl[:-3] == _o_cl and len(_o_cl) >= 3:
|
| 2055 |
+
_is_grammar_pattern = True
|
| 2056 |
+
# Feminine Plural Addition: +ات
|
| 2057 |
+
elif (_c_cl.endswith('ات') and len(_c_cl) >= 4):
|
| 2058 |
+
if _o_cl.endswith('ة') and _c_cl[:-2] == _o_cl[:-1]:
|
| 2059 |
+
_is_grammar_pattern = True
|
| 2060 |
+
elif _c_cl[:-2] == _o_cl:
|
| 2061 |
+
_is_grammar_pattern = True
|
| 2062 |
+
# Gender: +ة (جميل→جميلة)
|
| 2063 |
+
elif (_c_cl.endswith('ة') and _c_cl[:-1] == _o_cl and len(_o_cl) >= 3):
|
| 2064 |
_is_grammar_pattern = True
|
| 2065 |
# Gender with ي: ذكي→ذكية
|
| 2066 |
+
elif (_c_cl.endswith('ية') and _c_cl[:-1] == _o_cl[:-1] + 'ي' and _o_cl.endswith('ي') and len(_o_cl) >= 3):
|
|
|
|
| 2067 |
_is_grammar_pattern = True
|
| 2068 |
|
| 2069 |
|