Bayan AI committed on
Commit
100db1e
·
1 Parent(s): 19a801d

Fix: Add missing dual and plural IVtoOOV bypass rules in app.py

Browse files
Files changed (1) hide show
  1. src/app.py +37 -34
src/app.py CHANGED
@@ -2009,58 +2009,61 @@ def analyze_text():
2009
  # Evaluate grammar patterns early to bypass heuristic blocks.
2010
  _is_grammar_pattern = False
2011
  if orig_text and corr_text:
2012
- # Case: ون/ان → ين (sound masculine plural case)
2013
- if (orig_text.endswith('ون') and corr_text.endswith('ين') and
2014
- orig_text[:-2] == corr_text[:-2]):
 
 
 
 
2015
  _is_grammar_pattern = True
2016
  # Nasb/Jazm: ون → وا (verb mood)
2017
- elif (orig_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…').endswith('ون') and corr_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…').endswith('وا') and len(orig_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…')) >= 3):
2018
- _o_cl = orig_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…')
2019
- _c_cl = corr_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…')
2020
  _o_stem = _o_cl[:-2]
2021
  _c_stem = _c_cl[:-2]
2022
  if _o_stem == _c_stem or (len(_o_stem) > 1 and _o_stem[1:] == _c_stem[1:] and _o_stem[0] in 'يت' and _c_stem[0] in 'يت'):
2023
  _is_grammar_pattern = True
2024
- # Five nouns: وك → اك/يك (أبوك→أباك, أخوك→أخيك)
2025
- elif (len(orig_text) >= 3 and len(corr_text) >= 3 and
2026
- orig_text[-2:] in ('وك', 'وه') and
2027
- corr_text[-2:] in ('اك', 'يك', 'اه', 'يه')):
2028
- _is_grammar_pattern = True
2029
- # Dual: ان → ين (dual oblique)
2030
- elif (orig_text.endswith('ان') and corr_text.endswith('ين') and
2031
- orig_text[:-2] == corr_text[:-2] and len(orig_text) >= 4):
2032
  _is_grammar_pattern = True
2033
  # Demonstrative: هذان→هاتان, هاتان→هذان
2034
- elif ({orig_text, corr_text} <= {'هذان', 'هاتان'}):
2035
  _is_grammar_pattern = True
2036
- # ── NEW: SV agreement suffix additions ──
2037
- # Past tense masc plural: verb→verb+وا (ذهب→ذهبوا, حضر→حضروا)
2038
- elif (corr_text.endswith('وا') and corr_text[:-2] == orig_text
2039
- and len(orig_text) >= 3):
2040
  _is_grammar_pattern = True
2041
- # Past tense fem plural: verb→verb+ن (ذهب→ذهبن, حضر→حضرن)
2042
- elif (corr_text.endswith('ن') and corr_text[:-1] == orig_text
2043
- and len(orig_text) >= 3):
2044
  _is_grammar_pattern = True
2045
- # Present tense fem plural: ون → ن (يلعبون → يلعبن or يلعبون → تلعبن)
2046
- elif (orig_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…').endswith('ون') and corr_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…').endswith('ن') and len(orig_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…')) >= 3):
2047
- _o_cl = orig_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…')
2048
- _c_cl = corr_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…')
2049
  _o_stem = _o_cl[:-2]
2050
  _c_stem = _c_cl[:-1]
2051
  if _o_stem == _c_stem or (len(_o_stem) > 1 and _o_stem[1:] == _c_stem[1:] and _o_stem[0] in 'يت' and _c_stem[0] in 'يت'):
2052
  _is_grammar_pattern = True
2053
- # Present tense masc plural: يفعل→يفعلون (adding ون)
2054
- elif (corr_text.endswith('ون') and corr_text[:-2] == orig_text
2055
- and len(orig_text) >= 3):
2056
  _is_grammar_pattern = True
2057
- # Gender: adjective→adjective+ة (جميل→جميلة, كبير→كبيرة)
2058
- elif (corr_text.endswith('ة') and corr_text[:-1] == orig_text
2059
- and len(orig_text) >= 3):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2060
  _is_grammar_pattern = True
2061
  # Gender with ي: ذكي→ذكية
2062
- elif (corr_text.endswith('ية') and corr_text[:-1] == orig_text[:-1] + 'ي'
2063
- and orig_text.endswith('ي') and len(orig_text) >= 3):
2064
  _is_grammar_pattern = True
2065
 
2066
 
 
2009
  # Evaluate grammar patterns early to bypass heuristic blocks.
2010
  _is_grammar_pattern = False
2011
  if orig_text and corr_text:
2012
+ _o_cl = orig_text.rstrip('.,،؛;:!؟?()[]{}«»"\'…')
2013
+ _c_cl = corr_text.rstrip('.,،؛;:!؟?()[]{}«»"\'')
2014
+
2015
+ # Case: ون/ان → ين (sound masculine plural / dual case change)
2016
+ if (_o_cl.endswith('ون') and _c_cl.endswith('ين') and _o_cl[:-2] == _c_cl[:-2]):
2017
+ _is_grammar_pattern = True
2018
+ elif (_o_cl.endswith('ان') and _c_cl.endswith('ين') and _o_cl[:-2] == _c_cl[:-2] and len(_o_cl) >= 4):
2019
  _is_grammar_pattern = True
2020
  # Nasb/Jazm: ون → وا (verb mood)
2021
+ elif (_o_cl.endswith('ون') and _c_cl.endswith('وا') and len(_o_cl) >= 3):
 
 
2022
  _o_stem = _o_cl[:-2]
2023
  _c_stem = _c_cl[:-2]
2024
  if _o_stem == _c_stem or (len(_o_stem) > 1 and _o_stem[1:] == _c_stem[1:] and _o_stem[0] in 'يت' and _c_stem[0] in 'يت'):
2025
  _is_grammar_pattern = True
2026
+ # Five nouns: وك → اك/يك
2027
+ elif (len(_o_cl) >= 3 and len(_c_cl) >= 3 and _o_cl[-2:] in ('وك', 'وه') and _c_cl[-2:] in ('اك', 'يك', 'اه', 'يه')):
 
 
 
 
 
 
2028
  _is_grammar_pattern = True
2029
  # Demonstrative: هذان→هاتان, هاتان→هذان
2030
+ elif ({_o_cl, _c_cl} <= {'هذان', 'هاتان'}):
2031
  _is_grammar_pattern = True
2032
+ # Past tense masc plural: verb→verb+وا
2033
+ elif (_c_cl.endswith('وا') and _c_cl[:-2] == _o_cl and len(_o_cl) >= 3):
 
 
2034
  _is_grammar_pattern = True
2035
+ # Past tense fem plural: verb→verb+ن
2036
+ elif (_c_cl.endswith('ن') and _c_cl[:-1] == _o_cl and len(_o_cl) >= 3):
 
2037
  _is_grammar_pattern = True
2038
+ # Present tense fem plural: ون → ن
2039
+ elif (_o_cl.endswith('ون') and _c_cl.endswith('ن') and len(_o_cl) >= 3):
 
 
2040
  _o_stem = _o_cl[:-2]
2041
  _c_stem = _c_cl[:-1]
2042
  if _o_stem == _c_stem or (len(_o_stem) > 1 and _o_stem[1:] == _c_stem[1:] and _o_stem[0] in 'يت' and _c_stem[0] in 'يت'):
2043
  _is_grammar_pattern = True
2044
+ # Masc Plural Addition: +ون
2045
+ elif (_c_cl.endswith('ون') and _c_cl[:-2] == _o_cl and len(_o_cl) >= 3):
 
2046
  _is_grammar_pattern = True
2047
+ # Dual Addition: +ان or +ين
2048
+ elif ((_c_cl.endswith('ان') or _c_cl.endswith('ين')) and _c_cl[:-2] == _o_cl and len(_o_cl) >= 3):
2049
+ _is_grammar_pattern = True
2050
+ # Feminine Dual Addition: +تان / +تين
2051
+ elif (_c_cl.endswith('تان') or _c_cl.endswith('تين')):
2052
+ if _o_cl.endswith('ة') and _c_cl[:-3] == _o_cl[:-1] and len(_o_cl) >= 3:
2053
+ _is_grammar_pattern = True
2054
+ elif _c_cl[:-3] == _o_cl and len(_o_cl) >= 3:
2055
+ _is_grammar_pattern = True
2056
+ # Feminine Plural Addition: +ات
2057
+ elif (_c_cl.endswith('ات') and len(_c_cl) >= 4):
2058
+ if _o_cl.endswith('ة') and _c_cl[:-2] == _o_cl[:-1]:
2059
+ _is_grammar_pattern = True
2060
+ elif _c_cl[:-2] == _o_cl:
2061
+ _is_grammar_pattern = True
2062
+ # Gender: +ة (جميل→جميلة)
2063
+ elif (_c_cl.endswith('ة') and _c_cl[:-1] == _o_cl and len(_o_cl) >= 3):
2064
  _is_grammar_pattern = True
2065
  # Gender with ي: ذكي→ذكية
2066
+ elif (_c_cl.endswith('ية') and _c_cl[:-1] == _o_cl[:-1] + 'ي' and _o_cl.endswith('ي') and len(_o_cl) >= 3):
 
2067
  _is_grammar_pattern = True
2068
 
2069