Commit ·
2fe1569
1
Parent(s): 9cae8f8
HOTFIX: Remove broad preposition/nasb regex rules — caused Hallucination 0%, +38 FP
Browse files
The regex patterns (ون→ين after prepositions, ون→وا after nasb particles)
were matching correct text everywhere. CamelTools-based rules handle these
with POS-tag awareness. Keep only narrow five-nouns patterns.
src/nlp/grammar/grammar_rules.py
CHANGED
|
@@ -351,13 +351,10 @@ class ArabicGrammarGuard:
|
|
| 351 |
text = re.sub(r'\b([وف]?[بل])(أبوك|أباك|أخوك|أخاك|ذو|ذا)\b',
|
| 352 |
lambda m: f"{m.group(1)}{m.group(2).replace('و', 'ي').replace('ا', 'ي')}", text)
|
| 353 |
|
| 354 |
-
#
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
# ── NEW: Nasb/Jazm ون→وا (لن يذهبون→يذهبوا) ──
|
| 360 |
-
text = re.sub(r'\b(لن|لم|كي|لكي|حتى|أن)\s+([أ-ي]+)(ون)\b', r'\1 \2وا', text)
|
| 361 |
|
| 362 |
return text
|
| 363 |
|
|
|
|
| 351 |
text = re.sub(r'\b([وف]?[بل])(أبوك|أباك|أخوك|أخاك|ذو|ذا)\b',
|
| 352 |
lambda m: f"{m.group(1)}{m.group(2).replace('و', 'ي').replace('ا', 'ي')}", text)
|
| 353 |
|
| 354 |
+
# NOTE: Broad preposition case (ون→ين) and nasb (ون→وا) regex rules
|
| 355 |
+
# were REMOVED because they caused massive overcorrection on correct text.
|
| 356 |
+
# These patterns are handled by CamelTools-based rules (fix_prepositions_advanced,
|
| 357 |
+
# fix_verbs_nasb_and_jazm) which have POS-tag awareness.
|
|
|
|
|
|
|
|
|
|
| 358 |
|
| 359 |
return text
|
| 360 |
|
tests/phase10/reports/phase10_results.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|