Spaces:

bayan10
/

bayan-api

Running

App Files Files Community

youssefreda9 commited on 15 days ago

Commit

01b11d4

1 Parent(s): 61cee96

revert: restore backend/model files to cf07939 state, keep UI/UX changes only

Browse files

Files changed (4) hide show

src/app.py +0 -254
src/nlp/punctuation/punctuation_rules.py +0 -14
src/nlp/spelling/araspell_rules.py +0 -15
tests/test_bug_fixes.py +0 -181

src/app.py CHANGED Viewed

@@ -6,9 +6,6 @@ Provides API endpoints for the Bayan web application.
 import os
 import logging
 import time
-import hashlib
-from collections import OrderedDict
-from functools import wraps
 from flask import Flask, request, jsonify, Response
 from flask_cors import CORS
 from pathlib import Path
@@ -79,119 +76,6 @@ MAX_TEXT_LENGTH = 5000  # Maximum characters for input text
 MAX_SUMMARY_LENGTH = 512  # Maximum tokens for summary
 MIN_TEXT_LENGTH = 10  # Minimum characters for summarization
-# ── Response Cache (P3) ──
-# LRU cache for /api/analyze: hash(text) → (response_dict, timestamp)
-_ANALYZE_CACHE_MAX = 500
-_ANALYZE_CACHE_TTL = 300  # 5 minutes
-_analyze_cache = OrderedDict()
-# ── Rate Limiter (P3) ──
-_RATE_LIMIT_MAX = 30  # requests per window
-_RATE_LIMIT_WINDOW = 60  # seconds
-_rate_limit_store = {}  # ip → [(timestamp, ...)]
-# ── Ta Marbuta Dictionary (P2) ──
-# Common words where ه at the end should be ة
-_TA_MARBUTA_DICT = {
-    'المدرسه': 'المدرسة', 'الجامعه': 'الجامعة', 'المكتبه': 'المكتبة',
-    'الحياه': 'الحياة', 'الصلاه': 'الصلاة', 'الزكاه': 'الزكاة',
-    'القراءه': 'القراءة', 'الكتابه': 'الكتابة', 'المعرفه': 'المعرفة',
-    'الثقافه': 'الثقافة', 'السياسه': 'السياسة', 'الاقتصاديه': 'الاقتصادية',
-    'العربيه': 'العربية', 'الاسلاميه': 'الإسلامية', 'التربيه': 'التربية',
-    'الشريعه': 'الشريعة', 'الدوله': 'الدولة', 'الحكومه': 'الحكومة',
-    'المدينه': 'المدينة', 'القريه': 'القرية', 'الغرفه': 'الغرفة',
-    'السياره': 'السيارة', 'الطاوله': 'الطاولة', 'الرساله': 'الرسالة',
-    'المقاله': 'المقالة', 'الصحيفه': 'الصحيفة', 'الجريده': 'الجريدة',
-    'القصه': 'القصة', 'الروايه': 'الرواية', 'اللغه': 'اللغة',
-    'الفكره': 'الفكرة', 'الخطوه': 'الخطوة', 'المرحله': 'المرحلة',
-    'النتيجه': 'النتيجة', 'المشكله': 'المشكلة', 'الطريقه': 'الطريقة',
-    'الحاله': 'الحالة', 'الصوره': 'الصورة', 'القوه': 'القوة',
-    'الوحده': 'الوحدة', 'العلاقه': 'العلاقة', 'التجربه': 'التجربة',
-    'الحركه': 'الحركة', 'السلطه': 'السلطة', 'المنطقه': 'المنطقة',
-    'الساعه': 'الساعة', 'اللحظه': 'اللحظة', 'الفتره': 'الفترة',
-    'الاداره': 'الإدارة', 'البيئه': 'البيئة', 'الماده': 'المادة',
-    'الاسره': 'الأسرة', 'العائله': 'العائلة', 'الشركه': 'الشركة',
-    'المؤسسه': 'المؤسسة', 'المنظمه': 'المنظمة', 'الجمعيه': 'الجمعية',
-    'الوزاره': 'الوزارة', 'السفاره': 'السفارة', 'القياده': 'القيادة',
-    'الزياره': 'الزيارة', 'المحاوله': 'المحاولة', 'الدراسه': 'الدراسة',
-    'الممارسه': 'الممارسة', 'المتابعه': 'المتابعة', 'الخدمه': 'الخدمة',
-    'التقنيه': 'التقنية', 'الهندسه': 'الهندسة', 'الفلسفه': 'الفلسفة',
-    'مدرسه': 'مدرسة', 'جامعه': 'جامعة', 'مكتبه': 'مكتبة',
-    'حياه': 'حياة', 'صلاه': 'صلاة', 'زكاه': 'زكاة',
-    'لغه': 'لغة', 'قصه': 'قصة', 'فكره': 'فكرة',
-    'خطوه': 'خطوة', 'صوره': 'صورة', 'قوه': 'قوة',
-    'سياره': 'سيارة', 'رساله': 'رسالة', 'ساعه': 'ساعة',
-    'غرفه': 'غرفة', 'شركه': 'شركة', 'دوله': 'دولة',
-}
-def _fix_ta_marbuta(text):
-    """Fix common ه→ة errors at pipeline level using dictionary lookup."""
-    words = text.split()
-    fixed_words = []
-    changes = []
-    pos = 0
-    for word in words:
-        start = text.find(word, pos)
-        end = start + len(word)
-        # Check bare word
-        if word in _TA_MARBUTA_DICT:
-            fixed_words.append(_TA_MARBUTA_DICT[word])
-            changes.append({'start': start, 'end': end, 'original': word, 'correction': _TA_MARBUTA_DICT[word]})
-        # Check word ending in ه that should be ة (pattern match)
-        elif word.endswith('ه') and len(word) >= 3:
-            candidate = word[:-1] + 'ة'
-            if candidate in _TA_MARBUTA_DICT.values():
-                fixed_words.append(candidate)
-                changes.append({'start': start, 'end': end, 'original': word, 'correction': candidate})
-            else:
-                fixed_words.append(word)
-        else:
-            fixed_words.append(word)
-        pos = end
-    return ' '.join(fixed_words), changes
-def _check_rate_limit(ip):
-    """Check if IP has exceeded rate limit. Returns True if allowed."""
-    now = time.time()
-    if ip not in _rate_limit_store:
-        _rate_limit_store[ip] = []
-    # Clean old entries
-    _rate_limit_store[ip] = [t for t in _rate_limit_store[ip] if now - t < _RATE_LIMIT_WINDOW]
-    if len(_rate_limit_store[ip]) >= _RATE_LIMIT_MAX:
-        return False
-    _rate_limit_store[ip].append(now)
-    return True
-def _get_cache_key(text):
-    """Generate cache key from text."""
-    return hashlib.md5(text.encode('utf-8')).hexdigest()
-def _get_cached_response(text):
-    """Get cached response if exists and not expired."""
-    key = _get_cache_key(text)
-    if key in _analyze_cache:
-        data, ts = _analyze_cache[key]
-        if time.time() - ts < _ANALYZE_CACHE_TTL:
-            _analyze_cache.move_to_end(key)
-            return data
-        else:
-            del _analyze_cache[key]
-    return None
-def _set_cached_response(text, response_data):
-    """Store response in cache."""
-    key = _get_cache_key(text)
-    _analyze_cache[key] = (response_data, time.time())
-    # Evict oldest if over limit
-    while len(_analyze_cache) > _ANALYZE_CACHE_MAX:
-        _analyze_cache.popitem(last=False)
 # Global model instances
 summarization_model = None
 spelling_model = None
@@ -1033,12 +917,6 @@ def _is_small_spelling_change(orig_word, corr_word, vocab_manager=None):
         ('ء', 'أ'), ('أ', 'ء'),  # standalone hamza ↔ hamza on alef
         ('ء', 'ؤ'), ('ؤ', 'ء'),  # standalone hamza ↔ hamza on waw
         ('ء', 'ئ'), ('ئ', 'ء'),  # standalone hamza ↔ hamza on ya
-        # Common Arabic letter confusions (sound-alike pairs)
-        ('ص', 'س'), ('س', 'ص'),  # emphatic/plain sibilant (المدرصة→المدرسة)
-        ('ض', 'ظ'), ('ظ', 'ض'),  # emphatic pair confusion
-        ('ذ', 'ز'), ('ز', 'ذ'),  # voiced fricatives
-        ('ث', 'س'), ('س', 'ث'),  # voiceless fricatives
-        ('ط', 'ت'), ('ت', 'ط'),  # emphatic/plain stop
     }
     # Check every character pair — reject if ANY non-orthographic change
     if len(orig_word) != len(corr_word):
@@ -1191,14 +1069,6 @@ def _is_orthographic_variant(word1: str, word2: str) -> bool:
 @app.route('/api/analyze', methods=['POST'])
 def analyze_text():
-    # ── Rate Limiting (P3) ──
-    client_ip = request.headers.get('X-Forwarded-For', request.remote_addr)
-    if not _check_rate_limit(client_ip):
-        return jsonify({
-            'error': 'Rate limit exceeded. Please wait before making more requests.',
-            'status': 'error'
-        }), 429
     """
     Perform sequential analysis (Spelling -> Grammar -> Punctuation)
     and return word-level suggestions with offsets.
@@ -1220,12 +1090,6 @@ def analyze_text():
         if not text:
             return jsonify({'error': 'Text is required', 'status': 'error'}), 400
-        # ── Cache Check (P3) ──
-        cached = _get_cached_response(text)
-        if cached:
-            logger.info(f"[ANALYZE] Cache hit for text (len={len(text)})")
-            return jsonify(cached)
         # Reject inputs that are predominantly non-Arabic (code, markup, etc.)
         arabic_chars = len(re.findall(r'[\u0600-\u06FF]', text))
         alpha_chars = len(re.findall(r'[a-zA-Z\u0600-\u06FF]', text))
@@ -1494,22 +1358,6 @@ def analyze_text():
         except Exception as e:
             logger.error(f"[ANALYZE] Hamza fix failed: {type(e).__name__}: {e}")
-        # ── Ta Marbuta fix pass (P2) ──
-        # Catches common ه→ة errors like المدرسه→المدرسة at pipeline level.
-        try:
-            ta_fixed, ta_changes = _fix_ta_marbuta(current_text)
-            if ta_fixed != current_text:
-                for tc in ta_changes:
-                    ctx.add_patch(
-                        'spelling', tc['start'], tc['end'],
-                        tc['correction'], confidence=0.95,
-                    )
-                    logger.info(f"[TA-MARBUTA] '{tc['original']}' → '{tc['correction']}'")
-                ctx.mutate_text(ta_fixed, OffsetMapper)
-                current_text = ctx.current_text
-        except Exception as e:
-            logger.error(f"[ANALYZE] Ta Marbuta fix failed: {type(e).__name__}: {e}")
         # 2. Grammar (runs on spelling-corrected text — word-level dependency)
         try:
             t0 = time.time()
@@ -1618,22 +1466,6 @@ def analyze_text():
                     stage_label = 'grammar'
                     if _is_spelling_only_change(orig_text, corr_text):
                         stage_label = 'spelling'
-                    # ── Directional blocks for grammar (mirrors spelling filter) ──
-                    # Prevents grammar from making meaning-changing corrections
-                    # like كان→كأن ("was" → "as if").
-                    _GRAMMAR_BLOCKS = {
-                        'كان': {'كأن'}, 'كأن': {'كان'},
-                        'هذه': {'هذة'}, 'هذا': {'هذة', 'هذه'},
-                        'إلى': {'ع��ى', 'علي'}, 'على': {'إلى', 'علي'},
-                        'لكن': {'لاكن'}, 'ذلك': {'ذالك'},
-                    }
-                    if corr_text in _GRAMMAR_BLOCKS.get(orig_text, set()):
-                        logger.info(
-                            f"[GRAMMAR] Blocked directional: '{orig_text}'→'{corr_text}'"
-                        )
-                        continue
                     ctx.add_patch(
                         stage_label, d['start'], d['end'],
                         corr_text, confidence=1.0
@@ -1670,13 +1502,6 @@ def analyze_text():
             from nlp.punctuation.punctuation_service import get_punctuation_model
             punc_checker = get_punctuation_model()
             corrected_punc = punc_checker.correct(ctx.current_text)
-            # ── Post-process: strip duplicate trailing punctuation ──
-            # Model sometimes turns "..." into "...." or "." into ".."
-            import re as _punc_re
-            # Collapse non-dot duplicate punctuation: ,, → , ;; → ; etc.
-            corrected_punc = _punc_re.sub(r'([،؛:!?؟])\1+', r'\1', corrected_punc)
-            # Collapse 4+ dots into ellipsis (3 dots), preserve intentional ...
-            corrected_punc = _punc_re.sub(r'\.{4,}', '...', corrected_punc)
             timing_ms['punctuation_ms'] = int((time.time() - t0) * 1000)
             logger.info(f"[ANALYZE] Step 3: Punctuation done in {timing_ms['punctuation_ms']}ms")
             if corrected_punc != ctx.current_text:
@@ -1703,20 +1528,6 @@ def analyze_text():
                             f"'{d.get('original','')}' \u2192 '{d.get('correction','')}' "
                             f"(locked by {owner}[{ls}:{le}])"
                         )
-                    # ── Mid-word split guard ──
-                    # Reject punctuation diffs where the original is NOT a complete
-                    # word — i.e., the character after the diff end is still Arabic.
-                    # This catches cases like الدفتر being split into الدفت.ر
-                    d_end = d['end']
-                    if d_end < len(ctx.current_text):
-                        next_ch = ctx.current_text[d_end]
-                        if '\u0600' <= next_ch <= '\u06FF':
-                            logger.info(
-                                f"[PUNC-SAFETY] Rejected mid-word split [{d['start']}:{d_end}] "
-                                f"'{d.get('original','')}' → '{d.get('correction','')}' "
-                                f"(next char '{next_ch}' is Arabic — word was split)"
-                            )
-                            continue
                     # Punctuation safety layer: reject non-punctuation changes
                     if not validate_punctuation_diff(d):
                         logger.info(
@@ -1724,21 +1535,6 @@ def analyze_text():
                             f"'{d.get('original','')}' → '{d.get('correction','')}' — not a safe punctuation change"
                         )
                         continue
-                    # ── Duplicate punctuation guard ──
-                    # Reject corrections that just append punctuation to already-punctuated text
-                    # e.g. "الحديقة." → "الحديقة.." or "..." → "...."
-                    import re as _re2
-                    orig_txt = d.get('original', '')
-                    corr_txt = d.get('correction', '')
-                    _PUNC_CHARS = set('.,،؛:!?؟…。')
-                    if orig_txt and corr_txt and len(corr_txt) > len(orig_txt):
-                        suffix_added = corr_txt[len(orig_txt):]
-                        if all(c in _PUNC_CHARS for c in suffix_added) and orig_txt[-1] in _PUNC_CHARS:
-                            logger.info(
-                                f"[PUNC-DUP] Rejected duplicate punctuation [{d['start']}:{d['end']}] "
-                                f"'{orig_txt}' → '{corr_txt}' — already has punctuation"
-                            )
-                            continue
                     ctx.add_patch(
                         'punctuation', d['start'], d['end'],
                         d['correction'], confidence=0.8
@@ -1809,10 +1605,6 @@ def analyze_text():
         if stage_errors:
             response_data['warnings'] = stage_errors
-        # ── Cache Store (P3) ──
-        if response_status == 'success':
-            _set_cached_response(text, response_data)
         return jsonify(response_data)
     except Exception as e:
@@ -1825,52 +1617,6 @@ def analyze_text():
         }), 500
-@app.route('/api/feedback', methods=['POST'])
-def submit_feedback():
-    """Accept user feedback on correction suggestions."""
-    try:
-        if not request.is_json:
-            return jsonify({'error': 'Request must be JSON', 'status': 'error'}), 400
-        data = request.get_json()
-        suggestion_id = data.get('suggestion_id', '')
-        helpful = data.get('helpful', None)
-        text = data.get('text', '')[:200]  # Truncate for safety
-        original = data.get('original', '')[:100]
-        correction = data.get('correction', '')[:100]
-        if helpful is None:
-            return jsonify({'error': 'helpful field is required', 'status': 'error'}), 400
-        # Log feedback (simple file-based for now)
-        feedback_entry = {
-            'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
-            'suggestion_id': suggestion_id,
-            'helpful': helpful,
-            'original': original,
-            'correction': correction,
-            'text_snippet': text,
-            'ip': request.headers.get('X-Forwarded-For', request.remote_addr),
-        }
-        logger.info(f"[FEEDBACK] {feedback_entry}")
-        # Append to feedback log file
-        try:
-            feedback_dir = Path(__file__).parent.parent / 'logs'
-            feedback_dir.mkdir(exist_ok=True)
-            with open(feedback_dir / 'feedback.jsonl', 'a', encoding='utf-8') as f:
-                import json
-                f.write(json.dumps(feedback_entry, ensure_ascii=False) + '\n')
-        except Exception as log_err:
-            logger.warning(f"[FEEDBACK] Could not write to file: {log_err}")
-        return jsonify({'status': 'success', 'message': 'شكراً لملاحظاتك!'})
-    except Exception as e:
-        logger.error(f"[FEEDBACK] Error: {e}")
-        return jsonify({'error': 'Failed to submit feedback', 'status': 'error'}), 500
 @app.errorhandler(404)
 def not_found(error):
     """Handle 404 errors."""

 import os
 import logging
 import time
 from flask import Flask, request, jsonify, Response
 from flask_cors import CORS
 from pathlib import Path
 MAX_SUMMARY_LENGTH = 512  # Maximum tokens for summary
 MIN_TEXT_LENGTH = 10  # Minimum characters for summarization
 # Global model instances
 summarization_model = None
 spelling_model = None
         ('ء', 'أ'), ('أ', 'ء'),  # standalone hamza ↔ hamza on alef
         ('ء', 'ؤ'), ('ؤ', 'ء'),  # standalone hamza ↔ hamza on waw
         ('ء', 'ئ'), ('ئ', 'ء'),  # standalone hamza ↔ hamza on ya
     }
     # Check every character pair — reject if ANY non-orthographic change
     if len(orig_word) != len(corr_word):
 @app.route('/api/analyze', methods=['POST'])
 def analyze_text():
     """
     Perform sequential analysis (Spelling -> Grammar -> Punctuation)
     and return word-level suggestions with offsets.
         if not text:
             return jsonify({'error': 'Text is required', 'status': 'error'}), 400
         # Reject inputs that are predominantly non-Arabic (code, markup, etc.)
         arabic_chars = len(re.findall(r'[\u0600-\u06FF]', text))
         alpha_chars = len(re.findall(r'[a-zA-Z\u0600-\u06FF]', text))
         except Exception as e:
             logger.error(f"[ANALYZE] Hamza fix failed: {type(e).__name__}: {e}")
         # 2. Grammar (runs on spelling-corrected text — word-level dependency)
         try:
             t0 = time.time()
                     stage_label = 'grammar'
                     if _is_spelling_only_change(orig_text, corr_text):
                         stage_label = 'spelling'
                     ctx.add_patch(
                         stage_label, d['start'], d['end'],
                         corr_text, confidence=1.0
             from nlp.punctuation.punctuation_service import get_punctuation_model
             punc_checker = get_punctuation_model()
             corrected_punc = punc_checker.correct(ctx.current_text)
             timing_ms['punctuation_ms'] = int((time.time() - t0) * 1000)
             logger.info(f"[ANALYZE] Step 3: Punctuation done in {timing_ms['punctuation_ms']}ms")
             if corrected_punc != ctx.current_text:
                             f"'{d.get('original','')}' \u2192 '{d.get('correction','')}' "
                             f"(locked by {owner}[{ls}:{le}])"
                         )
                     # Punctuation safety layer: reject non-punctuation changes
                     if not validate_punctuation_diff(d):
                         logger.info(
                             f"'{d.get('original','')}' → '{d.get('correction','')}' — not a safe punctuation change"
                         )
                         continue
                     ctx.add_patch(
                         'punctuation', d['start'], d['end'],
                         d['correction'], confidence=0.8
         if stage_errors:
             response_data['warnings'] = stage_errors
         return jsonify(response_data)
     except Exception as e:
         }), 500
 @app.errorhandler(404)
 def not_found(error):
     """Handle 404 errors."""

src/nlp/punctuation/punctuation_rules.py CHANGED Viewed

@@ -137,19 +137,5 @@ def validate_punctuation_diff(diff: dict) -> bool:
     if punct_delta > MAX_PUNCT_DELTA:
         return False
-    # ── Rule 6: Reject mid-word punctuation insertion ──
-    # If the correction ends with a punctuation mark followed by nothing,
-    # but the original word is a PREFIX of a longer word in context,
-    # this indicates mid-word split (e.g. الدفت→الدفت. when word was الدفتر).
-    # Detect by checking if correction has punctuation NOT at word boundary.
-    for pc in ARABIC_PUNCT_CHARS:
-        if pc in correction:
-            # Check if punctuation is followed by an Arabic letter (mid-word)
-            idx = correction.find(pc)
-            if idx >= 0 and idx < len(correction) - 1:
-                next_char = correction[idx + 1]
-                if '\u0600' <= next_char <= '\u06FF':
-                    return False
     return True

     if punct_delta > MAX_PUNCT_DELTA:
         return False
     return True

src/nlp/spelling/araspell_rules.py CHANGED Viewed

@@ -184,16 +184,6 @@ class AraSpellPostProcessor:
         Also handles prefixed words: و/ف/ب/ك/ل + whitelist word.
         e.g. واصدقائي → وأصدقائي, بالاسعار → بالأسعار
         """
-        # Words that must NOT be decomposed by prefix stripping
-        # كان (was) ≠ ك+أن, بان (appeared) ≠ ب+أن, لان (softened) ≠ ل+أن, فان (van) ≠ ف+أن
-        HAMZA_PREFIX_BLACKLIST = {
-            'كان', 'كانت', 'كانوا', 'كانا',
-            'بان', 'بانت', 'بانوا',
-            'لان', 'لانت',
-            'فان', 'فانت',
-            'وان', 'وانت',
-            'كانه', 'كانها', 'كانهم',
-        }
         words = text.split()
         result = []
         for word in words:
@@ -202,11 +192,6 @@ class AraSpellPostProcessor:
                 result.append(AraSpellPostProcessor.HAMZA_WHITELIST[word])
                 continue
-            # Skip words in the blacklist — they are valid as-is
-            if word in HAMZA_PREFIX_BLACKLIST:
-                result.append(word)
-                continue
             # Try stripping common prefixes and looking up the remainder
             fixed = False
             for prefix in AraSpellPostProcessor.HAMZA_PREFIXES:

         Also handles prefixed words: و/ف/ب/ك/ل + whitelist word.
         e.g. واصدقائي → وأصدقائي, بالاسعار → بالأسعار
         """
         words = text.split()
         result = []
         for word in words:
                 result.append(AraSpellPostProcessor.HAMZA_WHITELIST[word])
                 continue
             # Try stripping common prefixes and looking up the remainder
             fixed = False
             for prefix in AraSpellPostProcessor.HAMZA_PREFIXES:

tests/test_bug_fixes.py CHANGED Viewed

@@ -657,186 +657,5 @@ class TestSuffixCorruption(unittest.TestCase):
                          "Verb+pronoun كتبته→كتبتة must be blocked")
-# ═══════════════════════════════════════════════════════════════
-# P2: Ta Marbuta Fix Tests
-# ═══════════════════════════════════════════════════════════════
-class TestTaMarbutaFix(unittest.TestCase):
-    """Tests for the _fix_ta_marbuta pipeline function."""
-    @classmethod
-    def setUpClass(cls):
-        from app import _fix_ta_marbuta, _TA_MARBUTA_DICT
-        cls.fix = staticmethod(_fix_ta_marbuta)
-        cls.dict = _TA_MARBUTA_DICT
-    def test_basic_fix(self):
-        """المدرسه should be corrected to المدرسة."""
-        result, changes = self.fix('ذهبت الى المدرسه')
-        self.assertIn('المدرسة', result)
-        self.assertEqual(len(changes), 1)
-        self.assertEqual(changes[0]['original'], 'المدرسه')
-        self.assertEqual(changes[0]['correction'], 'المدرسة')
-    def test_multiple_fixes(self):
-        """Multiple ta marbuta errors in one sentence."""
-        result, changes = self.fix('الحياه في المدينه جميله')
-        self.assertIn('الحياة', result)
-        self.assertIn('المدينة', result)
-        self.assertGreaterEqual(len(changes), 2)
-    def test_no_false_positives(self):
-        """Words ending in ه that are NOT ta marbuta should be left alone."""
-        result, changes = self.fix('الله أكبر')
-        self.assertEqual(result, 'الله أكبر')
-        self.assertEqual(len(changes), 0)
-    def test_correct_text_untouched(self):
-        """Already correct text should not be changed."""
-        result, changes = self.fix('ذهبت إلى المدرسة')
-        self.assertEqual(result, 'ذهبت إلى المدرسة')
-        self.assertEqual(len(changes), 0)
-    def test_without_alef_lam(self):
-        """Bare words without ال should also be fixed."""
-        result, changes = self.fix('هذه مدرسه كبيره')
-        self.assertIn('مدرسة', result)
-    def test_dict_coverage(self):
-        """Dictionary should have significant coverage."""
-        self.assertGreater(len(self.dict), 50)
-# ═══════════════════════════════════════════════════════════════
-# P2: Hamza Whitelist Tests
-# ═══════════════════════════════════════════════════════════════
-class TestHamzaWhitelist(unittest.TestCase):
-    """Tests for hamza fix function."""
-    @classmethod
-    def setUpClass(cls):
-        try:
-            from nlp.spelling.araspell_rules import AraSpellPostProcessor
-            cls.fix = staticmethod(AraSpellPostProcessor.fix_common_hamza)
-            cls.available = True
-        except Exception:
-            cls.available = False
-    def test_anta_fix(self):
-        """انت should become أنت."""
-        if not self.available:
-            self.skipTest("AraSpellPostProcessor not available")
-        result = self.fix('انت طالب')
-        self.assertIn('أنت', result)
-    def test_ana_fix(self):
-        """انا should become أنا."""
-        if not self.available:
-            self.skipTest("AraSpellPostProcessor not available")
-        result = self.fix('انا ذاهب')
-        self.assertIn('أنا', result)
-    def test_alaan_fix(self):
-        """الان should become الآن."""
-        if not self.available:
-            self.skipTest("AraSpellPostProcessor not available")
-        result = self.fix('اذهب الان')
-        self.assertIn('الآن', result)
-    def test_correct_hamza_untouched(self):
-        """Already correct hamza should not be changed."""
-        if not self.available:
-            self.skipTest("AraSpellPostProcessor not available")
-        result = self.fix('أنت ذاهب إلى المدرسة')
-        self.assertEqual(result, 'أنت ذاهب إلى المدرسة')
-# ═══════════════════════════════════════════════════════════════
-# P3: Caching & Rate Limiting Tests
-# ═══════════════════════════════════════════════════════════════
-class TestCachingAndRateLimiting(unittest.TestCase):
-    """Tests for response caching and rate limiting."""
-    @classmethod
-    def setUpClass(cls):
-        from app import (
-            _get_cache_key, _get_cached_response,
-            _set_cached_response, _check_rate_limit,
-            _analyze_cache, _rate_limit_store
-        )
-        cls._get_cache_key = staticmethod(_get_cache_key)
-        cls._get_cached = staticmethod(_get_cached_response)
-        cls._set_cached = staticmethod(_set_cached_response)
-        cls._check_rate = staticmethod(_check_rate_limit)
-        cls._cache = _analyze_cache
-        cls._rate_store = _rate_limit_store
-    def setUp(self):
-        self._cache.clear()
-        self._rate_store.clear()
-    def test_cache_key_deterministic(self):
-        """Same text should produce same cache key."""
-        key1 = self._get_cache_key('مرحبا')
-        key2 = self._get_cache_key('مرحبا')
-        self.assertEqual(key1, key2)
-    def test_cache_key_different(self):
-        """Different texts should produce different keys."""
-        key1 = self._get_cache_key('مرحبا')
-        key2 = self._get_cache_key('أهلا')
-        self.assertNotEqual(key1, key2)
-    def test_cache_store_and_retrieve(self):
-        """Cached response should be retrievable."""
-        data = {'original': 'test', 'corrected': 'test', 'suggestions': []}
-        self._set_cached('مرحبا', data)
-        result = self._get_cached('مرحبا')
-        self.assertIsNotNone(result)
-        self.assertEqual(result['original'], 'test')
-    def test_cache_miss(self):
-        """Non-cached text should return None."""
-        result = self._get_cached('نص جديد')
-        self.assertIsNone(result)
-    def test_rate_limit_allows(self):
-        """First request should be allowed."""
-        self.assertTrue(self._check_rate('127.0.0.1'))
-    def test_rate_limit_blocks(self):
-        """Should block after exceeding limit."""
-        for _ in range(30):
-            self._check_rate('test_ip')
-        self.assertFalse(self._check_rate('test_ip'))
-# ═══════════════════════════════════════════════════════════════
-# P2: Grammar Splitting Tests
-# ═══════════════════════════════════════════════════════════════
-class TestGrammarSplitting(unittest.TestCase):
-    """Tests for grammar multi-word diff splitting logic."""
-    def test_split_logic(self):
-        """Multi-word grammar diffs should be split into individual words."""
-        # Simulate the splitting logic from analyze_text
-        orig_text = 'الي المدرسه الاستاذ'
-        corr_text = 'إلى المدرسة الأستاذ'
-        orig_words = orig_text.split()
-        corr_words = corr_text.split()
-        self.assertEqual(len(orig_words), len(corr_words))
-        diffs = []
-        for ow, cw in zip(orig_words, corr_words):
-            if ow != cw:
-                diffs.append({'original': ow, 'correction': cw})
-        self.assertEqual(len(diffs), 3)
-        self.assertEqual(diffs[0]['original'], 'الي')
-        self.assertEqual(diffs[0]['correction'], 'إلى')
-        self.assertEqual(diffs[1]['original'], 'المدرسه')
-        self.assertEqual(diffs[1]['correction'], 'المدرسة')
 if __name__ == '__main__':
     unittest.main()

                          "Verb+pronoun كتبته→كتبتة must be blocked")
 if __name__ == '__main__':
     unittest.main()