youssefreda9 committed on
Commit
0678259
·
1 Parent(s): a28ff8b

feat: P1-P3 all 8 items - ta marbuta fix, UI feedback, caching, rate limiting, unit tests

Browse files
hybrid_module.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # hybrid_module.py
3
+
4
+ import torch
5
+ import pickle
6
+ from transformers import GPT2LMHeadModel, GPT2Tokenizer
7
+ from huggingface_hub import hf_hub_download
8
+
9
+ # ---------- Load Bigram ----------
10
+
11
def load_bigram(repo_id="bayan10/AutoComplete", filename="bigram_model_v4.pkl", revision=None):
    """Download the pickled n-gram model from the Hugging Face Hub and unpack it.

    Args:
        repo_id: Hub repository that hosts the model file.
        filename: Name of the pickle file inside that repository.
        revision: Optional branch, tag or commit hash forwarded to
            ``hf_hub_download``. Pinning a commit hash guarantees that the
            exact file that was reviewed is the one being unpickled.
            ``None`` keeps the previous behaviour (the Hub default revision).

    Returns:
        A ``(unigrams, bigrams)`` tuple read from the ``"unigrams"`` and
        ``"bigrams"`` entries of the unpickled object.

    Raises:
        KeyError: If the unpickled object lacks one of those entries.
    """
    path = hf_hub_download(repo_id=repo_id, filename=filename, revision=revision)
    with open(path, "rb") as f:
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only load from repositories you control, and prefer passing a
        # pinned ``revision`` so the content cannot change underneath you.
        data = pickle.load(f)
    return data["unigrams"], data["bigrams"]
16
+
17
+ # ---------- Load GPT-2 ----------
18
def load_gpt2(model_name="aubmindlab/aragpt2-base"):
    """Load the GPT-2 tokenizer and language model named by *model_name*.

    The tokenizer's pad token is set to its EOS token, the model config's
    ``pad_token_id`` is set to the matching id, and ``model.eval()`` is called
    before returning.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    gpt2_tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    gpt2_model = GPT2LMHeadModel.from_pretrained(model_name)

    # GPT-2 is configured here to reuse EOS as its padding token.
    gpt2_tokenizer.pad_token = gpt2_tokenizer.eos_token
    gpt2_model.config.pad_token_id = gpt2_tokenizer.eos_token_id

    gpt2_model.eval()
    return gpt2_tokenizer, gpt2_model
25
+
26
+ # ---------- GPT-2 scoring ----------
27
def gpt2_next_token_probs(prefix, tokenizer, model, top_k=50):
    """Return the most probable next tokens after *prefix* and their probabilities.

    The prefix is tokenized (truncated to 1024 tokens), the model is run once
    without gradients, and a softmax is taken over the logits of the last
    position. The ``top_k`` most probable token ids are decoded and stripped
    of surrounding whitespace.

    Args:
        prefix: Text whose continuation is being predicted.
        tokenizer: Callable tokenizer that returns a mapping containing
            ``"input_ids"`` and exposes ``decode``.
        model: Callable model whose output has a ``logits`` attribute.
        top_k: Maximum number of token ids to inspect. Values larger than the
            vocabulary size are clamped; values <= 0 yield an empty result.

    Returns:
        ``dict`` mapping decoded token text to its probability. Tokens that
        decode to an empty string are skipped. When several token ids decode
        to the same text, the highest probability is kept.
    """
    inputs = tokenizer(
        prefix,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
    )

    # An empty prefix tokenizes to zero positions, so there is no "last"
    # logit row to read; report no predictions instead of failing.
    if inputs["input_ids"].shape[-1] == 0:
        return {}

    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits[0, -1]

    probs = torch.softmax(logits, dim=-1)

    # torch.topk raises if k exceeds the size of the dimension.
    k = min(int(top_k), probs.shape[-1])
    if k <= 0:
        return {}
    top_probs, top_ids = torch.topk(probs, k)

    prob_dict = {}
    for token_id, prob in zip(top_ids.tolist(), top_probs.tolist()):
        word = tokenizer.decode([token_id]).strip()
        # topk yields probabilities in descending order, so the first time a
        # word appears it carries its highest probability; later duplicates
        # (e.g. the same word with and without a leading space) must not
        # overwrite it with a smaller value.
        if word and word not in prob_dict:
            prob_dict[word] = prob

    return prob_dict
49
+
50
+ # ---------- Statistical autocomplete ----------
51
+
52
+
53
def statistical_autocomplete(text, unigrams, bigrams, top_k=20):
    """Suggest next words for *text* from bigram counts, falling back to unigrams.

    Followers of the last word of *text* are taken from ``bigrams``. Words
    shorter than three characters, and the last word itself, are discarded.
    If nothing remains, every unigram of at least three characters becomes a
    candidate. Counts are normalised into relative frequencies, sorted in
    descending order, merged by ``merge_similar_predictions`` and truncated
    to ``top_k``.

    Args:
        text: Text typed so far.
        unigrams: Mapping of word -> count.
        bigrams: Mapping of word -> mapping of following word -> count.
        top_k: Maximum number of predictions to return.

    Returns:
        List of ``(word, probability)`` tuples. Empty when *text* has no
        words or when no candidate with a non-zero total count exists.
    """
    tokens = text.strip().split()
    if not tokens:
        return []

    last_word = tokens[-1]
    candidates = [
        (w, c)
        for w, c in bigrams.get(last_word, {}).items()
        if len(w) >= 3 and w != last_word
    ]

    if not candidates:
        candidates = [(w, c) for w, c in unigrams.items() if len(w) >= 3]

    total = sum(c for _, c in candidates)
    if not total:
        # No candidates (or only zero counts): nothing to normalise, and
        # dividing by zero below would raise.
        return []

    preds = sorted(
        ((w, c / total) for w, c in candidates),
        key=lambda x: x[1],
        reverse=True,
    )
    preds = merge_similar_predictions(preds, top_k=top_k)
    return preds[:top_k]
78
+
79
+ # ---------- Hybrid autocomplete ----------
80
def hybrid_autocomplete(prefix, unigrams, bigrams, tokenizer, model, alpha=0.6, k=5):
    """Rank next-word candidates by blending n-gram and GPT-2 probabilities.

    Candidates come from ``statistical_autocomplete`` (at most 20). GPT-2 is
    queried once for its top 50 next tokens, and each candidate is scored as
    ``alpha * statistical_p + (1 - alpha) * neural_p``. Candidates absent from
    the GPT-2 result use ``1e-8`` as ``neural_p``.

    Args:
        prefix: Text typed so far.
        unigrams: Mapping of word -> count.
        bigrams: Mapping of word -> mapping of following word -> count.
        tokenizer: Tokenizer passed through to ``gpt2_next_token_probs``.
        model: Language model passed through to ``gpt2_next_token_probs``.
        alpha: Weight of the statistical probability in the blend.
        k: Number of results to return.

    Returns:
        Up to ``k`` ``(word, score)`` tuples sorted by descending score.
        Empty when *prefix* has no words, when its last word is not a key of
        ``bigrams``, or when there are no statistical candidates.
    """
    words = prefix.strip().split()
    if not words or words[-1] not in bigrams:
        return []

    # -------- Statistical (Bigram) --------
    stat_candidates = statistical_autocomplete(
        prefix,
        unigrams,
        bigrams,
        top_k=20,
    )
    if not stat_candidates:
        # Nothing to rescore, so skip the expensive model forward pass.
        return []

    # -------- Neural (GPT-2), queried once for the whole candidate list ----
    gpt2_probs = gpt2_next_token_probs(prefix, tokenizer, model, top_k=50)

    # -------- Hybrid scoring --------
    results = [
        # 1e-8 stands in for candidates GPT-2 did not rank in its top tokens.
        (w, alpha * stat_p + (1 - alpha) * gpt2_probs.get(w, 1e-8))
        for w, stat_p in stat_candidates
    ]
    results.sort(key=lambda x: x[1], reverse=True)
    return results[:k]
108
+
109
+ import re
110
+ from collections import defaultdict
111
+
112
def canonical_form(word):
    """Return *word* with common Arabic spelling variants folded together.

    Substitutions are applied in order: alef variants become bare alef,
    alef maqsura becomes ya, hamza-on-waw becomes waw, hamza-on-ya becomes ya,
    ta marbuta becomes ha, and the listed diacritic marks are removed.
    """
    substitutions = (
        ("[إأآا]", "ا"),
        ("ى", "ي"),
        ("ؤ", "و"),
        ("ئ", "ي"),
        ("ة", "ه"),
        (r"[ًٌٍَُِّْ]", ""),
    )
    normalized = word
    for pattern, replacement in substitutions:
        normalized = re.sub(pattern, replacement, normalized)
    return normalized
120
+
121
+
122
+
123
def merge_similar_predictions(preds, top_k=20):
    """Collapse predictions that share a canonical spelling into one entry.

    Each ``(word, probability)`` pair is grouped under ``canonical_form(word)``.
    Probabilities within a group are summed, and the first word seen for the
    group is used as its representative.

    Args:
        preds: Iterable of ``(word, probability)`` pairs.
        top_k: Maximum number of merged groups to return.

    Returns:
        List of ``(representative_word, summed_probability)`` tuples ordered
        by descending summed probability.
    """
    buckets = defaultdict(lambda: {"score": 0.0, "words": []})

    for word, prob in preds:
        bucket = buckets[canonical_form(word)]
        bucket["score"] += prob
        bucket["words"].append(word)

    ranked = list(buckets.values())
    ranked.sort(key=lambda bucket: bucket["score"], reverse=True)

    return [(bucket["words"][0], bucket["score"]) for bucket in ranked[:top_k]]
141
+
142
+
143
+
144
+
145
+
146
+
147
+
src/app.py CHANGED
@@ -6,6 +6,9 @@ Provides API endpoints for the Bayan web application.
6
  import os
7
  import logging
8
  import time
 
 
 
9
  from flask import Flask, request, jsonify, Response
10
  from flask_cors import CORS
11
  from pathlib import Path
@@ -76,6 +79,119 @@ MAX_TEXT_LENGTH = 5000 # Maximum characters for input text
76
  MAX_SUMMARY_LENGTH = 512 # Maximum tokens for summary
77
  MIN_TEXT_LENGTH = 10 # Minimum characters for summarization
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  # Global model instances
80
  summarization_model = None
81
  spelling_model = None
@@ -1069,6 +1185,14 @@ def _is_orthographic_variant(word1: str, word2: str) -> bool:
1069
 
1070
  @app.route('/api/analyze', methods=['POST'])
1071
  def analyze_text():
 
 
 
 
 
 
 
 
1072
  """
1073
  Perform sequential analysis (Spelling -> Grammar -> Punctuation)
1074
  and return word-level suggestions with offsets.
@@ -1090,6 +1214,12 @@ def analyze_text():
1090
  if not text:
1091
  return jsonify({'error': 'Text is required', 'status': 'error'}), 400
1092
 
 
 
 
 
 
 
1093
  # Reject inputs that are predominantly non-Arabic (code, markup, etc.)
1094
  arabic_chars = len(re.findall(r'[\u0600-\u06FF]', text))
1095
  alpha_chars = len(re.findall(r'[a-zA-Z\u0600-\u06FF]', text))
@@ -1358,6 +1488,22 @@ def analyze_text():
1358
  except Exception as e:
1359
  logger.error(f"[ANALYZE] Hamza fix failed: {type(e).__name__}: {e}")
1360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1361
  # 2. Grammar (runs on spelling-corrected text — word-level dependency)
1362
  try:
1363
  t0 = time.time()
@@ -1605,6 +1751,10 @@ def analyze_text():
1605
  if stage_errors:
1606
  response_data['warnings'] = stage_errors
1607
 
 
 
 
 
1608
  return jsonify(response_data)
1609
 
1610
  except Exception as e:
@@ -1617,6 +1767,52 @@ def analyze_text():
1617
  }), 500
1618
 
1619
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1620
  @app.errorhandler(404)
1621
  def not_found(error):
1622
  """Handle 404 errors."""
 
6
  import os
7
  import logging
8
  import time
9
+ import hashlib
10
+ from collections import OrderedDict
11
+ from functools import wraps
12
  from flask import Flask, request, jsonify, Response
13
  from flask_cors import CORS
14
  from pathlib import Path
 
79
  MAX_SUMMARY_LENGTH = 512 # Maximum tokens for summary
80
  MIN_TEXT_LENGTH = 10 # Minimum characters for summarization
81
 
82
+ # ── Response Cache (P3) ──
83
+ # LRU cache for /api/analyze: hash(text) → (response_dict, timestamp)
84
+ _ANALYZE_CACHE_MAX = 500
85
+ _ANALYZE_CACHE_TTL = 300 # 5 minutes
86
+ _analyze_cache = OrderedDict()
87
+
88
+ # ── Rate Limiter (P3) ──
89
+ _RATE_LIMIT_MAX = 30 # requests per window
90
+ _RATE_LIMIT_WINDOW = 60 # seconds
91
+ _rate_limit_store = {} # ip → [(timestamp, ...)]
92
+
93
+ # ── Ta Marbuta Dictionary (P2) ──
94
+ # Common words where ه at the end should be ة
95
+ _TA_MARBUTA_DICT = {
96
+ 'المدرسه': 'المدرسة', 'الجامعه': 'الجامعة', 'المكتبه': 'المكتبة',
97
+ 'الحياه': 'الحياة', 'الصلاه': 'الصلاة', 'الزكاه': 'الزكاة',
98
+ 'القراءه': 'القراءة', 'الكتابه': 'الكتابة', 'المعرفه': 'المعرفة',
99
+ 'الثقافه': 'الثقافة', 'السياسه': 'السياسة', 'الاقتصاديه': 'الاقتصادية',
100
+ 'العربيه': 'العربية', 'الاسلاميه': 'الإسلامية', 'التربيه': 'التربية',
101
+ 'الشريعه': 'الشريعة', 'الدوله': 'الدولة', 'الحكومه': 'الحكومة',
102
+ 'المدينه': 'المدينة', 'القريه': 'القرية', 'الغرفه': 'الغرفة',
103
+ 'السياره': 'السيارة', 'الطاوله': 'الطاولة', 'الرساله': 'الرسالة',
104
+ 'المقاله': 'المقالة', 'الصحيفه': 'الصحيفة', 'الجريده': 'الجريدة',
105
+ 'القصه': 'القصة', 'الروايه': 'الرواية', 'اللغه': 'اللغة',
106
+ 'الفكره': 'الفكرة', 'الخطوه': 'الخطوة', 'المرحله': 'المرحلة',
107
+ 'النتيجه': 'النتيجة', 'المشكله': 'المشكلة', 'الطريقه': 'الطريقة',
108
+ 'الحاله': 'الحالة', 'الصوره': 'الصورة', 'القوه': 'القوة',
109
+ 'الوحده': 'الوحدة', 'العلاقه': 'العلاقة', 'التجربه': 'التجربة',
110
+ 'الحركه': 'الحركة', 'السلطه': 'السلطة', 'المنطقه': 'المنطقة',
111
+ 'الساعه': 'الساعة', 'اللحظه': 'اللحظة', 'الفتره': 'الفترة',
112
+ 'الاداره': 'الإدارة', 'البيئه': 'البيئة', 'الماده': 'المادة',
113
+ 'الاسره': 'الأسرة', 'العائله': 'العائلة', 'الشركه': 'الشركة',
114
+ 'المؤسسه': 'المؤسسة', 'المنظمه': 'المنظمة', 'الجمعيه': 'الجمعية',
115
+ 'الوزاره': 'الوزارة', 'السفاره': 'السفارة', 'القياده': 'القيادة',
116
+ 'الزياره': 'الزيارة', 'المحاوله': 'المحاولة', 'الدراسه': 'الدراسة',
117
+ 'الممارسه': 'الممارسة', 'المتابعه': 'المتابعة', 'الخدمه': 'الخدمة',
118
+ 'التقنيه': 'التقنية', 'الهندسه': 'الهندسة', 'الفلسفه': 'الفلسفة',
119
+ 'مدرسه': 'مدرسة', 'جامعه': 'جامعة', 'مكتبه': 'مكتبة',
120
+ 'حياه': 'حياة', 'صلاه': 'صلاة', 'زكاه': 'زكاة',
121
+ 'لغه': 'لغة', 'قصه': 'قصة', 'فكره': 'فكرة',
122
+ 'خطوه': 'خطوة', 'صوره': 'صورة', 'قوه': 'قوة',
123
+ 'سياره': 'سيارة', 'رساله': 'رسالة', 'ساعه': 'ساعة',
124
+ 'غرفه': 'غرفة', 'شركه': 'شركة', 'دوله': 'دولة',
125
+ }
126
+
127
+
128
+ def _fix_ta_marbuta(text):
129
+ """Fix common ه→ة errors at pipeline level using dictionary lookup."""
130
+ words = text.split()
131
+ fixed_words = []
132
+ changes = []
133
+ pos = 0
134
+ for word in words:
135
+ start = text.find(word, pos)
136
+ end = start + len(word)
137
+ # Check bare word
138
+ if word in _TA_MARBUTA_DICT:
139
+ fixed_words.append(_TA_MARBUTA_DICT[word])
140
+ changes.append({'start': start, 'end': end, 'original': word, 'correction': _TA_MARBUTA_DICT[word]})
141
+ # Check word ending in ه that should be ة (pattern match)
142
+ elif word.endswith('ه') and len(word) >= 3:
143
+ candidate = word[:-1] + 'ة'
144
+ if candidate in _TA_MARBUTA_DICT.values():
145
+ fixed_words.append(candidate)
146
+ changes.append({'start': start, 'end': end, 'original': word, 'correction': candidate})
147
+ else:
148
+ fixed_words.append(word)
149
+ else:
150
+ fixed_words.append(word)
151
+ pos = end
152
+ return ' '.join(fixed_words), changes
153
+
154
+
155
def _check_rate_limit(ip):
    """Check if IP has exceeded rate limit. Returns True if allowed.

    Timestamps older than ``_RATE_LIMIT_WINDOW`` seconds are dropped for the
    given key. The request is rejected when ``_RATE_LIMIT_MAX`` timestamps
    remain; otherwise the current time is recorded and the request is allowed.

    Args:
        ip: Key identifying the client (whatever the caller passes).

    Returns:
        ``True`` if the request may proceed, ``False`` if the limit is reached.
    """
    # Above this many tracked keys, idle entries are purged so the store
    # cannot grow without bound as new client keys keep arriving.
    max_tracked_ips = 10_000

    now = time.time()
    recent = [t for t in _rate_limit_store.get(ip, ()) if now - t < _RATE_LIMIT_WINDOW]
    allowed = len(recent) < _RATE_LIMIT_MAX
    if allowed:
        recent.append(now)
    _rate_limit_store[ip] = recent

    if len(_rate_limit_store) > max_tracked_ips:
        # Timestamps are appended in call order, so the last one is the newest.
        stale = [
            key
            for key, stamps in _rate_limit_store.items()
            if not stamps or now - stamps[-1] >= _RATE_LIMIT_WINDOW
        ]
        for key in stale:
            del _rate_limit_store[key]

    return allowed
166
+
167
+
168
+ def _get_cache_key(text):
169
+ """Generate cache key from text."""
170
+ return hashlib.md5(text.encode('utf-8')).hexdigest()
171
+
172
+
173
def _get_cached_response(text):
    """Look up the cached response for *text*.

    Returns the stored response and marks the entry as most recently used
    when it is younger than ``_ANALYZE_CACHE_TTL`` seconds. An expired entry
    is deleted. Returns ``None`` when there is no usable entry.
    """
    cache_key = _get_cache_key(text)
    entry = _analyze_cache.get(cache_key)
    if entry is None:
        return None

    payload, stored_at = entry
    if time.time() - stored_at < _ANALYZE_CACHE_TTL:
        _analyze_cache.move_to_end(cache_key)
        return payload

    # Entry outlived its TTL: drop it and report a miss.
    del _analyze_cache[cache_key]
    return None
184
+
185
+
186
def _set_cached_response(text, response_data):
    """Store *response_data* in the analyze cache under the key for *text*.

    The entry is stamped with the current time and placed at the
    most-recently-used end. Oldest entries are evicted while the cache holds
    more than ``_ANALYZE_CACHE_MAX`` items.
    """
    key = _get_cache_key(text)
    _analyze_cache[key] = (response_data, time.time())
    # Assigning to an existing key keeps its old position in an OrderedDict;
    # move it explicitly so a refreshed entry is not the next one evicted.
    _analyze_cache.move_to_end(key)
    # Evict oldest if over limit
    while len(_analyze_cache) > _ANALYZE_CACHE_MAX:
        _analyze_cache.popitem(last=False)
193
+
194
+
195
  # Global model instances
196
  summarization_model = None
197
  spelling_model = None
 
1185
 
1186
  @app.route('/api/analyze', methods=['POST'])
1187
  def analyze_text():
1188
+ # ── Rate Limiting (P3) ──
1189
+ client_ip = request.headers.get('X-Forwarded-For', request.remote_addr)
1190
+ if not _check_rate_limit(client_ip):
1191
+ return jsonify({
1192
+ 'error': 'Rate limit exceeded. Please wait before making more requests.',
1193
+ 'status': 'error'
1194
+ }), 429
1195
+
1196
  """
1197
  Perform sequential analysis (Spelling -> Grammar -> Punctuation)
1198
  and return word-level suggestions with offsets.
 
1214
  if not text:
1215
  return jsonify({'error': 'Text is required', 'status': 'error'}), 400
1216
 
1217
+ # ── Cache Check (P3) ──
1218
+ cached = _get_cached_response(text)
1219
+ if cached:
1220
+ logger.info(f"[ANALYZE] Cache hit for text (len={len(text)})")
1221
+ return jsonify(cached)
1222
+
1223
  # Reject inputs that are predominantly non-Arabic (code, markup, etc.)
1224
  arabic_chars = len(re.findall(r'[\u0600-\u06FF]', text))
1225
  alpha_chars = len(re.findall(r'[a-zA-Z\u0600-\u06FF]', text))
 
1488
  except Exception as e:
1489
  logger.error(f"[ANALYZE] Hamza fix failed: {type(e).__name__}: {e}")
1490
 
1491
+ # ── Ta Marbuta fix pass (P2) ──
1492
+ # Catches common ه→ة errors like المدرسه→المدرسة at pipeline level.
1493
+ try:
1494
+ ta_fixed, ta_changes = _fix_ta_marbuta(current_text)
1495
+ if ta_fixed != current_text:
1496
+ for tc in ta_changes:
1497
+ ctx.add_patch(
1498
+ 'spelling', tc['start'], tc['end'],
1499
+ tc['correction'], confidence=0.95,
1500
+ )
1501
+ logger.info(f"[TA-MARBUTA] '{tc['original']}' → '{tc['correction']}'")
1502
+ ctx.mutate_text(ta_fixed, OffsetMapper)
1503
+ current_text = ctx.current_text
1504
+ except Exception as e:
1505
+ logger.error(f"[ANALYZE] Ta Marbuta fix failed: {type(e).__name__}: {e}")
1506
+
1507
  # 2. Grammar (runs on spelling-corrected text — word-level dependency)
1508
  try:
1509
  t0 = time.time()
 
1751
  if stage_errors:
1752
  response_data['warnings'] = stage_errors
1753
 
1754
+ # ── Cache Store (P3) ──
1755
+ if response_status == 'success':
1756
+ _set_cached_response(text, response_data)
1757
+
1758
  return jsonify(response_data)
1759
 
1760
  except Exception as e:
 
1767
  }), 500
1768
 
1769
 
1770
@app.route('/api/feedback', methods=['POST'])
def submit_feedback():
    """Accept user feedback on correction suggestions.

    Expects a JSON object with a required ``helpful`` field and optional
    ``suggestion_id``, ``text``, ``original`` and ``correction`` fields. The
    entry is logged and appended as one JSON line to ``logs/feedback.jsonl``
    two directories above this file. A failure to write the file is logged
    and does not fail the request.

    Returns:
        A JSON success response, a 400 response when the body is not a JSON
        object or ``helpful`` is missing, or a 500 response on unexpected
        errors.
    """
    import json

    def _clip(value, limit):
        # Clients may send null or non-string values; coerce before slicing
        # so a bad field cannot raise and turn into a 500.
        return ('' if value is None else str(value))[:limit]

    try:
        if not request.is_json:
            return jsonify({'error': 'Request must be JSON', 'status': 'error'}), 400

        # silent=True returns None for a malformed body instead of raising,
        # which lets us answer with 400 rather than falling into the
        # catch-all 500 handler below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request must be JSON', 'status': 'error'}), 400

        suggestion_id = data.get('suggestion_id', '')
        helpful = data.get('helpful', None)
        text = _clip(data.get('text', ''), 200)  # Truncate for safety
        original = _clip(data.get('original', ''), 100)
        correction = _clip(data.get('correction', ''), 100)

        if helpful is None:
            return jsonify({'error': 'helpful field is required', 'status': 'error'}), 400

        # Log feedback (simple file-based for now)
        feedback_entry = {
            'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
            'suggestion_id': suggestion_id,
            'helpful': helpful,
            'original': original,
            'correction': correction,
            'text_snippet': text,
            'ip': request.headers.get('X-Forwarded-For', request.remote_addr),
        }
        logger.info(f"[FEEDBACK] {feedback_entry}")

        # Append to feedback log file (best effort)
        try:
            feedback_dir = Path(__file__).parent.parent / 'logs'
            feedback_dir.mkdir(exist_ok=True)
            with open(feedback_dir / 'feedback.jsonl', 'a', encoding='utf-8') as f:
                f.write(json.dumps(feedback_entry, ensure_ascii=False) + '\n')
        except Exception as log_err:
            logger.warning(f"[FEEDBACK] Could not write to file: {log_err}")

        return jsonify({'status': 'success', 'message': 'شكراً لملاحظاتك!'})

    except Exception as e:
        logger.error(f"[FEEDBACK] Error: {e}")
        return jsonify({'error': 'Failed to submit feedback', 'status': 'error'}), 500
1814
+
1815
+
1816
  @app.errorhandler(404)
1817
  def not_found(error):
1818
  """Handle 404 errors."""
src/css/components.css CHANGED
@@ -796,6 +796,87 @@
796
  margin-top: var(--spacing-sm);
797
  }
798
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
799
  /* ── Alt chips in suggestion cards ── */
800
  .suggestion-card-alts {
801
  display: flex;
 
796
  margin-top: var(--spacing-sm);
797
  }
798
 
799
+ /* ── Type Icon ── */
800
+ .popover-type-icon {
801
+ font-size: 16px;
802
+ line-height: 1;
803
+ }
804
+
805
+ /* ── Original Label ── */
806
+ #tooltip-original {
807
+ text-decoration: none;
808
+ color: var(--color-text-secondary);
809
+ margin-bottom: var(--spacing-sm);
810
+ font-size: var(--font-size-sm);
811
+ }
812
+
813
+ .popover-original-label {
814
+ font-weight: var(--font-weight-bold);
815
+ color: var(--color-text-muted);
816
+ font-size: var(--font-size-label);
817
+ }
818
+
819
+ .popover-original-word {
820
+ text-decoration: line-through;
821
+ color: var(--color-error);
822
+ font-weight: var(--font-weight-semibold);
823
+ }
824
+
825
+ /* ── Feedback Buttons ── */
826
+ .popover-feedback {
827
+ display: flex;
828
+ align-items: center;
829
+ gap: 8px;
830
+ margin-top: 8px;
831
+ padding-top: 8px;
832
+ border-top: 1px solid var(--color-border);
833
+ justify-content: center;
834
+ }
835
+
836
+ .popover-feedback-label {
837
+ font-size: var(--font-size-label);
838
+ color: var(--color-text-muted);
839
+ }
840
+
841
+ .popover-feedback-btn {
842
+ width: 32px;
843
+ height: 32px;
844
+ border: 1px solid var(--color-border);
845
+ border-radius: 50%;
846
+ background: var(--color-surface);
847
+ cursor: pointer;
848
+ font-size: 14px;
849
+ display: flex;
850
+ align-items: center;
851
+ justify-content: center;
852
+ transition: all 0.2s ease;
853
+ }
854
+
855
+ .popover-feedback-yes:hover {
856
+ background: rgba(34, 197, 94, 0.15);
857
+ border-color: #22c55e;
858
+ transform: scale(1.1);
859
+ }
860
+
861
+ .popover-feedback-no:hover {
862
+ background: rgba(239, 68, 68, 0.15);
863
+ border-color: #ef4444;
864
+ transform: scale(1.1);
865
+ }
866
+
867
+ .popover-feedback-thanks {
868
+ font-size: var(--font-size-label);
869
+ color: #22c55e;
870
+ font-weight: var(--font-weight-bold);
871
+ animation: fadeIn 0.3s ease;
872
+ }
873
+
874
+ @keyframes fadeIn {
875
+ from { opacity: 0; transform: translateY(4px); }
876
+ to { opacity: 1; transform: translateY(0); }
877
+ }
878
+
879
+
880
  /* ── Alt chips in suggestion cards ── */
881
  .suggestion-card-alts {
882
  display: flex;
src/js/editor.js CHANGED
@@ -363,18 +363,19 @@ function showTooltip(element) {
363
  const alternativesEl = document.getElementById('tooltip-alternatives');
364
 
365
  const typeMap = {
366
- spelling: 'خطأ إملائي',
367
- grammar: 'خطأ نحوي',
368
- punctuation: 'علامات ترقيم'
369
  };
370
 
371
  if (typeEl) {
372
- typeEl.textContent = typeMap[suggestion.type] || suggestion.type;
 
373
  typeEl.className = `popover-type popover-type--${suggestion.type}`;
374
  }
375
 
376
  if (originalEl) {
377
- originalEl.textContent = suggestion.original;
378
  }
379
 
380
  // Render alternatives
@@ -392,25 +393,38 @@ function showTooltip(element) {
392
  if (isKeep) return; // render keep button last
393
  const isMain = i === 0;
394
  const btnClass = isMain ? 'popover-alt-btn popover-alt-main' : 'popover-alt-btn';
395
- html += `<button class="${btnClass}" data-alt-correction="${escapeHtml(alt)}" type="button">${escapeHtml(alt)}</button>`;
396
  });
397
  // Render keep button at end
398
  html += `<button class="popover-alt-btn popover-alt-keep" data-alt-correction="${escapeHtml(suggestion.original)}" type="button">إبقاء كما هي</button>`;
 
 
 
 
 
 
399
  alternativesEl.innerHTML = html;
400
 
401
- // Bind click events
402
  alternativesEl.querySelectorAll('.popover-alt-btn').forEach(btn => {
403
  btn.addEventListener('click', () => {
404
  const correctionText = btn.dataset.altCorrection;
405
  if (correctionText === suggestion.original) {
406
- // "Keep as-is" — just dismiss the suggestion
407
  dismissSuggestion(suggestion);
408
  } else {
409
- // Apply this alternative correction
410
  applyAlternativeCorrection(suggestion, correctionText);
411
  }
412
  });
413
  });
 
 
 
 
 
 
 
 
 
414
  }
415
 
416
  const rect = element.getBoundingClientRect();
@@ -520,6 +534,9 @@ function applySuggestionAtOffsets(suggestion) {
520
  } finally {
521
  _isApplyingSuggestion = false;
522
  }
 
 
 
523
  }
524
 
525
  function applyCorrection() {
@@ -603,6 +620,8 @@ function applyAlternativeCorrection(suggestion, correctionText) {
603
  } finally {
604
  _isApplyingSuggestion = false;
605
  }
 
 
606
  }
607
 
608
  function dismissSuggestion(suggestion) {
@@ -682,6 +701,8 @@ function applyAllSuggestions() {
682
  } finally {
683
  _isApplyingSuggestion = false;
684
  }
 
 
685
  }
686
 
687
  function clearEditor() {
@@ -745,6 +766,22 @@ function copyText() {
745
  });
746
  }
747
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
748
  if (typeof module !== 'undefined' && module.exports) {
749
  module.exports = {
750
  initEditor,
 
363
  const alternativesEl = document.getElementById('tooltip-alternatives');
364
 
365
  const typeMap = {
366
+ spelling: { label: 'خطأ إملائي', icon: '🔤' },
367
+ grammar: { label: 'خطأ نحوي', icon: '📝' },
368
+ punctuation: { label: 'علامات ترقيم', icon: '✏️' }
369
  };
370
 
371
  if (typeEl) {
372
+ const typeInfo = typeMap[suggestion.type] || { label: suggestion.type, icon: '📋' };
373
+ typeEl.innerHTML = `<span class="popover-type-icon">${typeInfo.icon}</span> ${typeInfo.label}`;
374
  typeEl.className = `popover-type popover-type--${suggestion.type}`;
375
  }
376
 
377
  if (originalEl) {
378
+ originalEl.innerHTML = `<span class="popover-original-label">الأصل:</span> <span class="popover-original-word">${escapeHtml(suggestion.original)}</span>`;
379
  }
380
 
381
  // Render alternatives
 
393
  if (isKeep) return; // render keep button last
394
  const isMain = i === 0;
395
  const btnClass = isMain ? 'popover-alt-btn popover-alt-main' : 'popover-alt-btn';
396
+ html += `<button class="${btnClass}" data-alt-correction="${escapeHtml(alt)}" type="button">${isMain ? '✓ ' : ''}${escapeHtml(alt)}</button>`;
397
  });
398
  // Render keep button at end
399
  html += `<button class="popover-alt-btn popover-alt-keep" data-alt-correction="${escapeHtml(suggestion.original)}" type="button">إبقاء كما هي</button>`;
400
+ // Render feedback buttons
401
+ html += `<div class="popover-feedback">
402
+ <span class="popover-feedback-label">هل الاقتراح مفيد؟</span>
403
+ <button class="popover-feedback-btn popover-feedback-yes" data-feedback="yes" type="button" title="مفيد">👍</button>
404
+ <button class="popover-feedback-btn popover-feedback-no" data-feedback="no" type="button" title="غير مفيد">👎</button>
405
+ </div>`;
406
  alternativesEl.innerHTML = html;
407
 
408
+ // Bind click events for alternatives
409
  alternativesEl.querySelectorAll('.popover-alt-btn').forEach(btn => {
410
  btn.addEventListener('click', () => {
411
  const correctionText = btn.dataset.altCorrection;
412
  if (correctionText === suggestion.original) {
 
413
  dismissSuggestion(suggestion);
414
  } else {
 
415
  applyAlternativeCorrection(suggestion, correctionText);
416
  }
417
  });
418
  });
419
+
420
+ // Bind feedback buttons
421
+ alternativesEl.querySelectorAll('.popover-feedback-btn').forEach(btn => {
422
+ btn.addEventListener('click', () => {
423
+ const helpful = btn.dataset.feedback === 'yes';
424
+ _sendFeedback(suggestion, helpful);
425
+ btn.closest('.popover-feedback').innerHTML = '<span class="popover-feedback-thanks">شكراً لملاحظاتك! ✓</span>';
426
+ });
427
+ });
428
  }
429
 
430
  const rect = element.getBoundingClientRect();
 
534
  } finally {
535
  _isApplyingSuggestion = false;
536
  }
537
+ // P2/User Request: Auto re-analyze after applying suggestion
538
+ // Schedules analyzeText() itself via setTimeout, so re-analysis starts about 300 ms after the change.
539
+ setTimeout(() => { analyzeText(); }, 300);
540
  }
541
 
542
  function applyCorrection() {
 
620
  } finally {
621
  _isApplyingSuggestion = false;
622
  }
623
+ // P2/User Request: Auto re-analyze after applying alternative correction
624
+ setTimeout(() => { analyzeText(); }, 300);
625
  }
626
 
627
  function dismissSuggestion(suggestion) {
 
701
  } finally {
702
  _isApplyingSuggestion = false;
703
  }
704
+ // P2/User Request: Auto re-analyze after applying all suggestions
705
+ setTimeout(() => { analyzeText(); }, 300);
706
  }
707
 
708
  function clearEditor() {
 
766
  });
767
  }
768
 
769
+ // ── Feedback API (P2) ──
770
/**
 * Send a helpful / not-helpful vote for a suggestion to the feedback endpoint.
 *
 * Fire-and-forget: failures are only logged to the console and never thrown.
 *
 * @param {Object} suggestion - Suggestion being rated; `id`, `original` and
 *   `correction` are sent when present.
 * @param {boolean} helpful - Whether the user found the suggestion helpful.
 */
function _sendFeedback(suggestion, helpful) {
  const apiBase = window.BAYAN_API_BASE || '';
  const editorText = document.getElementById('editor-container')?.textContent || '';

  fetch(`${apiBase}/api/feedback`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      suggestion_id: suggestion.id || '',
      helpful: helpful,
      original: suggestion.original || '',
      correction: suggestion.correction || '',
      text: editorText.substring(0, 200),
    })
  })
    .then(response => {
      // fetch() rejects only on network failure; HTTP error statuses
      // (400, 429, 500, ...) resolve normally and must be checked here.
      if (!response.ok) {
        console.warn('[Feedback] Failed:', response.status);
      }
    })
    .catch(err => console.warn('[Feedback] Failed:', err));
}
784
+
785
  if (typeof module !== 'undefined' && module.exports) {
786
  module.exports = {
787
  initEditor,
tests/deep_dive_expanded.json ADDED
@@ -0,0 +1,1323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "2026-06-20T19:17:40.208323+00:00",
3
+ "api_base": "https://bayan10-bayan-api.hf.space",
4
+ "health": {
5
+ "environment": "huggingface_spaces",
6
+ "mode": "hf_spaces_local",
7
+ "models": {
8
+ "autocomplete": true,
9
+ "grammar": true,
10
+ "punctuation": true,
11
+ "spelling": true,
12
+ "summarization": true
13
+ },
14
+ "note": "Free tier: summarization local, other models return input unchanged",
15
+ "status": "healthy",
16
+ "supabase": {
17
+ "configured": true
18
+ }
19
+ },
20
+ "cat1": [
21
+ {
22
+ "id": "C1-01",
23
+ "category": 1,
24
+ "input": "كانت الفتيات يلعبون في الحديقه",
25
+ "a_spelling": "كانت الفتيات يلعبون في الحديقة",
26
+ "a_grammar_on_original": "كانت الفتيات يلعبن في الحديقة",
27
+ "a_grammar_on_spell_corrected": "كانت الفتيات يلعبن في الحديقة",
28
+ "a_punctuation": "كانت الفتيات يلعبون في الحديقه.",
29
+ "grammar_diff_orig_vs_spell": [],
30
+ "b_corrected": "كانت الفتيات يلعبن في الحديقة.",
31
+ "b_suggestions": [
32
+ {
33
+ "alternatives": [],
34
+ "confidence": 1.0,
35
+ "correction": "يلعبن",
36
+ "end": 19,
37
+ "id": "48d68e84-19e8-4af0-b1ea-b7ae4c7427f4",
38
+ "locked": true,
39
+ "original": "يلعبون",
40
+ "priority": 3,
41
+ "start": 13,
42
+ "type": "grammar"
43
+ },
44
+ {
45
+ "alternatives": [],
46
+ "confidence": 0.8,
47
+ "correction": "الحديقة.",
48
+ "end": 30,
49
+ "id": "0012d2dc-08f6-44ad-a9d1-e07230045474",
50
+ "locked": true,
51
+ "original": "الحديقه",
52
+ "priority": 2,
53
+ "start": 23,
54
+ "type": "punctuation"
55
+ }
56
+ ]
57
+ },
58
+ {
59
+ "id": "C1-02",
60
+ "category": 1,
61
+ "input": "ان الطالبات ذهبو الى الجامعه",
62
+ "a_spelling": "ان الط ابت ذهبوا إلى الجامعه",
63
+ "a_grammar_on_original": "إن الطالبات ذهبن إلى الجامعة",
64
+ "a_grammar_on_spell_corrected": "إن الطلاب ذهبوا إلى الجامعة",
65
+ "a_punctuation": "ان الطالبات ذهبو الى الجامعه!",
66
+ "grammar_diff_orig_vs_spell": [
67
+ {
68
+ "word_idx": 1,
69
+ "gram_on_orig": "الطالبات",
70
+ "gram_on_spell": "الطلاب"
71
+ },
72
+ {
73
+ "word_idx": 2,
74
+ "gram_on_orig": "ذهبن",
75
+ "gram_on_spell": "ذهبوا"
76
+ }
77
+ ],
78
+ "b_corrected": "إن الطالبات ذهبن ذهبوا الجامعة.",
79
+ "b_suggestions": [
80
+ {
81
+ "alternatives": [],
82
+ "confidence": 1.0,
83
+ "correction": "ذهبن",
84
+ "end": 16,
85
+ "id": "8a21b71d-8a87-4e29-a828-4ce2b343ae2a",
86
+ "locked": true,
87
+ "original": "ذهبو",
88
+ "priority": 3,
89
+ "start": 12,
90
+ "type": "grammar"
91
+ },
92
+ {
93
+ "alternatives": [],
94
+ "confidence": 0.8,
95
+ "correction": "الجامعة.",
96
+ "end": 28,
97
+ "id": "0c9e127e-8e64-4cc4-a928-03651da1dd15",
98
+ "locked": true,
99
+ "original": "الجامعه",
100
+ "priority": 2,
101
+ "start": 21,
102
+ "type": "punctuation"
103
+ },
104
+ {
105
+ "alternatives": [],
106
+ "confidence": 1.0,
107
+ "correction": "إن",
108
+ "end": 2,
109
+ "id": "156cd1ce-37cd-4ade-888c-9e8d12a83b05",
110
+ "locked": true,
111
+ "original": "ان",
112
+ "priority": 1,
113
+ "start": 0,
114
+ "type": "spelling"
115
+ },
116
+ {
117
+ "alternatives": [
118
+ "ذهبوا",
119
+ "ال",
120
+ "الم",
121
+ "الى"
122
+ ],
123
+ "confidence": 0.9,
124
+ "correction": "ذهبوا",
125
+ "end": 20,
126
+ "id": "836d7346-3ea8-4851-bc69-53df3e1ff6b4",
127
+ "locked": true,
128
+ "original": "الى",
129
+ "priority": 1,
130
+ "start": 17,
131
+ "type": "spelling"
132
+ }
133
+ ]
134
+ },
135
+ {
136
+ "id": "C1-03",
137
+ "category": 1,
138
+ "input": "هذة المدينه جميله جدا ومناخها معتدل",
139
+ "a_spelling": "هذة المدينه جميله جدا ومناخها معتدل",
140
+ "a_grammar_on_original": "هذه المدينة جميلة جدا ومناخها معتدل",
141
+ "a_grammar_on_spell_corrected": "هذه المدينة جميلة جدا ومناخها معتدل",
142
+ "a_punctuation": "هذة المدينه جميله جدا ومناخها معتدل.",
143
+ "grammar_diff_orig_vs_spell": [],
144
+ "b_corrected": "هذه المدينة جميلة جدا ومناخها معتدل.",
145
+ "b_suggestions": [
146
+ {
147
+ "alternatives": [],
148
+ "confidence": 0.8,
149
+ "correction": "معتدل.",
150
+ "end": 35,
151
+ "id": "b960d84a-bcf6-40c3-976e-29632ad7f302",
152
+ "locked": true,
153
+ "original": "معتدل",
154
+ "priority": 2,
155
+ "start": 30,
156
+ "type": "punctuation"
157
+ },
158
+ {
159
+ "alternatives": [],
160
+ "confidence": 1.0,
161
+ "correction": "هذه المدينة جميلة",
162
+ "end": 17,
163
+ "id": "a89acaad-669a-4867-9ebd-6bd5cbfad2ea",
164
+ "locked": true,
165
+ "original": "هذة المدينه جميله",
166
+ "priority": 1,
167
+ "start": 0,
168
+ "type": "spelling"
169
+ }
170
+ ]
171
+ },
172
+ {
173
+ "id": "C1-04",
174
+ "category": 1,
175
+ "input": "الطلاب اجتهدو في دراستهم وحققو نتائج ممتازه",
176
+ "a_spelling": "الطلاب الاجتهادوا في دراستهم وحققوا نتائج ممتازه",
177
+ "a_grammar_on_original": "الطلاب اجتهدو في دراستهم وحققوا نتائج ممتازة",
178
+ "a_grammar_on_spell_corrected": "الطلاب اجتهدوا في دراستهم وحققوا نتائج ممتازة",
179
+ "a_punctuation": "الطلاب اجتهدو في دراستهم وحققو نتائج ممتازه.",
180
+ "grammar_diff_orig_vs_spell": [
181
+ {
182
+ "word_idx": 1,
183
+ "gram_on_orig": "اجتهدو",
184
+ "gram_on_spell": "اجتهدوا"
185
+ }
186
+ ],
187
+ "b_corrected": "الطلاب اجتهدو في دراستهم وحققوا نتائج ممتازة.",
188
+ "b_suggestions": [
189
+ {
190
+ "alternatives": [],
191
+ "confidence": 0.8,
192
+ "correction": "ممتازة.",
193
+ "end": 43,
194
+ "id": "19a6216b-b7a1-4c0f-acd9-5c698617443b",
195
+ "locked": true,
196
+ "original": "ممتازه",
197
+ "priority": 2,
198
+ "start": 37,
199
+ "type": "punctuation"
200
+ },
201
+ {
202
+ "alternatives": [
203
+ "وحققوا",
204
+ "وحقوق",
205
+ "وحقق",
206
+ "وحققو"
207
+ ],
208
+ "confidence": 0.9,
209
+ "correction": "وحققوا",
210
+ "end": 30,
211
+ "id": "cdcf61bd-8983-44c8-9c3d-8792dc8027c3",
212
+ "locked": true,
213
+ "original": "وحققو",
214
+ "priority": 1,
215
+ "start": 25,
216
+ "type": "spelling"
217
+ }
218
+ ]
219
+ },
220
+ {
221
+ "id": "C1-05",
222
+ "category": 1,
223
+ "input": "ذهب الولد الى المكتبه وقرا كتاب مفيد",
224
+ "a_spelling": "ذهب الولد إلى المكتبة وقرا كتاب مفيد",
225
+ "a_grammar_on_original": "ذهب الولد إلى المكتبة وقرا كتابا مفيدا",
226
+ "a_grammar_on_spell_corrected": "ذهب الولد إلى المكتبة وقرا كتابا مفيدا",
227
+ "a_punctuation": "ذهب الولد الى المكتبه وقرا، كتاب مفيد",
228
+ "grammar_diff_orig_vs_spell": [],
229
+ "b_corrected": "ذهب الولد إلى المكتبة وقرا كتابا مفيدا",
230
+ "b_suggestions": [
231
+ {
232
+ "alternatives": [],
233
+ "confidence": 1.0,
234
+ "correction": "كتابا مفيدا",
235
+ "end": 36,
236
+ "id": "ede929d9-8112-4d65-a460-7a44cee535f5",
237
+ "locked": true,
238
+ "original": "كتاب مفيد",
239
+ "priority": 3,
240
+ "start": 27,
241
+ "type": "grammar"
242
+ },
243
+ {
244
+ "alternatives": [
245
+ "إلى",
246
+ "ال",
247
+ "الم",
248
+ "الى"
249
+ ],
250
+ "confidence": 0.9,
251
+ "correction": "إلى",
252
+ "end": 13,
253
+ "id": "affb4882-0466-4184-93e7-fb3463132a83",
254
+ "locked": true,
255
+ "original": "الى",
256
+ "priority": 1,
257
+ "start": 10,
258
+ "type": "spelling"
259
+ },
260
+ {
261
+ "alternatives": [
262
+ "المكتبة",
263
+ "المكتب",
264
+ "المشتبه",
265
+ "المكتبه"
266
+ ],
267
+ "confidence": 0.9,
268
+ "correction": "المكتبة",
269
+ "end": 21,
270
+ "id": "3f73b099-d428-4c05-98f8-138fb1d83c54",
271
+ "locked": true,
272
+ "original": "المكتبه",
273
+ "priority": 1,
274
+ "start": 14,
275
+ "type": "spelling"
276
+ }
277
+ ]
278
+ }
279
+ ],
280
+ "cat7": [
281
+ {
282
+ "id": "C7-01",
283
+ "category": 7,
284
+ "input": "ذهب الولد الى المدرسه وقابل المعلمه واخذ الكتاب",
285
+ "desc": "3-stage chain: spelling الى→إلى, grammar المدرسه→المدرسة, punc adds marks",
286
+ "a_spelling": "ذهب الولد إلى المدرسه وقابل المعلمه وأخذ الكتاب",
287
+ "a_grammar": "ذهب الولد إلى المدرسة وقابل المعلمة وأخذ الكتاب",
288
+ "a_punc": "ذهب الولد الى المدرسه وقابل، المعلمه واخذ الكتاب،",
289
+ "b_corrected": "ذهب الولد إلى المدرسة وقابل المعلمة وأخذ الكتاب.",
290
+ "b_suggestions": [
291
+ {
292
+ "alternatives": [],
293
+ "confidence": 0.8,
294
+ "correction": "الكتاب.",
295
+ "end": 47,
296
+ "id": "3e740303-1dcd-42ec-bd6c-7f0af8069e44",
297
+ "locked": true,
298
+ "original": "الكتاب",
299
+ "priority": 2,
300
+ "start": 41,
301
+ "type": "punctuation"
302
+ },
303
+ {
304
+ "alternatives": [],
305
+ "confidence": 1.0,
306
+ "correction": "المدرسة",
307
+ "end": 21,
308
+ "id": "84f953ae-2d0a-4e99-a07d-7d35638ba843",
309
+ "locked": true,
310
+ "original": "المدرسه",
311
+ "priority": 1,
312
+ "start": 14,
313
+ "type": "spelling"
314
+ },
315
+ {
316
+ "alternatives": [],
317
+ "confidence": 1.0,
318
+ "correction": "المعلمة",
319
+ "end": 35,
320
+ "id": "2e793ab2-29fc-454b-8490-ca7cfdfe4404",
321
+ "locked": true,
322
+ "original": "المعلمه",
323
+ "priority": 1,
324
+ "start": 28,
325
+ "type": "spelling"
326
+ },
327
+ {
328
+ "alternatives": [
329
+ "إلى",
330
+ "ال",
331
+ "الم",
332
+ "الى"
333
+ ],
334
+ "confidence": 0.9,
335
+ "correction": "إلى",
336
+ "end": 13,
337
+ "id": "d1b951c7-f94e-4941-986c-8ce5fa51bab0",
338
+ "locked": true,
339
+ "original": "الى",
340
+ "priority": 1,
341
+ "start": 10,
342
+ "type": "spelling"
343
+ },
344
+ {
345
+ "alternatives": [
346
+ "وأخذ",
347
+ "والذ",
348
+ "واخت",
349
+ "واخذ"
350
+ ],
351
+ "confidence": 0.9,
352
+ "correction": "وأخذ",
353
+ "end": 40,
354
+ "id": "c024f6ae-4740-4d87-9a0d-95818699e4f3",
355
+ "locked": true,
356
+ "original": "واخذ",
357
+ "priority": 1,
358
+ "start": 36,
359
+ "type": "spelling"
360
+ }
361
+ ],
362
+ "b_suggestion_count": 5,
363
+ "overlapping_suggestions": []
364
+ },
365
+ {
366
+ "id": "C7-02",
367
+ "category": 7,
368
+ "input": "كانت البنات يلعبون في الحديقه الجميله وفجأه سقطت احداهن",
369
+ "desc": "Multiple overlapping corrections across all stages",
370
+ "a_spelling": "كانت البنات يلعبون في الحديقه الجميله وفجأه سقطت احداهن",
371
+ "a_grammar": "كانت البنات يلعبن في الحديقة الجميلة وفجأة سقطت أختهن",
372
+ "a_punc": "كانت البنات يلعبون في الحديقه الجميله وفجأه، سقطت احداهن",
373
+ "b_corrected": "كانت البنات يلعبن في الحديقة الجميلة وفجأة سقطت أختهن.",
374
+ "b_suggestions": [
375
+ {
376
+ "alternatives": [],
377
+ "confidence": 1.0,
378
+ "correction": "يلعبن",
379
+ "end": 18,
380
+ "id": "2d1e033f-60d9-4921-bb65-d13b890f44d4",
381
+ "locked": true,
382
+ "original": "يلعبون",
383
+ "priority": 3,
384
+ "start": 12,
385
+ "type": "grammar"
386
+ },
387
+ {
388
+ "alternatives": [],
389
+ "confidence": 0.8,
390
+ "correction": "أختهن.",
391
+ "end": 55,
392
+ "id": "409bca6e-33d5-4339-8d75-d8dc1e3a8cea",
393
+ "locked": true,
394
+ "original": "احداهن",
395
+ "priority": 2,
396
+ "start": 49,
397
+ "type": "punctuation"
398
+ },
399
+ {
400
+ "alternatives": [],
401
+ "confidence": 1.0,
402
+ "correction": "الحديقة الجميلة وفجأة",
403
+ "end": 43,
404
+ "id": "ac5b5008-0563-4168-99aa-46c8de59d1d1",
405
+ "locked": true,
406
+ "original": "الحديقه الجميله وفجأه",
407
+ "priority": 1,
408
+ "start": 22,
409
+ "type": "spelling"
410
+ }
411
+ ],
412
+ "b_suggestion_count": 3,
413
+ "overlapping_suggestions": []
414
+ },
415
+ {
416
+ "id": "C7-03",
417
+ "category": 7,
418
+ "input": "ان الذكاء الاصطناعي يلعب دورا هاما في تطوير التكنولوجيا الحديثه ولذالك يجب الاهتمام بة",
419
+ "desc": "Long sentence with corrections from all 3 stages",
420
+ "a_spelling": "أن الذكاء الاصطناعي يلعب دورا هاما في تطوير التكنولوجيا الحديثة ولذا ذلك يجب الاهتمام بة",
421
+ "a_grammar": "إن الذكاء الاصطناعي يؤدي دورا مهمّا في تطوير التكنولوجيا الحديثة ولذلك يجب الاهتمام به",
422
+ "a_punc": "ان الذكاء الاصطناعي يلعب دورا هاما في تطوير التكنولوجيا الحديثه ولذالك؛ يجب الاهتمام بة",
423
+ "b_corrected": "أن الذكاء الاصطناعي يلعب دورا هاما في تطوير التكنولوجيا الحديثة ولذا ذلك يجب الاهتمام به",
424
+ "b_suggestions": [
425
+ {
426
+ "alternatives": [],
427
+ "confidence": 1.0,
428
+ "correction": "به",
429
+ "end": 86,
430
+ "id": "0a44f7d9-1554-428c-809a-706631ffebdd",
431
+ "locked": true,
432
+ "original": "بة",
433
+ "priority": 1,
434
+ "start": 84,
435
+ "type": "spelling"
436
+ },
437
+ {
438
+ "alternatives": [
439
+ "أن",
440
+ "ال",
441
+ "من",
442
+ "ان"
443
+ ],
444
+ "confidence": 0.9,
445
+ "correction": "أن",
446
+ "end": 2,
447
+ "id": "512b3c30-eadb-40fc-a481-1d4cd0909459",
448
+ "locked": true,
449
+ "original": "ان",
450
+ "priority": 1,
451
+ "start": 0,
452
+ "type": "spelling"
453
+ },
454
+ {
455
+ "alternatives": [
456
+ "الحديثة",
457
+ "الحديث",
458
+ "الحديثي",
459
+ "الحديثه"
460
+ ],
461
+ "confidence": 0.9,
462
+ "correction": "الحديثة",
463
+ "end": 63,
464
+ "id": "9eb1f571-7852-4813-975d-f7cd79102ec8",
465
+ "locked": true,
466
+ "original": "الحديثه",
467
+ "priority": 1,
468
+ "start": 56,
469
+ "type": "spelling"
470
+ },
471
+ {
472
+ "alternatives": [
473
+ "ولذا ذلك",
474
+ "ولذالك"
475
+ ],
476
+ "confidence": 0.85,
477
+ "correction": "ولذا ذلك",
478
+ "end": 70,
479
+ "id": "266db5db-ed2a-49be-accb-76db9e07697f",
480
+ "locked": true,
481
+ "original": "ولذالك",
482
+ "priority": 1,
483
+ "start": 64,
484
+ "type": "spelling"
485
+ }
486
+ ],
487
+ "b_suggestion_count": 4,
488
+ "overlapping_suggestions": []
489
+ },
490
+ {
491
+ "id": "C7-04",
492
+ "category": 7,
493
+ "input": "هذة المدينه جميله جدا ومناخها معتدل طوال العام وسكانها طيبون جدا",
494
+ "desc": "Multiple ه→ة fixes: does grammar lock prevent punc from adding marks near those words?",
495
+ "a_spelling": "هذة المدينه جميله جدا ومناخها معتدل طوال العام وسكان طيبون جدا",
496
+ "a_grammar": "هذه المدينة جميلة جدا ومناخها معتدل طوال العام وسكانها طيبون جدا",
497
+ "a_punc": "هذة المدينه جميله جدا ومناخها معتدل طوال العام وسكانها طيبون جدا.",
498
+ "b_corrected": "هذه المدينة جميلة جدا ومناخها معتدل طوال العام وسكانها طيبون جدا.",
499
+ "b_suggestions": [
500
+ {
501
+ "alternatives": [],
502
+ "confidence": 0.8,
503
+ "correction": "جدا.",
504
+ "end": 64,
505
+ "id": "0dd9d98a-f146-492b-87c7-dba4913bdfd4",
506
+ "locked": true,
507
+ "original": "جدا",
508
+ "priority": 2,
509
+ "start": 61,
510
+ "type": "punctuation"
511
+ },
512
+ {
513
+ "alternatives": [],
514
+ "confidence": 1.0,
515
+ "correction": "هذه المدينة جميلة",
516
+ "end": 17,
517
+ "id": "e12aa2f9-63d7-4f10-a128-13b0b1bbee9a",
518
+ "locked": true,
519
+ "original": "هذة المدينه جميله",
520
+ "priority": 1,
521
+ "start": 0,
522
+ "type": "spelling"
523
+ }
524
+ ],
525
+ "b_suggestion_count": 2,
526
+ "overlapping_suggestions": []
527
+ },
528
+ {
529
+ "id": "C7-05",
530
+ "category": 7,
531
+ "input": "الطلاب اللذين اجتهدو في دراستهم حققو نتائج ممتازه في الأمتحانات الصعبه",
532
+ "desc": "Heavy corrections needed across stages",
533
+ "a_spelling": "الطلاب اللذين اجتهد في دراستهم حقوق نتائج ممتازه في الأمتحانات الصعبه",
534
+ "a_grammar": "الطلاب الذين اجتهدو في دراستهم حققوا نتائج ممتازة في الامتحانات الصعبة",
535
+ "a_punc": "الطلاب اللذين اجتهدو في دراستهم حققو نتائج ممتازه في الأمتحانات الصعبه.",
536
+ "b_corrected": "الطلاب اللذين اجتهد في دراستهم حققوا نتائج ممتازة في الأمتحانات الصعبة.",
537
+ "b_suggestions": [
538
+ {
539
+ "alternatives": [],
540
+ "confidence": 1.0,
541
+ "correction": "حققوا",
542
+ "end": 36,
543
+ "id": "b2ae6a56-0879-4572-837f-875895de9020",
544
+ "locked": true,
545
+ "original": "حققو",
546
+ "priority": 3,
547
+ "start": 32,
548
+ "type": "grammar"
549
+ },
550
+ {
551
+ "alternatives": [],
552
+ "confidence": 0.8,
553
+ "correction": "الصعبة.",
554
+ "end": 70,
555
+ "id": "b075d0de-0e6d-4d88-a897-8e79e1845116",
556
+ "locked": true,
557
+ "original": "الصعبه",
558
+ "priority": 2,
559
+ "start": 64,
560
+ "type": "punctuation"
561
+ },
562
+ {
563
+ "alternatives": [],
564
+ "confidence": 1.0,
565
+ "correction": "ممتازة",
566
+ "end": 49,
567
+ "id": "ee092cbc-bc6f-41f0-9c98-7cc2edeee671",
568
+ "locked": true,
569
+ "original": "ممتازه",
570
+ "priority": 1,
571
+ "start": 43,
572
+ "type": "spelling"
573
+ },
574
+ {
575
+ "alternatives": [
576
+ "اجتهد",
577
+ "اجتهدو"
578
+ ],
579
+ "confidence": 0.9,
580
+ "correction": "اجتهد",
581
+ "end": 20,
582
+ "id": "97a2307e-5a5a-4668-a4fe-052bbf86c4d9",
583
+ "locked": true,
584
+ "original": "اجتهدو",
585
+ "priority": 1,
586
+ "start": 14,
587
+ "type": "spelling"
588
+ }
589
+ ],
590
+ "b_suggestion_count": 4,
591
+ "overlapping_suggestions": []
592
+ }
593
+ ],
594
+ "cat8x": [
595
+ {
596
+ "id": "C8X-مدرسة-al",
597
+ "category": 8,
598
+ "input": "المدرسة",
599
+ "root": "مدرسة",
600
+ "prefix_combo": "al",
601
+ "track_a_spelling": "المدرسة",
602
+ "changed": false
603
+ },
604
+ {
605
+ "id": "C8X-مدرسة-wal",
606
+ "category": 8,
607
+ "input": "والمدرسة",
608
+ "root": "مدرسة",
609
+ "prefix_combo": "wal",
610
+ "track_a_spelling": "والمدرسة في المدرسة",
611
+ "changed": true
612
+ },
613
+ {
614
+ "id": "C8X-مدرسة-bal",
615
+ "category": 8,
616
+ "input": "بالمدرسة",
617
+ "root": "مدرسة",
618
+ "prefix_combo": "bal",
619
+ "track_a_spelling": "بالمدرسة في المدرسة",
620
+ "changed": true
621
+ },
622
+ {
623
+ "id": "C8X-مدرسة-lal",
624
+ "category": 8,
625
+ "input": "للمدرسة",
626
+ "root": "مدرسة",
627
+ "prefix_combo": "lal",
628
+ "track_a_spelling": "للمدرسة",
629
+ "changed": false
630
+ },
631
+ {
632
+ "id": "C8X-شمس-al",
633
+ "category": 8,
634
+ "input": "الشمس",
635
+ "root": "شمس",
636
+ "prefix_combo": "al",
637
+ "track_a_spelling": "الشمس",
638
+ "changed": false
639
+ },
640
+ {
641
+ "id": "C8X-شمس-wal",
642
+ "category": 8,
643
+ "input": "والشمس",
644
+ "root": "شمس",
645
+ "prefix_combo": "wal",
646
+ "track_a_spelling": "والشمس والشمس",
647
+ "changed": true
648
+ },
649
+ {
650
+ "id": "C8X-شمس-bal",
651
+ "category": 8,
652
+ "input": "بالشمس",
653
+ "root": "شمس",
654
+ "prefix_combo": "bal",
655
+ "track_a_spelling": "الشمس",
656
+ "changed": true
657
+ },
658
+ {
659
+ "id": "C8X-شمس-lal",
660
+ "category": 8,
661
+ "input": "للشمس",
662
+ "root": "شمس",
663
+ "prefix_combo": "lal",
664
+ "track_a_spelling": "للشمس",
665
+ "changed": false
666
+ },
667
+ {
668
+ "id": "C8X-أمة-al",
669
+ "category": 8,
670
+ "input": "الأمة",
671
+ "root": "أمة",
672
+ "prefix_combo": "al",
673
+ "track_a_spelling": "الأمة",
674
+ "changed": false
675
+ },
676
+ {
677
+ "id": "C8X-أمة-wal",
678
+ "category": 8,
679
+ "input": "والأمة",
680
+ "root": "أمة",
681
+ "prefix_combo": "wal",
682
+ "track_a_spelling": "والأمة الأمة",
683
+ "changed": true
684
+ },
685
+ {
686
+ "id": "C8X-أمة-bal",
687
+ "category": 8,
688
+ "input": "بالأمة",
689
+ "root": "أمة",
690
+ "prefix_combo": "bal",
691
+ "track_a_spelling": "الأمة",
692
+ "changed": true
693
+ },
694
+ {
695
+ "id": "C8X-أمة-lal",
696
+ "category": 8,
697
+ "input": "للأمة",
698
+ "root": "أمة",
699
+ "prefix_combo": "lal",
700
+ "track_a_spelling": "للأمة",
701
+ "changed": false
702
+ },
703
+ {
704
+ "id": "C8X-نافذة-al",
705
+ "category": 8,
706
+ "input": "النافذة",
707
+ "root": "نافذة",
708
+ "prefix_combo": "al",
709
+ "track_a_spelling": "النافذة",
710
+ "changed": false
711
+ },
712
+ {
713
+ "id": "C8X-نافذة-wal",
714
+ "category": 8,
715
+ "input": "والنافذة",
716
+ "root": "نافذة",
717
+ "prefix_combo": "wal",
718
+ "track_a_spelling": "النافذة",
719
+ "changed": true
720
+ },
721
+ {
722
+ "id": "C8X-نافذة-bal",
723
+ "category": 8,
724
+ "input": "بالنافذة",
725
+ "root": "نافذة",
726
+ "prefix_combo": "bal",
727
+ "track_a_spelling": "النافذة",
728
+ "changed": true
729
+ },
730
+ {
731
+ "id": "C8X-نافذة-lal",
732
+ "category": 8,
733
+ "input": "للنافذة",
734
+ "root": "نافذة",
735
+ "prefix_combo": "lal",
736
+ "track_a_spelling": "النافذة",
737
+ "changed": true
738
+ },
739
+ {
740
+ "id": "C8X-علم-al",
741
+ "category": 8,
742
+ "input": "العلم",
743
+ "root": "علم",
744
+ "prefix_combo": "al",
745
+ "track_a_spelling": "العلم",
746
+ "changed": false
747
+ },
748
+ {
749
+ "id": "C8X-علم-wal",
750
+ "category": 8,
751
+ "input": "والعلم",
752
+ "root": "علم",
753
+ "prefix_combo": "wal",
754
+ "track_a_spelling": "والعلم هو العلم",
755
+ "changed": true
756
+ },
757
+ {
758
+ "id": "C8X-علم-bal",
759
+ "category": 8,
760
+ "input": "بالعلم",
761
+ "root": "علم",
762
+ "prefix_combo": "bal",
763
+ "track_a_spelling": "العلم بالعلم",
764
+ "changed": true
765
+ },
766
+ {
767
+ "id": "C8X-علم-lal",
768
+ "category": 8,
769
+ "input": "للعلم",
770
+ "root": "علم",
771
+ "prefix_combo": "lal",
772
+ "track_a_spelling": "للعلم",
773
+ "changed": false
774
+ },
775
+ {
776
+ "id": "C8X-اقتصاد-al",
777
+ "category": 8,
778
+ "input": "الاقتصاد",
779
+ "root": "اقتصاد",
780
+ "prefix_combo": "al",
781
+ "track_a_spelling": "الاقتصاد",
782
+ "changed": false
783
+ },
784
+ {
785
+ "id": "C8X-اقتصاد-wal",
786
+ "category": 8,
787
+ "input": "والاقتصاد",
788
+ "root": "اقتصاد",
789
+ "prefix_combo": "wal",
790
+ "track_a_spelling": "والاقتصاد",
791
+ "changed": false
792
+ },
793
+ {
794
+ "id": "C8X-اقتصاد-bal",
795
+ "category": 8,
796
+ "input": "بالاقتصاد",
797
+ "root": "اقتصاد",
798
+ "prefix_combo": "bal",
799
+ "track_a_spelling": "بالاقتصاد في الاقتصاد",
800
+ "changed": true
801
+ },
802
+ {
803
+ "id": "C8X-اقتصاد-lal",
804
+ "category": 8,
805
+ "input": "للاقتصاد",
806
+ "root": "اقتصاد",
807
+ "prefix_combo": "lal",
808
+ "track_a_spelling": "للاقتصاد الاقتصادي",
809
+ "changed": true
810
+ }
811
+ ],
812
+ "cat9x": [
813
+ {
814
+ "id": "C9X-01",
815
+ "category": 9,
816
+ "input": "إنّ",
817
+ "context": "isolation",
818
+ "concern": "stays إنّ",
819
+ "track_a_spelling": "إن إن",
820
+ "changed": true
821
+ },
822
+ {
823
+ "id": "C9X-02",
824
+ "category": 9,
825
+ "input": "أنّ",
826
+ "context": "isolation",
827
+ "concern": "stays أنّ",
828
+ "track_a_spelling": "أن أن",
829
+ "changed": true
830
+ },
831
+ {
832
+ "id": "C9X-03",
833
+ "category": 9,
834
+ "input": "إنّ العلم نور",
835
+ "context": "sentence",
836
+ "concern": "إنّ stays",
837
+ "track_a_spelling": "إن العلم نور",
838
+ "changed": true
839
+ },
840
+ {
841
+ "id": "C9X-04",
842
+ "category": 9,
843
+ "input": "علمت أنّ الامتحان صعب",
844
+ "context": "sentence",
845
+ "concern": "أنّ stays",
846
+ "track_a_spelling": "علمت أن الامتحان صعب",
847
+ "changed": true
848
+ },
849
+ {
850
+ "id": "C9X-05",
851
+ "category": 9,
852
+ "input": "علي",
853
+ "context": "isolation",
854
+ "concern": "could be name علي or على",
855
+ "track_a_spelling": "علي",
856
+ "changed": false
857
+ },
858
+ {
859
+ "id": "C9X-06",
860
+ "category": 9,
861
+ "input": "ذهب علي إلى المدرسة",
862
+ "context": "sentence",
863
+ "concern": "علي is a name here",
864
+ "track_a_spelling": "ذهب علي إلى المدرسة",
865
+ "changed": false
866
+ },
867
+ {
868
+ "id": "C9X-07",
869
+ "category": 9,
870
+ "input": "جلس علي الكرسي",
871
+ "context": "sentence",
872
+ "concern": "AMBIGUOUS: علي=name or على=on",
873
+ "track_a_spelling": "جلس علي الكرسي",
874
+ "changed": false
875
+ }
876
+ ],
877
+ "cat10x": [
878
+ {
879
+ "id": "C10X-01a",
880
+ "category": 10,
881
+ "input": "الحديقه جميلة جدا",
882
+ "concern": "error_at_start",
883
+ "track_a_spelling": "الحديقه جميلة جدا",
884
+ "a_changed": false,
885
+ "track_b_corrected": "الحديقة جميلة جدا.",
886
+ "track_b_suggestions": 2
887
+ },
888
+ {
889
+ "id": "C10X-01b",
890
+ "category": 10,
891
+ "input": "الجو حار في الحديقه",
892
+ "concern": "error_at_end",
893
+ "track_a_spelling": "الجو حار في الحديقة",
894
+ "a_changed": true,
895
+ "track_b_corrected": "الجو حار في الحديقة.",
896
+ "track_b_suggestions": 1
897
+ },
898
+ {
899
+ "id": "C10X-02a",
900
+ "category": 10,
901
+ "input": "الى المدرسة ذهب الولد",
902
+ "concern": "error_at_start",
903
+ "track_a_spelling": "إلى المدرسة ذهب الولد",
904
+ "a_changed": true,
905
+ "track_b_corrected": "إلى المدرسة ذهب الولد.",
906
+ "track_b_suggestions": 2
907
+ },
908
+ {
909
+ "id": "C10X-02b",
910
+ "category": 10,
911
+ "input": "ذهب الولد الى المدرسة",
912
+ "concern": "error_at_end",
913
+ "track_a_spelling": "ذهب الولد إلى المدرسة",
914
+ "a_changed": true,
915
+ "track_b_corrected": "ذهب الولد إلى المدرسة.",
916
+ "track_b_suggestions": 2
917
+ },
918
+ {
919
+ "id": "C10X-DRIFT",
920
+ "category": 10,
921
+ "input_len": 713,
922
+ "word_count": 119,
923
+ "total_suggestions": 16,
924
+ "front_half_suggestions": 11,
925
+ "back_half_suggestions": 5,
926
+ "coordinate_mismatches": [],
927
+ "suggestions_detail": [
928
+ {
929
+ "alternatives": [],
930
+ "confidence": 1.0,
931
+ "correction": "إحدىهن وبدأت",
932
+ "end": 62,
933
+ "id": "e892df95-0d05-40bd-969a-ccda1305cf2c",
934
+ "locked": true,
935
+ "original": "احداهن وبدءت",
936
+ "priority": 3,
937
+ "start": 50,
938
+ "type": "grammar"
939
+ },
940
+ {
941
+ "alternatives": [],
942
+ "confidence": 1.0,
943
+ "correction": "اجتهدوا",
944
+ "end": 243,
945
+ "id": "eef5aad7-31f7-4c1e-8095-88dbdda98944",
946
+ "locked": true,
947
+ "original": "اجتهدو",
948
+ "priority": 3,
949
+ "start": 237,
950
+ "type": "grammar"
951
+ },
952
+ {
953
+ "alternatives": [],
954
+ "confidence": 1.0,
955
+ "correction": "حققوا",
956
+ "end": 259,
957
+ "id": "abfaa89c-119e-4899-9456-6ee78c929298",
958
+ "locked": true,
959
+ "original": "حققو",
960
+ "priority": 3,
961
+ "start": 255,
962
+ "type": "grammar"
963
+ },
964
+ {
965
+ "alternatives": [],
966
+ "confidence": 1.0,
967
+ "correction": "",
968
+ "end": 712,
969
+ "id": "afcc69ca-f5d8-4907-b85a-e348d0d06a12",
970
+ "locked": true,
971
+ "original": "بين الأشجار",
972
+ "priority": 3,
973
+ "start": 701,
974
+ "type": "grammar"
975
+ },
976
+ {
977
+ "alternatives": [],
978
+ "confidence": 0.8,
979
+ "correction": "محمد،",
980
+ "end": 282,
981
+ "id": "4eef8996-7a31-4d0e-83ca-e05604b975e0",
982
+ "locked": true,
983
+ "original": "محمد",
984
+ "priority": 2,
985
+ "start": 278,
986
+ "type": "punctuation"
987
+ },
988
+ {
989
+ "alternatives": [],
990
+ "confidence": 0.8,
991
+ "correction": "جمهورية،",
992
+ "end": 424,
993
+ "id": "7e82e486-59af-4002-be9b-5b202dfe8492",
994
+ "locked": true,
995
+ "original": "جمهورية",
996
+ "priority": 2,
997
+ "start": 417,
998
+ "type": "punctuation"
999
+ },
1000
+ {
1001
+ "alternatives": [],
1002
+ "confidence": 0.8,
1003
+ "correction": "بين،",
1004
+ "end": 497,
1005
+ "id": "6b95fcb5-e190-4dec-8d69-22520c1bb6fe",
1006
+ "locked": true,
1007
+ "original": "بين",
1008
+ "priority": 2,
1009
+ "start": 494,
1010
+ "type": "punctuation"
1011
+ },
1012
+ {
1013
+ "alternatives": [],
1014
+ "confidence": 1.0,
1015
+ "correction": "الحديقة الجميلة وفجأة",
1016
+ "end": 44,
1017
+ "id": "73493796-1711-4996-9ee4-7013191bc9d8",
1018
+ "locked": true,
1019
+ "original": "الحديقه الجميله وفجأه",
1020
+ "priority": 1,
1021
+ "start": 23,
1022
+ "type": "spelling"
1023
+ },
1024
+ {
1025
+ "alternatives": [],
1026
+ "confidence": 1.0,
1027
+ "correction": "بشدة",
1028
+ "end": 72,
1029
+ "id": "13c914ea-5b75-4128-aa42-05576b3d55ae",
1030
+ "locked": true,
1031
+ "original": "بشده",
1032
+ "priority": 1,
1033
+ "start": 68,
1034
+ "type": "spelling"
1035
+ },
1036
+ {
1037
+ "alternatives": [],
1038
+ "confidence": 1.0,
1039
+ "correction": "إلى المدرسة",
1040
+ "end": 94,
1041
+ "id": "4e827496-d6f1-4a53-b2f7-c78c2d911195",
1042
+ "locked": true,
1043
+ "original": "الى المدرسه",
1044
+ "priority": 1,
1045
+ "start": 83,
1046
+ "type": "spelling"
1047
+ },
1048
+ {
1049
+ "alternatives": [],
1050
+ "confidence": 1.0,
1051
+ "correction": "المعلمة وأخذ",
1052
+ "end": 113,
1053
+ "id": "d7548b59-6379-4b95-a5ed-806e5d1d0cfb",
1054
+ "locked": true,
1055
+ "original": "المعلمه واخذ",
1056
+ "priority": 1,
1057
+ "start": 101,
1058
+ "type": "spelling"
1059
+ },
1060
+ {
1061
+ "alternatives": [],
1062
+ "confidence": 1.0,
1063
+ "correction": "أن",
1064
+ "end": 123,
1065
+ "id": "aa9b0140-5740-4343-a3a2-3adfa61fa9d9",
1066
+ "locked": true,
1067
+ "original": "ان",
1068
+ "priority": 1,
1069
+ "start": 121,
1070
+ "type": "spelling"
1071
+ },
1072
+ {
1073
+ "alternatives": [],
1074
+ "confidence": 1.0,
1075
+ "correction": "هذه المدينة جميلة",
1076
+ "end": 194,
1077
+ "id": "03378376-a164-46c8-8493-55a0dcd97e3e",
1078
+ "locked": true,
1079
+ "original": "هذة المدينه جميله",
1080
+ "priority": 1,
1081
+ "start": 177,
1082
+ "type": "spelling"
1083
+ },
1084
+ {
1085
+ "alternatives": [],
1086
+ "confidence": 1.0,
1087
+ "correction": "ممتازة",
1088
+ "end": 272,
1089
+ "id": "b3aac62f-6a7c-4625-b608-5258fea91fcd",
1090
+ "locked": true,
1091
+ "original": "ممتازه",
1092
+ "priority": 1,
1093
+ "start": 266,
1094
+ "type": "spelling"
1095
+ },
1096
+ {
1097
+ "alternatives": [],
1098
+ "confidence": 1.0,
1099
+ "correction": "هذه المحاضرة",
1100
+ "end": 632,
1101
+ "id": "e7994f0f-dc27-4c01-b055-0040683a7643",
1102
+ "locked": true,
1103
+ "original": "هذة المحاضره",
1104
+ "priority": 1,
1105
+ "start": 620,
1106
+ "type": "spelling"
1107
+ },
1108
+ {
1109
+ "alternatives": [],
1110
+ "confidence": 1.0,
1111
+ "correction": "أهمية",
1112
+ "end": 641,
1113
+ "id": "f68bfac9-17f8-4bbe-9def-1ee35e6ac76a",
1114
+ "locked": true,
1115
+ "original": "اهمية",
1116
+ "priority": 1,
1117
+ "start": 636,
1118
+ "type": "spelling"
1119
+ }
1120
+ ]
1121
+ }
1122
+ ],
1123
+ "cat11": [
1124
+ {
1125
+ "id": "C11-01",
1126
+ "category": 11,
1127
+ "input": "",
1128
+ "desc": "empty_string",
1129
+ "input_len": 0,
1130
+ "crashed": false,
1131
+ "b_corrected": "",
1132
+ "b_suggestions": 0,
1133
+ "error": "HTTP 400: {\"error\":\"Text is required\",\"status\":\"error\"}\n"
1134
+ },
1135
+ {
1136
+ "id": "C11-02",
1137
+ "category": 11,
1138
+ "input": " ",
1139
+ "desc": "whitespace_only",
1140
+ "input_len": 1,
1141
+ "crashed": false,
1142
+ "b_corrected": " ",
1143
+ "b_suggestions": 0,
1144
+ "error": "HTTP 400: {\"error\":\"Text is required\",\"status\":\"error\"}\n"
1145
+ },
1146
+ {
1147
+ "id": "C11-03",
1148
+ "category": 11,
1149
+ "input": "أ",
1150
+ "desc": "single_char",
1151
+ "input_len": 1,
1152
+ "crashed": false,
1153
+ "b_corrected": "أ؟",
1154
+ "b_suggestions": 1,
1155
+ "error": null
1156
+ },
1157
+ {
1158
+ "id": "C11-04",
1159
+ "category": 11,
1160
+ "input": "مستشفياتهم",
1161
+ "desc": "long_single_word",
1162
+ "input_len": 10,
1163
+ "crashed": false,
1164
+ "b_corrected": "في مستشفيات هم",
1165
+ "b_suggestions": 1,
1166
+ "error": null
1167
+ },
1168
+ {
1169
+ "id": "C11-05",
1170
+ "category": 11,
1171
+ "input": "ذهبالولدالىالمدرسةوقابلالمعلمة",
1172
+ "desc": "no_spaces",
1173
+ "input_len": 30,
1174
+ "crashed": false,
1175
+ "b_corrected": "ذهبالولدالىالمدرسةوقابلالمعلمة.",
1176
+ "b_suggestions": 1,
1177
+ "error": null
1178
+ },
1179
+ {
1180
+ "id": "C11-06",
1181
+ "category": 11,
1182
+ "input": "...!؟،،؛؛::...",
1183
+ "desc": "all_punctuation",
1184
+ "input_len": 14,
1185
+ "crashed": false,
1186
+ "b_corrected": ". ! ؟ ، ؛ ::.",
1187
+ "b_suggestions": 1,
1188
+ "error": null
1189
+ },
1190
+ {
1191
+ "id": "C11-07",
1192
+ "category": 11,
1193
+ "input": "(([{هذا النص}]))",
1194
+ "desc": "unbalanced_brackets",
1195
+ "input_len": 16,
1196
+ "crashed": false,
1197
+ "b_corrected": "( ( [ { هذا النص } ] ، و",
1198
+ "b_suggestions": 1,
1199
+ "error": null
1200
+ },
1201
+ {
1202
+ "id": "C11-08",
1203
+ "category": 11,
1204
+ "input": "\"هذا\" 'نص' «اختبار»",
1205
+ "desc": "mixed_quotes",
1206
+ "input_len": 19,
1207
+ "crashed": false,
1208
+ "b_corrected": "\" هذا \" مُنصا ' ' « اختبارا »",
1209
+ "b_suggestions": 1,
1210
+ "error": null
1211
+ },
1212
+ {
1213
+ "id": "C11-09",
1214
+ "category": 11,
1215
+ "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطن",
1216
+ "desc": "len_299",
1217
+ "input_len": 299,
1218
+ "crashed": false,
1219
+ "b_corrected": "يستخدم الذكاء الاصطناعي تقنيات، التعلم العميق تستخدم الذكاء الاالعميق،ناعي التقنيات التالتعلم،م العميق يستخدم الذكاء الاصطناعية تقنيات التعلم العميقة يستخدم الذكاء الصناعي تقنيات التعلم عميقا يستخدم ا",
1220
+ "b_suggestions": 7,
1221
+ "error": null
1222
+ },
1223
+ {
1224
+ "id": "C11-10",
1225
+ "category": 11,
1226
+ "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطن",
1227
+ "desc": "len_300",
1228
+ "input_len": 300,
1229
+ "crashed": false,
1230
+ "b_corrected": "يستخدم الذكاء الاصطناعي تقنيات، التعلم العميق تستخدم الذكاء االعميق،صطنالذكاء، التقنيات التعلم العميق يستخدم الذكاء الاصطناعية تقنيات التعلم العميقة يستخدم الذكاء الصناعي تقنيات التعلم عميقا يستخدم ال",
1231
+ "b_suggestions": 14,
1232
+ "error": null
1233
+ },
1234
+ {
1235
+ "id": "C11-11",
1236
+ "category": 11,
1237
+ "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطن",
1238
+ "desc": "len_301",
1239
+ "input_len": 301,
1240
+ "crashed": false,
1241
+ "b_corrected": "يستخدم الذكاء الاصطناعي تقنيات، التعلم العميق تستخدم الذكاء االعميق،صطنالذكاء، التقنيات التعلم العميق يستخدم الذكاء الاصطناعية تقنيات التعلم العميقة يستخدم الذكاء الصناعي تقنيات التعلم عميقا يستخدم ال",
1242
+ "b_suggestions": 14,
1243
+ "error": null
1244
+ },
1245
+ {
1246
+ "id": "C11-12",
1247
+ "category": 11,
1248
+ "input": "يلعب الطلاب في الحديقه بعد المدرسه وقبل العشاء",
1249
+ "desc": "multi_stage_disagreement",
1250
+ "input_len": 46,
1251
+ "crashed": false,
1252
+ "b_corrected": "يلعب الطلاب في الحديقة بعد المدرسة وقبل العشاء.",
1253
+ "b_suggestions": 3,
1254
+ "error": null
1255
+ },
1256
+ {
1257
+ "id": "C11-13",
1258
+ "category": 11,
1259
+ "input": "الحمد لله",
1260
+ "desc": "model_returns_identical",
1261
+ "input_len": 9,
1262
+ "crashed": false,
1263
+ "b_corrected": "الحمد لله.",
1264
+ "b_suggestions": 1,
1265
+ "error": null
1266
+ },
1267
+ {
1268
+ "id": "C11-14",
1269
+ "category": 11,
1270
+ "input": "مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مر",
1271
+ "desc": "100x_repeated_word",
1272
+ "input_len": 600,
1273
+ "crashed": false,
1274
+ "b_corrected": "مرحبا مرحبا مرحبا ومرحبا مرحبا مرحبامرحبا مرحبا مرحبا مرحب مرحبا مرحبا وسهلا مرحبا مرحبا ترحيبا مرحبا مرحبا يا مرحبا مرحبا نرحب مرحبا مرحبا ترحيب مرحبا مرحبا أهلا مرحبا مرحبا اهلا مرحبا مرحبا وداعا مر",
1275
+ "b_suggestions": 4,
1276
+ "error": null
1277
+ },
1278
+ {
1279
+ "id": "C11-15",
1280
+ "category": 11,
1281
+ "input": "I went to the مدرسة and met the معلم in the فصل",
1282
+ "desc": "heavy_code_switch",
1283
+ "input_len": 47,
1284
+ "crashed": false,
1285
+ "b_corrected": "I went to the مدرسة and met the معلم in the الفصل.",
1286
+ "b_suggestions": 1,
1287
+ "error": null
1288
+ },
1289
+ {
1290
+ "id": "C11-16",
1291
+ "category": 11,
1292
+ "input": "ايش هالحكي يا زلمة",
1293
+ "desc": "levantine_dialect",
1294
+ "input_len": 18,
1295
+ "crashed": false,
1296
+ "b_corrected": "إيش هالحكي يا زلمة؟",
1297
+ "b_suggestions": 2,
1298
+ "error": null
1299
+ },
1300
+ {
1301
+ "id": "C11-17",
1302
+ "category": 11,
1303
+ "input": "شنو تسوي هسه",
1304
+ "desc": "iraqi_dialect",
1305
+ "input_len": 12,
1306
+ "crashed": false,
1307
+ "b_corrected": "شنو تسوي هسة",
1308
+ "b_suggestions": 1,
1309
+ "error": null
1310
+ },
1311
+ {
1312
+ "id": "C11-RACE",
1313
+ "category": 11,
1314
+ "input": "كانت الفتيات يلعبون في الحديقه",
1315
+ "desc": "parallel_race_condition",
1316
+ "r1_corrected": "كانت الفتيات يلعبن في الحديقة.",
1317
+ "r2_corrected": "كانت الفتيات يلعبن في الحديقة.",
1318
+ "r1_suggestions": 2,
1319
+ "r2_suggestions": 2,
1320
+ "identical": true
1321
+ }
1322
+ ]
1323
+ }
tests/deep_dive_expanded.py ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ BAYAN Deep-Dive Test Harness — EXPANDED (ALL Categories)
3
+ Covers every item from the original prompt that was missing.
4
+ """
5
+ import sys, os, re, json, time, argparse, concurrent.futures
6
+ from datetime import datetime, timezone
7
+ import requests
8
+
9
+ API_BASE = "https://bayan10-bayan-api.hf.space"
10
+ TIMEOUT = 60
11
+
12
def api_call(endpoint, text, retries=2):
    """POST ``text`` to a BAYAN API endpoint and return the decoded JSON reply.

    Args:
        endpoint: Path appended to ``API_BASE`` (e.g. ``"/api/spelling"``).
        text: Text sent as the ``"text"`` field of the JSON request body.
        retries: Number of extra attempts made after a non-200 response.

    Returns:
        On HTTP 200, the response JSON with ``_elapsed_ms`` and ``_timestamp``
        (UTC, ISO format) added. Otherwise a dict holding an ``"error"``
        message (plus ``_elapsed_ms`` when the server answered with a
        non-200 status).
    """
    url = f"{API_BASE}{endpoint}"
    for attempt in range(retries + 1):
        try:
            started = time.time()
            resp = requests.post(url, json={"text": text}, timeout=TIMEOUT)
            elapsed = int((time.time() - started) * 1000)
            if resp.status_code != 200:
                # Only non-200 answers are retried; timeouts and any other
                # exception return immediately from the handlers below.
                if attempt < retries:
                    time.sleep(2)
                    continue
                return {"error": f"HTTP {resp.status_code}: {resp.text[:200]}", "_elapsed_ms": elapsed}
            payload = resp.json()
            payload['_elapsed_ms'] = elapsed
            payload['_timestamp'] = datetime.now(timezone.utc).isoformat()
            return payload
        except requests.exceptions.Timeout:
            return {"error": f"Timeout after {TIMEOUT}s"}
        except Exception as e:
            return {"error": str(e)}
33
+
34
def track_a_spelling(text):
    """Run only the spelling endpoint on ``text`` (Track A).

    Returns a dict with ``input``, ``output`` and ``changed``; on a failed
    call ``output`` falls back to ``text`` and ``error`` carries the message,
    otherwise ``elapsed_ms`` is included.
    """
    reply = api_call("/api/spelling", text)
    failed = "error" in reply and "corrected_text" not in reply
    if failed:
        return {"input": text, "output": text, "error": reply["error"], "changed": False}
    corrected = reply.get("corrected_text", text)
    return {
        "input": text,
        "output": corrected,
        "changed": corrected != text,
        "elapsed_ms": reply.get("_elapsed_ms"),
    }
40
+
41
def track_a_grammar(text):
    """Run only the grammar endpoint on ``text`` (Track A).

    Returns a dict with ``input``, ``output`` and ``changed``; on a failed
    call ``output`` falls back to ``text`` and ``error`` carries the message,
    otherwise ``elapsed_ms`` and ``timestamp`` are included.
    """
    reply = api_call("/api/grammar", text)
    failed = "error" in reply and "corrected_text" not in reply
    if failed:
        return {"input": text, "output": text, "error": reply["error"], "changed": False}
    corrected = reply.get("corrected_text", text)
    return {
        "input": text,
        "output": corrected,
        "changed": corrected != text,
        "elapsed_ms": reply.get("_elapsed_ms"),
        "timestamp": reply.get("_timestamp"),
    }
47
+
48
def track_a_punctuation(text):
    """Run only the punctuation endpoint on ``text`` (Track A).

    Besides ``input``/``output``/``changed``, the result reports
    ``marks_added``: the number of punctuation characters in the output
    minus the number in the input (may be negative).
    """
    reply = api_call("/api/punctuation", text)
    failed = "error" in reply and "corrected_text" not in reply
    if failed:
        return {"input": text, "output": text, "error": reply["error"], "changed": False}
    corrected = reply.get("corrected_text", text)
    marks = '.,;:!?،؛؟'  # Latin and Arabic punctuation counted for the delta
    was_changed = corrected != text
    marks_after = sum(1 for ch in corrected if ch in marks)
    marks_before = sum(1 for ch in text if ch in marks)
    return {
        "input": text,
        "output": corrected,
        "changed": was_changed,
        "marks_added": marks_after - marks_before,
        "elapsed_ms": reply.get("_elapsed_ms"),
    }
57
+
58
def track_b_analyze(text):
    """Run the full pipeline endpoint ``/api/analyze`` on ``text`` (Track B).

    On a failed call the result holds ``error``, an empty ``suggestions``
    list and ``corrected`` equal to ``text``. Otherwise it mirrors the
    response's ``original``, ``corrected``, ``suggestions`` and
    ``timing_ms`` fields (with defaults) plus ``elapsed_ms``.
    """
    reply = api_call("/api/analyze", text)
    failed = "error" in reply and "suggestions" not in reply
    if failed:
        return {"input": text, "error": reply["error"], "suggestions": [], "corrected": text}
    summary = {"input": text}
    summary["original"] = reply.get("original", text)
    summary["corrected"] = reply.get("corrected", text)
    summary["suggestions"] = reply.get("suggestions", [])
    summary["timing_ms"] = reply.get("timing_ms", {})
    summary["elapsed_ms"] = reply.get("_elapsed_ms")
    return summary
69
+
70
def log(msg):
    """Print ``msg`` prefixed with the local wall-clock time as ``[HH:MM:SS]``."""
    stamp = datetime.now().strftime('%H:%M:%S')
    print(f"[{stamp}] {msg}", flush=True)
72
+
73
+ # ═══════════════════════════════════════════════════════════════════
74
+ # CATEGORY 1 — Cross-model mismatch
75
+ # ═══════════════════════════════════════════════════════════════════
76
def run_cat1():
    """Category 1: compare the individual models against the full pipeline.

    For each test sentence this calls spelling, grammar and punctuation on
    the original text (Track A), grammar again on the spelling-corrected
    text, and the full pipeline (Track B). It then records a word-level diff
    between "grammar on original" and "grammar on spelling-corrected".

    Returns:
        A list with one result dict per test input.
    """
    # Local import: the file-level import line does not bring in itertools.
    from itertools import zip_longest

    log("=== CATEGORY 1: Cross-model mismatch ===")
    results = []
    inputs = [
        {"id": "C1-01", "input": "كانت الفتيات يلعبون في الحديقه"},
        {"id": "C1-02", "input": "ان الطالبات ذهبو الى الجامعه"},
        {"id": "C1-03", "input": "هذة المدينه جميله جدا ومناخها معتدل"},
        {"id": "C1-04", "input": "الطلاب اجتهدو في دراستهم وحققو نتائج ممتازه"},
        {"id": "C1-05", "input": "ذهب الولد الى المكتبه وقرا كتاب مفيد"},
    ]
    for test in inputs:
        log(f" {test['id']}: {test['input'][:50]}...")
        # Track A: each model on ORIGINAL independently
        a_spell = track_a_spelling(test['input'])
        a_gram_on_orig = track_a_grammar(test['input'])
        # Grammar on SPELLING-CORRECTED text
        a_gram_on_spell = track_a_grammar(a_spell['output'])
        a_punc = track_a_punctuation(test['input'])
        # Track B
        b = track_b_analyze(test['input'])

        # Diff: grammar on original vs grammar on spell-corrected.
        # zip_longest (not zip) so that words present in only one of the two
        # outputs are reported too; the missing side is None.
        gram_orig_words = a_gram_on_orig['output'].split()
        gram_spell_words = a_gram_on_spell['output'].split()
        gram_diff = [
            {"word_idx": i, "gram_on_orig": w1, "gram_on_spell": w2}
            for i, (w1, w2) in enumerate(zip_longest(gram_orig_words, gram_spell_words))
            if w1 != w2
        ]

        result = {
            "id": test['id'], "category": 1, "input": test['input'],
            "a_spelling": a_spell['output'],
            "a_grammar_on_original": a_gram_on_orig['output'],
            "a_grammar_on_spell_corrected": a_gram_on_spell['output'],
            "a_punctuation": a_punc['output'],
            "grammar_diff_orig_vs_spell": gram_diff,
            "b_corrected": b.get('corrected', ''),
            "b_suggestions": b.get('suggestions', []),
        }
        log(f" A_spell: {a_spell['output'][:60]}")
        log(f" A_gram(orig): {a_gram_on_orig['output'][:60]}")
        log(f" A_gram(spell): {a_gram_on_spell['output'][:60]}")
        log(f" Grammar diff: {gram_diff}")
        log(f" B_final: {b.get('corrected','')[:60]}")
        results.append(result)
    return results
122
+
123
+ # ═══════════════════════════════════════════════════════════════════
124
+ # CATEGORY 7 — StageLocker adversarial tests
125
+ # ═══════════════════════════════════════════════════════════════════
126
def run_cat7():
    """Category 7: adversarial inputs needing corrections from several stages.

    Each sentence is sent to the three individual endpoints and to the full
    pipeline; every pair of pipeline suggestions whose ``[start, end)``
    spans intersect is recorded under ``overlapping_suggestions``.

    Returns:
        A list with one result dict per test input.
    """
    log("=== CATEGORY 7: StageLocker directionality ===")
    collected = []
    # 3+ chained mutations: spelling changes length, grammar changes length, punc adds marks
    cases = [
        {"id": "C7-01", "input": "ذهب الولد الى المدرسه وقابل المعلمه واخذ الكتاب",
         "desc": "3-stage chain: spelling الى→إلى, grammar المدرسه→المدرسة, punc adds marks"},
        {"id": "C7-02", "input": "كانت البنات يلعبون في الحديقه الجميله وفجأه سقطت احداهن",
         "desc": "Multiple overlapping corrections across all stages"},
        {"id": "C7-03", "input": "ان الذكاء الاصطناعي يلعب دورا هاما في تطوير التكنولوجيا الحديثه ولذالك يجب الاهتمام بة",
         "desc": "Long sentence with corrections from all 3 stages"},
        {"id": "C7-04", "input": "هذة المدينه جميله جدا ومناخها معتدل طوال العام وسكانها طيبون جدا",
         "desc": "Multiple ه→ة fixes: does grammar lock prevent punc from adding marks near those words?"},
        {"id": "C7-05", "input": "الطلاب اللذين اجتهدو في دراستهم حققو نتائج ممتازه في الأمتحانات الصعبه",
         "desc": "Heavy corrections needed across stages"},
    ]
    for case in cases:
        sentence = case['input']
        log(f" {case['id']}: {sentence[:50]}...")
        spell_out = track_a_spelling(sentence)
        gram_out = track_a_grammar(sentence)
        punc_out = track_a_punctuation(sentence)
        pipeline = track_b_analyze(sentence)

        # Collect each unordered pair (first before second) of suggestions
        # whose character ranges intersect.
        suggestions = pipeline.get('suggestions', [])
        overlapping = [
            {"s1": first, "s2": second}
            for pos_a, first in enumerate(suggestions)
            for pos_b, second in enumerate(suggestions)
            if pos_a < pos_b
            and first.get('start', 0) < second.get('end', 0)
            and second.get('start', 0) < first.get('end', 0)
        ]

        entry = {
            "id": case['id'], "category": 7, "input": sentence,
            "desc": case['desc'],
            "a_spelling": spell_out['output'],
            "a_grammar": gram_out['output'],
            "a_punc": punc_out['output'],
            "b_corrected": pipeline.get('corrected', ''),
            "b_suggestions": suggestions,
            "b_suggestion_count": len(suggestions),
            "overlapping_suggestions": overlapping,
        }
        log(f" B_final: {pipeline.get('corrected','')[:60]}")
        log(f" Suggestions: {len(suggestions)}, Overlaps: {len(overlapping)}")
        collected.append(entry)
    return collected
172
+
173
+ # ═══════════════════════════════════════════════════════════════════
174
+ # CATEGORY 8 EXPANDED — with ال + prefix combos
175
+ # ═══════════════════════════════════════════════════════════════════
176
def run_cat8_expanded():
    """Category 8 (expanded): definite-article and prefix combinations.

    Sends each prefixed form of every root word to the spelling endpoint and
    logs a warning whenever the endpoint changes the word.

    Returns:
        A list with one result dict per (root, prefix form) pair.
    """
    log("=== CATEGORY 8 EXPANDED: ال + prefix combos ===")
    collected = []
    prefix_labels = ("al", "wal", "bal", "lal")
    combos = [
        # root, al_form, wal_form, bal_form, lal_form
        ("مدرسة", "المدرسة", "والمدرسة", "بالمدرسة", "للمدرسة"),
        ("شمس", "الشمس", "والشمس", "بالشمس", "للشمس"),
        ("أمة", "الأمة", "والأمة", "بالأمة", "للأمة"),
        ("نافذة", "النافذة", "والنافذة", "بالنافذة", "للنافذة"),
        ("علم", "العلم", "والعلم", "بالعلم", "للعلم"),
        ("اقتصاد", "الاقتصاد", "والاقتصاد", "بالاقتصاد", "للاقتصاد"),
    ]
    for root, *forms in combos:
        for label, word in zip(prefix_labels, forms):
            case_id = f"C8X-{root}-{label}"
            spelled = track_a_spelling(word)
            collected_entry = {
                "id": case_id, "category": 8, "input": word,
                "root": root, "prefix_combo": label,
                "track_a_spelling": spelled['output'], "changed": spelled.get('changed', False),
            }
            if spelled.get('changed'):
                log(f" ⚠ {case_id}: '{word}' → '{spelled['output']}'")
            collected.append(collected_entry)
    return collected
200
+
201
+ # ═══════════════════════════════════════════════════════════════════
202
+ # CATEGORY 9 EXPANDED — missing pairs
203
+ # ═══════════════════════════════════════════════════════════════════
204
def run_cat9_expanded():
    """Category 9 (expanded): confusable word pairs through the spelling endpoint.

    Each case is sent to the spelling endpoint only; the log shows whether
    the text was altered, together with the stated concern for that case.

    Returns:
        A list with one result dict per test case.
    """
    log("=== CATEGORY 9 EXPANDED: Missing confusable pairs ===")
    collected = []
    cases = [
        # إنّ / أنّ (with shadda)
        {"id": "C9X-01", "input": "إنّ", "context": "isolation", "concern": "stays إنّ"},
        {"id": "C9X-02", "input": "أنّ", "context": "isolation", "concern": "stays أنّ"},
        {"id": "C9X-03", "input": "إنّ العلم نور", "context": "sentence", "concern": "إنّ stays"},
        {"id": "C9X-04", "input": "علمت أنّ الامتحان صعب", "context": "sentence", "concern": "أنّ stays"},
        # على vs علي (name)
        {"id": "C9X-05", "input": "علي", "context": "isolation", "concern": "could be name علي or على"},
        {"id": "C9X-06", "input": "ذهب علي إلى المدرسة", "context": "sentence", "concern": "علي is a name here"},
        {"id": "C9X-07", "input": "جلس علي الكرسي", "context": "sentence", "concern": "AMBIGUOUS: علي=name or على=on"},
    ]
    for case in cases:
        spelled = track_a_spelling(case['input'])
        entry = {
            "id": case['id'], "category": 9, "input": case['input'],
            "context": case['context'], "concern": case['concern'],
            "track_a_spelling": spelled['output'], "changed": spelled.get('changed', False),
        }
        if not spelled.get('changed'):
            log(f" ✓ {case['id']}: no change")
        else:
            log(f" ⚠ {case['id']}: '{case['input']}' → '{spelled['output']}' ({case['concern']})")
        collected.append(entry)
    return collected
231
+
232
+ # ═══════════════════════════════════════════════════════════════════
233
+ # CATEGORY 10 EXPANDED — sentence position + 200-word drift test
234
+ # ═══════════════════════════════════════════════════════════════════
235
def run_cat10_expanded():
    """Category 10 (expanded): error position and coordinate drift on long text.

    Part one sends the same kind of error at different sentence positions to
    the spelling endpoint and the full pipeline. Part two sends one long
    paragraph through the pipeline and checks, for every suggestion, that
    ``long_text[start:end]`` equals the suggestion's ``original`` field.

    Returns:
        A list of result dicts: one per position test plus one drift summary.
    """
    log("=== CATEGORY 10 EXPANDED: Position + Cumulative drift ===")
    collected = []

    # Same error at sentence start vs middle
    log(" Sentence-initial vs mid-sentence:")
    position_tests = [
        {"id": "C10X-01a", "input": "الحديقه جميلة جدا", "concern": "error_at_start"},
        {"id": "C10X-01b", "input": "الجو حار في الحديقه", "concern": "error_at_end"},
        {"id": "C10X-02a", "input": "الى المدرسة ذهب الولد", "concern": "error_at_start"},
        {"id": "C10X-02b", "input": "ذهب الولد الى المدرسة", "concern": "error_at_end"},
    ]
    for case in position_tests:
        spelled = track_a_spelling(case['input'])
        pipeline = track_b_analyze(case['input'])
        suggestion_total = len(pipeline.get('suggestions', []))
        entry = {
            "id": case['id'], "category": 10, "input": case['input'],
            "concern": case['concern'],
            "track_a_spelling": spelled['output'], "a_changed": spelled.get('changed', False),
            "track_b_corrected": pipeline.get('corrected', ''),
            "track_b_suggestions": suggestion_total,
        }
        log(f" {case['id']}: A='{spelled['output'][:40]}' B_sugg={suggestion_total}")
        collected.append(entry)

    # 200+ word cumulative drift test
    log("\n 200+ word cumulative drift test:")
    long_text = (
        "كانت الفتيات يلعبون في الحديقه الجميله وفجأه سقطت احداهن وبدءت تبكي بشده "
        "ذهب الولد الى المدرسه وقابل المعلمه واخذ الكتاب "
        "ان الذكاء الاصطناعي يلعب دورا هاما في تطوير التكنولوجيا "
        "هذة المدينه جميله جدا ومناخها معتدل طوال العام "
        "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه "
        "سافر محمد إلى دبي للعمل في شركة جوجل وقابل أصدقاءه القدامى "
        "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق في معالجة البيانات الضخمة "
        "القاهرة عاصمة جمهورية مصر العربية وأكبر مدنها وتقع على ضفاف نهر النيل "
        "تتراوح درجات الحرارة بين خمس وعشرين وثلاثين درجة مئوية في فصل الصيف "
        "الحمد لله رب العالمين الرحمن الرحيم مالك يوم الدين "
        "بسم الله الرحمن الرحيم نبدأ هذة المحاضره عن اهمية التعليم "
        "يحب الأطفال اللعب في الحديقة وركوب الدراجات والجري بين الأشجار "
    )
    word_count = len(long_text.split())
    log(f" Input: {word_count} words, {len(long_text)} chars")

    drift_run = track_b_analyze(long_text)
    suggestions = drift_run.get('suggestions', [])
    # Split suggestions by whether they start in the back half of the text
    halfway = len(long_text) // 2
    back_half_sugg = [item for item in suggestions if item.get('start', 0) >= halfway]
    front_half_sugg = [item for item in suggestions if item.get('start', 0) < halfway]

    # Verify coordinates: does original[start:end] == suggestion['original']?
    coord_mismatches = []
    for item in suggestions:
        start, end = item.get('start', 0), item.get('end', 0)
        sliced = long_text[start:end]
        claimed = item.get('original', '')
        if sliced == claimed:
            continue
        coord_mismatches.append({
            "start": start, "end": end,
            "expected_from_coords": sliced,
            "actual_in_suggestion": claimed,
            "correction": item.get('correction', ''),
            "type": item.get('type', ''),
        })

    drift_entry = {
        "id": "C10X-DRIFT", "category": 10, "input_len": len(long_text),
        "word_count": word_count,
        "total_suggestions": len(suggestions),
        "front_half_suggestions": len(front_half_sugg),
        "back_half_suggestions": len(back_half_sugg),
        "coordinate_mismatches": coord_mismatches,
        "suggestions_detail": suggestions,
    }
    log(f" Total suggestions: {len(suggestions)} (front: {len(front_half_sugg)}, back: {len(back_half_sugg)})")
    log(f" Coordinate mismatches: {len(coord_mismatches)}")
    for miss in coord_mismatches:
        log(f" [{miss['start']}:{miss['end']}] expected='{miss['expected_from_coords']}' got='{miss['actual_in_suggestion']}'")
    collected.append(drift_entry)

    return collected
317
+
318
+ # ═══════════════════════════════════════════════════════════════════
319
+ # CATEGORY 11 — Genuine stress tests / edge cases
320
+ # ═══════════════════════════════════════════════════════════════════
321
def run_cat11():
    """Category 11: stress tests and edge cases against the full pipeline.

    Sends pathological, boundary-length, repetitive, code-switched and
    dialectal inputs to ``track_b_analyze`` and records whether the call
    failed. Finally issues two identical requests in parallel and records
    whether both produced the same corrected text and suggestion count.

    Returns:
        A list with one result dict per stress input plus one race-test dict.
    """
    log("=== CATEGORY 11: Edge case discovery (stress tests) ===")
    results = []
    tests = [
        # Pathological inputs
        {"id": "C11-01", "input": "", "desc": "empty_string"},
        {"id": "C11-02", "input": " ", "desc": "whitespace_only"},
        {"id": "C11-03", "input": "أ", "desc": "single_char"},
        {"id": "C11-04", "input": "مستشفياتهم", "desc": "long_single_word"},
        {"id": "C11-05", "input": "ذهبالولدالىالمدرسةوقابلالمعلمة", "desc": "no_spaces"},
        {"id": "C11-06", "input": "...!؟،،؛؛::...", "desc": "all_punctuation"},
        {"id": "C11-07", "input": "(([{هذا النص}]))", "desc": "unbalanced_brackets"},
        {"id": "C11-08", "input": "\"هذا\" 'نص' «اختبار»", "desc": "mixed_quotes"},
        # Boundary lengths (299, 300, 301 chars)
        {"id": "C11-09", "input": ("يستخدم الذكاء الاصطناعي تقنيات التعلم العميق " * 10)[:299], "desc": "len_299"},
        {"id": "C11-10", "input": ("يستخدم الذكاء الاصطناعي تقنيات التعلم العميق " * 10)[:300], "desc": "len_300"},
        {"id": "C11-11", "input": ("يستخدم الذكاء الاصطناعي تقنيات التعلم العميق " * 10)[:301], "desc": "len_301"},
        # Max disagreement: word that is both plausible spelling error AND grammatically ambiguous
        {"id": "C11-12", "input": "يلعب الطلاب في الحديقه بعد المدرسه وقبل العشاء", "desc": "multi_stage_disagreement"},
        # Correction identical to original (model returns same text)
        {"id": "C11-13", "input": "الحمد لله", "desc": "model_returns_identical"},
        # Very long repetitive text
        {"id": "C11-14", "input": "مرحبا " * 100, "desc": "100x_repeated_word"},
        # Mixed Arabic and English heavily
        {"id": "C11-15", "input": "I went to the مدرسة and met the معلم in the فصل", "desc": "heavy_code_switch"},
        # Dialectal variations
        {"id": "C11-16", "input": "ايش هالحكي يا زلمة", "desc": "levantine_dialect"},
        {"id": "C11-17", "input": "شنو تسوي هسه", "desc": "iraqi_dialect"},
    ]
    for test in tests:
        log(f" {test['id']}: '{test['input'][:40]}...' [{test['desc']}]")
        # Track B only for stress tests (we want to see if pipeline crashes)
        b = track_b_analyze(test['input'])
        # track_b_analyze always returns a "suggestions" key (an empty list on
        # failure), so a failed call is identified by the "error" key alone.
        crashed = "error" in b
        result = {
            "id": test['id'], "category": 11, "input": test['input'][:200],
            "desc": test['desc'], "input_len": len(test['input']),
            "crashed": crashed,
            "b_corrected": b.get('corrected', '')[:200] if not crashed else "CRASH",
            "b_suggestions": len(b.get('suggestions', [])),
            "error": b.get('error', None),
        }
        status = "💥 CRASH" if crashed else f"✓ ({len(b.get('suggestions',[]))} sugg)"
        log(f" {status}")
        results.append(result)

    # Race condition: 2 parallel requests with same input
    log("\n Race condition test (2 parallel requests):")
    race_input = "كانت الفتيات يلعبون في الحديقه"
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as ex:
        f1 = ex.submit(track_b_analyze, race_input)
        f2 = ex.submit(track_b_analyze, race_input)
        r1, r2 = f1.result(), f2.result()
    # "Identical" means same corrected text and same number of suggestions.
    race_match = r1.get('corrected') == r2.get('corrected') and len(r1.get('suggestions',[])) == len(r2.get('suggestions',[]))
    race_result = {
        "id": "C11-RACE", "category": 11, "input": race_input,
        "desc": "parallel_race_condition",
        "r1_corrected": r1.get('corrected', ''),
        "r2_corrected": r2.get('corrected', ''),
        "r1_suggestions": len(r1.get('suggestions', [])),
        "r2_suggestions": len(r2.get('suggestions', [])),
        "identical": race_match,
    }
    log(f" Race test: identical={race_match}")
    results.append(race_result)

    return results
388
+
389
+ # ═══════════════════════════════════════════════════════════════════
390
+ # MAIN
391
+ # ═══════════════════════════════════════════════════════════════════
392
def main():
    """Parse ``--stage``, run the selected categories and save the results.

    A health-check request is made first; if it raises (or its body is not
    valid JSON) the run is aborted without writing any output. Results are
    written to ``deep_dive_expanded.json`` next to this script.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--stage', choices=['cat1', 'cat7', 'cat8x', 'cat9x', 'cat10x', 'cat11', 'all'], default='all')
    args = parser.parse_args()

    all_results = {"timestamp": datetime.now(timezone.utc).isoformat(), "api_base": API_BASE}

    # Health check
    log(f"Health check: {API_BASE}")
    try:
        resp = requests.get(f"{API_BASE}/api/health", timeout=10)
        log(f" OK: {resp.status_code}")
        all_results['health'] = resp.json()
    except Exception as e:
        log(f" FAIL: {e}")
        return

    # Stage name -> runner, listed in execution order.
    stage_runners = (
        ('cat1', run_cat1),
        ('cat7', run_cat7),
        ('cat8x', run_cat8_expanded),
        ('cat9x', run_cat9_expanded),
        ('cat10x', run_cat10_expanded),
        ('cat11', run_cat11),
    )
    for stage_name, runner in stage_runners:
        if args.stage in (stage_name, 'all'):
            all_results[stage_name] = runner()

    output_path = os.path.join(os.path.dirname(__file__), 'deep_dive_expanded.json')
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(all_results, f, ensure_ascii=False, indent=2)
    log(f"\nSaved to {output_path}")


if __name__ == '__main__':
    main()
tests/deep_dive_gaps.json ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "2026-06-20T19:33:59.110768+00:00",
3
+ "gap1_drift": {
4
+ "word_count": 187,
5
+ "char_count": 1104,
6
+ "total_suggestions": 0,
7
+ "front_half": 0,
8
+ "back_half": 0,
9
+ "coordinate_mismatches": [],
10
+ "a_spelling_changed": true,
11
+ "a_grammar_changed": true,
12
+ "a_punc_changed": true,
13
+ "suggestions": []
14
+ },
15
+ "gap2_priority": [
16
+ {
17
+ "id": "G2-01",
18
+ "input": "الطلاب اجتهدو في الامتحان",
19
+ "desc": "اجتهدو — spelling should add ا, grammar may do different fix. Grammar wins (priority 3 > 1)",
20
+ "a_spelling": "الطلاب اجتهدو في الامتحان",
21
+ "a_grammar": "الطلاب اجتهدو في الامتحان",
22
+ "a_punctuation": "الطلاب اجتهدو في الامتحان.",
23
+ "b_corrected": "الطلاب اجتهد في الامتحين",
24
+ "b_suggestions": [
25
+ {
26
+ "alternatives": [],
27
+ "confidence": 1.0,
28
+ "correction": "الامتحين",
29
+ "end": 25,
30
+ "id": "502647e7-18fd-41d2-b1c7-2978a3ee7704",
31
+ "locked": true,
32
+ "original": "الامتحان",
33
+ "priority": 3,
34
+ "start": 17,
35
+ "type": "grammar"
36
+ },
37
+ {
38
+ "alternatives": [
39
+ "اجتهد",
40
+ "اجتهدو"
41
+ ],
42
+ "confidence": 0.9,
43
+ "correction": "اجتهد",
44
+ "end": 13,
45
+ "id": "0a93f071-af36-4219-b6f5-d11e748c4601",
46
+ "locked": true,
47
+ "original": "اجتهدو",
48
+ "priority": 1,
49
+ "start": 7,
50
+ "type": "spelling"
51
+ }
52
+ ]
53
+ },
54
+ {
55
+ "id": "G2-02",
56
+ "input": "البنات ذهبو الى البيت",
57
+ "desc": "ذهبو — spelling could give ذهبوا, grammar could give ذهبن (fem). Grammar wins.",
58
+ "a_spelling": "البنات ذهبو إلى البيت",
59
+ "a_grammar": "البنات ذهبن الى البيت",
60
+ "a_punctuation": "البنات ذهبو الى البيت.",
61
+ "b_corrected": "البنات ذهبن إلى البيت.",
62
+ "b_suggestions": [
63
+ {
64
+ "alternatives": [],
65
+ "confidence": 1.0,
66
+ "correction": "ذهبن",
67
+ "end": 11,
68
+ "id": "0f00a9ab-1166-4e4d-8dd7-ae6dba1f9f1e",
69
+ "locked": true,
70
+ "original": "ذهبو",
71
+ "priority": 3,
72
+ "start": 7,
73
+ "type": "grammar"
74
+ },
75
+ {
76
+ "alternatives": [],
77
+ "confidence": 0.8,
78
+ "correction": "البيت.",
79
+ "end": 21,
80
+ "id": "af3a0a21-5e1e-45f5-a1ad-9c3730b4ab25",
81
+ "locked": true,
82
+ "original": "البيت",
83
+ "priority": 2,
84
+ "start": 16,
85
+ "type": "punctuation"
86
+ },
87
+ {
88
+ "alternatives": [
89
+ "إلى",
90
+ "ال",
91
+ "الم",
92
+ "الى"
93
+ ],
94
+ "confidence": 0.9,
95
+ "correction": "إلى",
96
+ "end": 15,
97
+ "id": "1b7096dc-6043-4e1a-9de3-d59204327b86",
98
+ "locked": true,
99
+ "original": "الى",
100
+ "priority": 1,
101
+ "start": 12,
102
+ "type": "spelling"
103
+ }
104
+ ]
105
+ },
106
+ {
107
+ "id": "G2-03",
108
+ "input": "وفجأه سقطت الكتب",
109
+ "desc": "وفجأه — spelling may fix ه→ة; punctuation may want comma after it. Overlap?",
110
+ "a_spelling": "وفجأه سقطت الكتب",
111
+ "a_grammar": "وفجأة سقطت الكتب",
112
+ "a_punctuation": "وفجأه سقطت الكتب.",
113
+ "b_corrected": "وفجأة سقطت الكتب.",
114
+ "b_suggestions": [
115
+ {
116
+ "alternatives": [],
117
+ "confidence": 0.8,
118
+ "correction": "الكتب.",
119
+ "end": 16,
120
+ "id": "fc257e46-4368-4d32-acb0-de5b6d461aaf",
121
+ "locked": true,
122
+ "original": "الكتب",
123
+ "priority": 2,
124
+ "start": 11,
125
+ "type": "punctuation"
126
+ },
127
+ {
128
+ "alternatives": [],
129
+ "confidence": 1.0,
130
+ "correction": "وفجأة",
131
+ "end": 5,
132
+ "id": "7397e7e6-e238-4ed4-a184-461f576a74f6",
133
+ "locked": true,
134
+ "original": "وفجأه",
135
+ "priority": 1,
136
+ "start": 0,
137
+ "type": "spelling"
138
+ }
139
+ ]
140
+ }
141
+ ],
142
+ "gap3_dropped": {
143
+ "tests": [
144
+ {
145
+ "input": "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه في الامتحانات",
146
+ "a_spell_diffs": [
147
+ {
148
+ "word_idx": 2,
149
+ "original": "اجتهدو",
150
+ "corrected": "اجتهد"
151
+ },
152
+ {
153
+ "word_idx": 5,
154
+ "original": "حققو",
155
+ "corrected": "حقوق"
156
+ }
157
+ ],
158
+ "a_gram_diffs": [
159
+ {
160
+ "word_idx": 5,
161
+ "original": "حققو",
162
+ "corrected": "حققوا"
163
+ },
164
+ {
165
+ "word_idx": 7,
166
+ "original": "ممتازه",
167
+ "corrected": "ممتازة"
168
+ }
169
+ ],
170
+ "a_punc_diffs": [
171
+ {
172
+ "word_idx": 9,
173
+ "original": "الامتحانات",
174
+ "corrected": "الامتحانات."
175
+ }
176
+ ],
177
+ "b_suggestion_count": 4,
178
+ "dropped_spell": [],
179
+ "dropped_gram": [],
180
+ "dropped_punc": []
181
+ }
182
+ ]
183
+ },
184
+ "gap4_rare": {
185
+ "tests": [
186
+ {
187
+ "id": "R-01",
188
+ "input": "استوقفني المشهد فتأملته مليا",
189
+ "domain": "literary",
190
+ "output": "استوقفني المشهد فتأملتة مليا",
191
+ "changed": true
192
+ },
193
+ {
194
+ "id": "R-02",
195
+ "input": "تستأثر القوى العظمى بالنفوذ الدولي",
196
+ "domain": "political_literary",
197
+ "output": "تستأثر القوى العظمى بالنفوذ الدولي",
198
+ "changed": false
199
+ },
200
+ {
201
+ "id": "R-03",
202
+ "input": "استقطب المؤتمر ثلة من العلماء الأفذاذ",
203
+ "domain": "formal_rare",
204
+ "output": "استقطب المؤتمر ثلة من العلماء الأفذاذ",
205
+ "changed": false
206
+ },
207
+ {
208
+ "id": "R-04",
209
+ "input": "يتسنى للمرء أن يستشف الحقيقة من بين السطور",
210
+ "domain": "literary_verb",
211
+ "output": "يتسنى للمرء أن يكتشف الحقيقة من بين السطور",
212
+ "changed": true
213
+ },
214
+ {
215
+ "id": "R-05",
216
+ "input": "ألقى المحاضر خطبة عصماء استحوذت على إعجاب الحاضرين",
217
+ "domain": "oratory",
218
+ "output": "ألقى المحاضر خطبة علماء استحوذت على إعجاب الحاضرين",
219
+ "changed": true
220
+ },
221
+ {
222
+ "id": "R-06",
223
+ "input": "تمخض الاجتماع عن قرارات مصيرية",
224
+ "domain": "formal_verb",
225
+ "output": "تمخض الاجتماع عن قرارات مصيرية",
226
+ "changed": false
227
+ },
228
+ {
229
+ "id": "R-07",
230
+ "input": "أرهقته المسغبة فاستكان للقدر",
231
+ "domain": "classical",
232
+ "output": "طريقتة المسببة فاستكان القدر",
233
+ "changed": true
234
+ },
235
+ {
236
+ "id": "R-08",
237
+ "input": "نستشرف آفاق المستقبل بثقة واقتدار",
238
+ "domain": "formal_speech",
239
+ "output": "نستشرف آفاق المستقبل بثقة واقتدار",
240
+ "changed": false
241
+ },
242
+ {
243
+ "id": "R-09",
244
+ "input": "اعتراه القلق فتملكه الأرق",
245
+ "domain": "literary_psych",
246
+ "output": "اعتراه القلق فتملكة الأرق",
247
+ "changed": true
248
+ },
249
+ {
250
+ "id": "R-10",
251
+ "input": "استأنف العمل بعد فترة من التقاعس",
252
+ "domain": "formal_verb",
253
+ "output": "استأنف العمل بعد فترة من التقاعد",
254
+ "changed": true
255
+ }
256
+ ],
257
+ "fp_count": 6,
258
+ "total": 10
259
+ }
260
+ }
tests/deep_dive_gaps.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gap-filler tests for items explicitly requested in the prompt but not yet covered:
3
+ 1. 200+ word cumulative drift test (Cat 10)
4
+ 2. Lower-priority-wins limitation (Cat 4)
5
+ 3. Systematic dropped patch logging (Cat 3)
6
+ 4. Rare/literary vocabulary overcorrection (Cat 2)
7
+ """
8
+ import sys, os, json, time, requests
9
+ from datetime import datetime, timezone
10
+
11
+ API_BASE = "https://bayan10-bayan-api.hf.space"
12
+ TIMEOUT = 60
13
+
14
def api_call(endpoint, text, retries=2):
    """POST ``text`` to a BAYAN API endpoint and return the decoded JSON reply.

    Args:
        endpoint: Path appended to ``API_BASE`` (e.g. ``"/api/analyze"``).
        text: Text sent as the ``"text"`` field of the JSON request body.
        retries: Number of extra attempts made after a non-200 response.

    Returns:
        On HTTP 200, the response JSON with ``_elapsed_ms`` added. Otherwise
        a dict holding an ``"error"`` message (plus ``_elapsed_ms`` when the
        server answered with a non-200 status).
    """
    url = f"{API_BASE}{endpoint}"
    for attempt in range(retries + 1):
        try:
            started = time.time()
            resp = requests.post(url, json={"text": text}, timeout=TIMEOUT)
            elapsed = int((time.time() - started) * 1000)
            if resp.status_code != 200:
                # Only non-200 answers are retried; any exception (including
                # a timeout) returns immediately from the handler below.
                if attempt < retries:
                    time.sleep(2)
                    continue
                return {"error": f"HTTP {resp.status_code}: {resp.text[:200]}", "_elapsed_ms": elapsed}
            payload = resp.json()
            payload['_elapsed_ms'] = elapsed
            return payload
        except Exception as e:
            return {"error": str(e)}
32
+
33
def log(msg):
    """Print ``msg`` prefixed with the local wall-clock time as ``[HH:MM:SS]``."""
    stamp = datetime.now().strftime('%H:%M:%S')
    print(f"[{stamp}] {msg}", flush=True)
35
+
36
# Accumulates the output of every gap test; keyed by gap name.
results = {"timestamp": datetime.now(timezone.utc).isoformat()}

# ═══════════════════════════════════════════════════════════════
# GAP 1: 200+ word cumulative drift test (Cat 10)
# ═══════════════════════════════════════════════════════════════
log("=== GAP 1: 200+ word cumulative drift test ===")

# Build a 200+ word paragraph with deliberate errors throughout
long_para = (
    "كانت الفتيات يلعبون في الحديقه الجميله وفجأه سقطت احداهن وبدءت تبكي بشده "
    "ذهب الولد الى المدرسه وقابل المعلمه واخذ الكتاب وبدأ يقرأ بتركيز شديد "
    "ان الذكاء الاصطناعي يلعب دورا هاما في تطوير التكنولوجيا الحديثه ولذالك يجب الاهتمام بة "
    "هذة المدينه جميله جدا ومناخها معتدل طوال العام وسكانها طيبون ومحبون للخير "
    "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه في الامتحانات النهائيه "
    "سافر محمد إلى دبي للعمل في شركة جوجل وقابل أصدقاءه القدامى هناك "
    "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق في معالجة البيانات الضخمة والتحليل "
    "القاهرة عاصمة جمهورية مصر العربية وأكبر مدنها وتقع على ضفاف نهر النيل العظيم "
    "تتراوح درجات الحرارة بين خمس وعشرين وثلاثين درجة مئوية في فصل الصيف الحار "
    "الحمد لله رب العالمين الرحمن الرحيم مالك يوم الدين إياك نعبد وإياك نستعين "
    "بسم الله الرحمن الرحيم نبدأ هذة المحاضره عن اهمية التعليم في حياة الانسان "
    "يحب الأطفال اللعب في الحديقة وركوب الدراجات والجري بين الأشجار والزهور الجميلة "
    "إن العلم نور والجهل ظلام فاحرصوا على طلب العلم من المهد إلى اللحد "
    "كان الرجل يمشي في الشارع وفجأه رأى صديقه القديم فسلم عليه وتحدثا طويلا "
    "المعلم الذي يحب عمله يجتهد في تعليم طلابه ويحرص على نجاحهم في الحياه "
)

word_count = len(long_para.split())
char_count = len(long_para)
log(f" Input: {word_count} words, {char_count} chars")

# Track A: each model on the full long text
log(" Running Track A (each model independently on original)...")
a_spell = api_call("/api/spelling", long_para)
a_gram = api_call("/api/grammar", long_para)
a_punc = api_call("/api/punctuation", long_para)

# Track B: full pipeline
log(" Running Track B (full pipeline)...")
b = api_call("/api/analyze", long_para)

# Surface failed requests: api_call reports them as an "error" entry, and
# without this they would be indistinguishable from "no suggestions".
for _label, _reply in (("spelling", a_spell), ("grammar", a_gram),
                       ("punctuation", a_punc), ("analyze", b)):
    if "error" in _reply:
        log(f" WARNING: {_label} request failed: {_reply['error']}")

sugg = b.get("suggestions", [])
mid_char = char_count // 2

# Verify ALL coordinates: long_para[start:end] must equal the suggestion's
# reported original text.
coord_mismatches = []
for s in sugg:
    start, end = s.get('start', 0), s.get('end', 0)
    expected_text = long_para[start:end]
    actual_text = s.get('original', '')
    if expected_text != actual_text:
        coord_mismatches.append({
            "start": start, "end": end,
            "expected": expected_text,
            "actual": actual_text,
            "correction": s.get('correction', ''),
            "type": s.get('type', ''),
        })

back_half = [s for s in sugg if s.get('start', 0) >= mid_char]
front_half = [s for s in sugg if s.get('start', 0) < mid_char]

# Log every suggestion with its verified coordinate
log(f" Total: {len(sugg)} suggestions, {len(coord_mismatches)} coordinate mismatches")
log(f" Front half ({mid_char} chars): {len(front_half)} suggestions")
log(f" Back half: {len(back_half)} suggestions")
for s in sugg:
    st, en = s.get('start',0), s.get('end',0)
    in_back = "BACK" if st >= mid_char else "FRONT"
    verified = "✓" if long_para[st:en] == s.get('original','') else "✗ MISMATCH"
    log(f" [{in_back}] [{st}:{en}] '{s.get('original','')}' → '{s.get('correction','')}' ({s.get('type','')}) {verified}")

for m in coord_mismatches:
    log(f" MISMATCH: [{m['start']}:{m['end']}] expected='{m['expected']}' actual='{m['actual']}'")

results['gap1_drift'] = {
    "word_count": word_count, "char_count": char_count,
    "total_suggestions": len(sugg),
    "front_half": len(front_half), "back_half": len(back_half),
    "coordinate_mismatches": coord_mismatches,
    # Default to the input itself so a failed request (no "corrected_text"
    # key) is reported as "unchanged" rather than as a change.
    "a_spelling_changed": a_spell.get("corrected_text", long_para) != long_para,
    "a_grammar_changed": a_gram.get("corrected_text", long_para) != long_para,
    "a_punc_changed": a_punc.get("corrected_text", long_para) != long_para,
    "suggestions": sugg,
}
120
+
121
+ # ═══════════════════════════════════════════════════════════════
122
+ # GAP 2: Lower-priority-wins limitation doc (Cat 4)
123
+ # ═══════════════════════════════════════════════════════════════
124
+ log("\n=== GAP 2: Lower-priority stage was more important (Cat 4) ===")
125
+
126
+ # Construct case: spelling corrects اجتهدو→اجتهدوا (correct, priority 1)
127
+ # but grammar might also touch it with a different correction (priority 3)
128
+ # Grammar WINS because higher priority. But what if grammar is wrong here?
129
+ gap2_tests = [
130
+ {
131
+ "id": "G2-01",
132
+ "input": "الطلاب اجتهدو في الامتحان",
133
+ "desc": "اجتهدو — spelling should add ا, grammar may do different fix. Grammar wins (priority 3 > 1)",
134
+ },
135
+ {
136
+ "id": "G2-02",
137
+ "input": "البنات ذهبو الى البيت",
138
+ "desc": "ذهبو — spelling could give ذهبوا, grammar could give ذهبن (fem). Grammar wins.",
139
+ },
140
+ {
141
+ "id": "G2-03",
142
+ "input": "وفجأه سقطت الكتب",
143
+ "desc": "وفجأه — spelling may fix ه→ة; punctuation may want comma after it. Overlap?",
144
+ },
145
+ ]
146
+
147
for test in gap2_tests:
    sample = test['input']
    log(f" {test['id']}: {sample}")

    # Track A: the three model endpoints, each on the raw input.
    a_sp = api_call("/api/spelling", sample)
    a_gr = api_call("/api/grammar", sample)
    a_pu = api_call("/api/punctuation", sample)
    # Track B: the full pipeline on the same input.
    b = api_call("/api/analyze", sample)

    # A response without 'corrected_text' is reported as the unchanged input.
    a_sp_out = a_sp.get("corrected_text", sample)
    a_gr_out = a_gr.get("corrected_text", sample)
    a_pu_out = a_pu.get("corrected_text", sample)

    log(f" A_spell: {a_sp_out}")
    log(f" A_gram: {a_gr_out}")
    log(f" A_punc: {a_pu_out}")
    log(f" B_final: {b.get('corrected','')}")
    log(f" B_sugg: {len(b.get('suggestions',[]))}")

    # List every pipeline suggestion so the winning stage per span is visible.
    b_sugg = b.get('suggestions', [])
    for entry in b_sugg:
        log(f" [{entry.get('type','')}] [{entry.get('start',0)}:{entry.get('end',0)}] '{entry.get('original','')}' → '{entry.get('correction','')}'")

    # Attach the observations to the test record that is saved below.
    test.update({
        'a_spelling': a_sp_out,
        'a_grammar': a_gr_out,
        'a_punctuation': a_pu_out,
        'b_corrected': b.get('corrected', ''),
        'b_suggestions': b_sugg,
    })

results['gap2_priority'] = gap2_tests
176
+
177
+ # ═══════════════════════════════════════════════════════════════
178
+ # GAP 3: Systematic dropped patch logging (Cat 3)
179
+ # ═══════════════════════════════════════════════════════════════
180
+ log("\n=== GAP 3: Systematic dropped patch comparison (Cat 3) ===")
181
+
182
+ # For each test: run all 3 models independently, count expected patches,
183
+ # compare with actual Track B patches. Any patch Track A produces but
184
+ # Track B doesn't = dropped patch.
185
+ gap3_tests = [
186
+ "كانت الفتيات يلعبون في الحديقه وفجأه سقطت احداهن وبدءت تبكي بشده",
187
+ "ان الذكاء الاصطناعي يلعب دورا هاما ولذالك يجب الاهتمام بة",
188
+ "هذة المدينه جميله جدا ومناخها معتدل طوال العام",
189
+ "ذهب الولد الى المكتبه وقرا كتاب مفيد",
190
+ "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه في الامتحانات",
191
+ ]
192
+
193
def word_diffs(orig, corrected):
    """Return the position-wise word differences between two texts.

    Both texts are split on whitespace and compared word by word.  Each
    differing position yields ``{"word_idx", "original", "corrected"}``.

    NOTE: the comparison is positional and stops at the shorter text, so a
    correction that inserts or removes a word shifts every later position.
    """
    return [
        {"word_idx": j, "original": ow, "corrected": cw}
        for j, (ow, cw) in enumerate(zip(orig.split(), corrected.split()))
        if ow != cw
    ]


# One record per test input.  Collected here and stored once after the loop,
# so every test is kept instead of each iteration overwriting the previous one.
gap3_records = []

for i, text in enumerate(gap3_tests):
    log(f" Test {i+1}: {text[:50]}...")
    a_sp = api_call("/api/spelling", text)
    a_gr = api_call("/api/grammar", text)
    a_pu = api_call("/api/punctuation", text)
    b = api_call("/api/analyze", text)

    # A response without 'corrected_text' is treated as "no change".
    a_sp_out = a_sp.get("corrected_text", text)
    a_gr_out = a_gr.get("corrected_text", text)
    a_pu_out = a_pu.get("corrected_text", text)

    # Word-level changes proposed by each model on its own.
    sp_diffs = word_diffs(text, a_sp_out)
    gr_diffs = word_diffs(text, a_gr_out)
    pu_diffs = word_diffs(text, a_pu_out)

    # Words touched by the pipeline.  A suggestion's 'original' may span
    # several words, so it is split before matching; comparing whole strings
    # would report every word of a multi-word suggestion as dropped.
    b_sugg = b.get('suggestions', [])
    b_words = set()
    for s in b_sugg:
        b_words.update(s.get('original', '').split())

    # A Track A change counts as dropped when Track B touched that word in no
    # suggestion at all, regardless of which stage the suggestion came from.
    dropped_spell = [d for d in sp_diffs if d['original'] not in b_words]
    dropped_gram = [d for d in gr_diffs if d['original'] not in b_words]
    dropped_punc = [d for d in pu_diffs if d['original'] not in b_words]

    log(f" Track A changes: spell={len(sp_diffs)}, gram={len(gr_diffs)}, punc={len(pu_diffs)}")
    log(f" Track B suggestions: {len(b_sugg)}")
    log(f" Dropped: spell={len(dropped_spell)}, gram={len(dropped_gram)}, punc={len(dropped_punc)}")

    for d in dropped_spell:
        log(f" DROPPED SPELL: '{d['original']}' → '{d['corrected']}' (reason: likely filter blocked)")
    for d in dropped_gram:
        log(f" DROPPED GRAM: '{d['original']}' → '{d['corrected']}' (reason: likely StageLocker)")
    for d in dropped_punc:
        log(f" DROPPED PUNC: '{d['original']}' → '{d['corrected']}' (reason: likely lock/cap/safety)")

    gap3_records.append({
        "input": text,
        "a_spell_diffs": sp_diffs,
        "a_gram_diffs": gr_diffs,
        "a_punc_diffs": pu_diffs,
        "b_suggestion_count": len(b_sugg),
        "dropped_spell": dropped_spell,
        "dropped_gram": dropped_gram,
        "dropped_punc": dropped_punc,
    })

results['gap3_dropped'] = {"tests": gap3_records}
254
+
255
+ # ═══════════════════════════════════════════════════════════════
256
+ # GAP 4: Rare/literary vocabulary (Cat 2)
257
+ # ═══════════════════════════════════════════════════════════════
258
+ log("\n=== GAP 4: Rare/literary vocabulary overcorrection (Cat 2) ===")
259
+
260
+ rare_tests = [
261
+ {"id": "R-01", "input": "استوقفني المشهد فتأملته مليا", "domain": "literary"},
262
+ {"id": "R-02", "input": "تستأثر القوى العظمى بالنفوذ الدولي", "domain": "political_literary"},
263
+ {"id": "R-03", "input": "استقطب المؤتمر ثلة من العلماء الأفذاذ", "domain": "formal_rare"},
264
+ {"id": "R-04", "input": "يتسنى للمرء أن يستشف الحقيقة من بين السطور", "domain": "literary_verb"},
265
+ {"id": "R-05", "input": "ألقى المحاضر خطبة عصماء استحوذت على إعجاب الحاضرين", "domain": "oratory"},
266
+ {"id": "R-06", "input": "تمخض الاجتماع عن قرارات مصيرية", "domain": "formal_verb"},
267
+ {"id": "R-07", "input": "أرهقته المسغبة فاستكان للقدر", "domain": "classical"},
268
+ {"id": "R-08", "input": "نستشرف آفاق المستقبل بثقة واقتدار", "domain": "formal_speech"},
269
+ {"id": "R-09", "input": "اعتراه القلق فتملكه الأرق", "domain": "literary_psych"},
270
+ {"id": "R-10", "input": "استأنف العمل بعد فترة من التقاعس", "domain": "formal_verb"},
271
+ ]
272
+
273
# Run each rare/literary sentence through the spelling endpoint; any change
# to the text is counted as a false positive.
for test in rare_tests:
    sample = test['input']
    reply = api_call("/api/spelling", sample)
    a_out = reply.get("corrected_text", sample)
    changed = a_out != sample
    if not changed:
        log(f" ✓ {test['id']}: no change [{test['domain']}]")
    else:
        log(f" ⚠ {test['id']}: '{sample[:40]}...' → '{a_out[:40]}...' [{test['domain']}]")
    test['output'] = a_out
    test['changed'] = changed

# Count the changed sentences once every test carries its verdict.
fp_count = sum(1 for test in rare_tests if test['changed'])

log(f" Rare/literary FP rate: {fp_count}/{len(rare_tests)} ({fp_count*100//len(rare_tests)}%)")
results['gap4_rare'] = {"tests": rare_tests, "fp_count": fp_count, "total": len(rare_tests)}
288
+
289
+ # ═══════════════════════════════════════════════════════════════
290
+ # SAVE
291
+ # ═══════════════════════════════════════════════════════════════
292
# Write the collected results next to this script as UTF-8 JSON, keeping the
# Arabic text readable (ensure_ascii=False).
script_dir = os.path.dirname(__file__)
output_path = os.path.join(script_dir, 'deep_dive_gaps.json')
with open(output_path, mode='w', encoding='utf-8') as out_file:
    json.dump(results, out_file, ensure_ascii=False, indent=2)
log(f"\nSaved to {output_path}")
tests/deep_dive_output.json ADDED
@@ -0,0 +1,671 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "2026-06-20T19:00:06.993902+00:00",
3
+ "api_base": "https://bayan10-bayan-api.hf.space",
4
+ "health": {
5
+ "environment": "huggingface_spaces",
6
+ "mode": "hf_spaces_local",
7
+ "models": {
8
+ "autocomplete": true,
9
+ "grammar": true,
10
+ "punctuation": true,
11
+ "spelling": true,
12
+ "summarization": true
13
+ },
14
+ "note": "Free tier: summarization local, other models return input unchanged",
15
+ "status": "healthy",
16
+ "supabase": {
17
+ "configured": true
18
+ }
19
+ },
20
+ "pipeline_tests": [
21
+ {
22
+ "id": "C3-01",
23
+ "category": 3,
24
+ "input": "كانت الفتيات يلعبون في الحديقه وفجأه سقطت احداهن وبدءت تبكي بشده",
25
+ "track_a": {
26
+ "spelling": "كانت الفتيات يلعبون في الحديقه وفجأه سقطت احداهن وبدءت تبكي بشدة",
27
+ "spelling_changed": true,
28
+ "grammar": "كانت الفتيات يلعبن في الحديقة وفجأة سقطت إحدىهن وبدأت تبكي بشدة",
29
+ "grammar_changed": true,
30
+ "punctuation": "كانت الفتيات يلعبون في الحديقه وفجأه، سقطت احداهن وبدءت تبكي بشده",
31
+ "punctuation_changed": true
32
+ },
33
+ "track_b": {
34
+ "corrected": "كانت الفتيات يلعبن في الحديقة وفجأة سقطت إحدىهن وبدأت تبكي بشدة.",
35
+ "suggestions": [
36
+ {
37
+ "alternatives": [],
38
+ "confidence": 1.0,
39
+ "correction": "يلعبن",
40
+ "end": 19,
41
+ "id": "e984c773-8d33-4a30-b5b8-49cee91e1095",
42
+ "locked": true,
43
+ "original": "يلعبون",
44
+ "priority": 3,
45
+ "start": 13,
46
+ "type": "grammar"
47
+ },
48
+ {
49
+ "alternatives": [],
50
+ "confidence": 1.0,
51
+ "correction": "إحدىهن وبدأت",
52
+ "end": 54,
53
+ "id": "38054ed7-9bd2-4e04-9314-b4a63b84ad07",
54
+ "locked": true,
55
+ "original": "احداهن وبدءت",
56
+ "priority": 3,
57
+ "start": 42,
58
+ "type": "grammar"
59
+ },
60
+ {
61
+ "alternatives": [],
62
+ "confidence": 0.8,
63
+ "correction": "بشدة.",
64
+ "end": 64,
65
+ "id": "16e72e95-6326-4365-a0f3-ad2602bcfc49",
66
+ "locked": true,
67
+ "original": "بشده",
68
+ "priority": 2,
69
+ "start": 60,
70
+ "type": "punctuation"
71
+ },
72
+ {
73
+ "alternatives": [],
74
+ "confidence": 1.0,
75
+ "correction": "الحديقة وفجأة",
76
+ "end": 36,
77
+ "id": "1de0b7c2-e2e5-45e3-8ba3-6fe062ee8fcc",
78
+ "locked": true,
79
+ "original": "الحديقه وفجأه",
80
+ "priority": 1,
81
+ "start": 23,
82
+ "type": "spelling"
83
+ }
84
+ ],
85
+ "timing_ms": {
86
+ "grammar_ms": 4561,
87
+ "punctuation_ms": 1492,
88
+ "spelling_ms": 1529,
89
+ "total_ms": 7587
90
+ }
91
+ }
92
+ },
93
+ {
94
+ "id": "C3-02",
95
+ "category": 3,
96
+ "input": "ان الذكاء الاصطناعي يلعب دورا هاما ولذالك يجب الاهتمام بة",
97
+ "track_a": {
98
+ "spelling": "ان الذكاء الاصطناعي يلعب دورا هاما ولذالك يجب الاهتمام بة",
99
+ "spelling_changed": false,
100
+ "grammar": "ان الذكاء الاصطناعي يلعب دورا هاما ولذلك يجب الاهتمام به",
101
+ "grammar_changed": true,
102
+ "punctuation": "ان الذكاء الاصطناعي يلعب دورا هاما ولذالك؛ يجب الاهتمام بة",
103
+ "punctuation_changed": true
104
+ },
105
+ "track_b": {
106
+ "corrected": "ان الذكاء الاصطناعي يلعب دورا هاما ولذلك يجب الاهتمام به",
107
+ "suggestions": [
108
+ {
109
+ "alternatives": [],
110
+ "confidence": 1.0,
111
+ "correction": "ولذلك",
112
+ "end": 41,
113
+ "id": "9870eb8d-0bf7-4a58-90cb-940b5475a37e",
114
+ "locked": true,
115
+ "original": "ولذالك",
116
+ "priority": 3,
117
+ "start": 35,
118
+ "type": "grammar"
119
+ },
120
+ {
121
+ "alternatives": [],
122
+ "confidence": 1.0,
123
+ "correction": "به",
124
+ "end": 57,
125
+ "id": "ea9f3fca-eee1-4597-8f4a-00f50558d510",
126
+ "locked": true,
127
+ "original": "بة",
128
+ "priority": 1,
129
+ "start": 55,
130
+ "type": "spelling"
131
+ }
132
+ ],
133
+ "timing_ms": {
134
+ "grammar_ms": 1304,
135
+ "punctuation_ms": 1050,
136
+ "spelling_ms": 1193,
137
+ "total_ms": 3549
138
+ }
139
+ }
140
+ },
141
+ {
142
+ "id": "C3-03",
143
+ "category": 3,
144
+ "input": "التزم الر��اضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة في بناء كتلة عضلية قوية ويا له من التزام حديدي يثير الإعجاب",
145
+ "track_a": {
146
+ "spelling": "التزم الرياضي بتناول وجبات الصحية وحساب سعادتة بدقة رغبة في بناء كتلة عملية قوية ويا له من التزام حديدي يثير الإعجاب",
147
+ "spelling_changed": true,
148
+ "grammar": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة في بناء كتلة عضلية قوية ويا له من التزام حديدي يثير الإعجاب",
149
+ "grammar_changed": false,
150
+ "punctuation": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة؛ في بناء كتلة عضلية قوية ويا له، من التزام حديدي يثير الإعجاب",
151
+ "punctuation_changed": true
152
+ },
153
+ "track_b": {
154
+ "corrected": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة؛ في بناء كتلة عضلية قوية ويا له، من التزام حديدي يثير الإعجاب",
155
+ "suggestions": [
156
+ {
157
+ "alternatives": [],
158
+ "confidence": 0.8,
159
+ "correction": "رغبة؛",
160
+ "end": 57,
161
+ "id": "b7e29bf0-2565-4b46-b815-58e1b56717c1",
162
+ "locked": true,
163
+ "original": "رغبة",
164
+ "priority": 2,
165
+ "start": 53,
166
+ "type": "punctuation"
167
+ },
168
+ {
169
+ "alternatives": [],
170
+ "confidence": 0.8,
171
+ "correction": "له،",
172
+ "end": 88,
173
+ "id": "6d1e2b65-d2a4-41f1-a803-ce06e93e79c4",
174
+ "locked": true,
175
+ "original": "له",
176
+ "priority": 2,
177
+ "start": 86,
178
+ "type": "punctuation"
179
+ }
180
+ ],
181
+ "timing_ms": {
182
+ "grammar_ms": 6012,
183
+ "punctuation_ms": 2080,
184
+ "spelling_ms": 2197,
185
+ "total_ms": 10291
186
+ }
187
+ }
188
+ },
189
+ {
190
+ "id": "C3-04",
191
+ "category": 3,
192
+ "input": "هذة المدينه جميله جدا ومناخها معتدل طوال العام",
193
+ "track_a": {
194
+ "spelling": "هذة المدينه جميله جدا ومناخها معتدل طوال العام",
195
+ "spelling_changed": false,
196
+ "grammar": "هذه المدينة جميلة جدا ومناخها معتدل طوال العام",
197
+ "grammar_changed": true,
198
+ "punctuation": "هذة المدينه جميله جدا ومناخها معتدل طوال العام.",
199
+ "punctuation_changed": true
200
+ },
201
+ "track_b": {
202
+ "corrected": "هذه المدينة جميلة جدا ومناخها معتدل طوال العام.",
203
+ "suggestions": [
204
+ {
205
+ "alternatives": [],
206
+ "confidence": 0.8,
207
+ "correction": "العام.",
208
+ "end": 46,
209
+ "id": "a4de368f-4ae7-451a-bbe2-ff7fca6b3f3b",
210
+ "locked": true,
211
+ "original": "العام",
212
+ "priority": 2,
213
+ "start": 41,
214
+ "type": "punctuation"
215
+ },
216
+ {
217
+ "alternatives": [],
218
+ "confidence": 1.0,
219
+ "correction": "هذه المدينة جميلة",
220
+ "end": 17,
221
+ "id": "9ff77094-1e33-4946-a343-317f51b8b539",
222
+ "locked": true,
223
+ "original": "هذة المدينه جميله",
224
+ "priority": 1,
225
+ "start": 0,
226
+ "type": "spelling"
227
+ }
228
+ ],
229
+ "timing_ms": {
230
+ "grammar_ms": 1461,
231
+ "punctuation_ms": 804,
232
+ "spelling_ms": 970,
233
+ "total_ms": 3236
234
+ }
235
+ }
236
+ },
237
+ {
238
+ "id": "C3-05",
239
+ "category": 3,
240
+ "input": "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه في الامتحانات",
241
+ "track_a": {
242
+ "spelling": "الطلاب الذين اجتهد في دراستهم حقوق نتائج ممتازه في الامتحانات",
243
+ "spelling_changed": true,
244
+ "grammar": "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه في الامتحانات",
245
+ "grammar_changed": false,
246
+ "punctuation": "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه في الامتحانات",
247
+ "punctuation_changed": false
248
+ },
249
+ "track_b": {
250
+ "corrected": "",
251
+ "suggestions": [],
252
+ "timing_ms": {}
253
+ }
254
+ },
255
+ {
256
+ "id": "C4-01",
257
+ "category": 4,
258
+ "input": "كانت الفتيات يلعبون في الحديقه",
259
+ "runs": [
260
+ {
261
+ "run": 1,
262
+ "corrected": "",
263
+ "suggestions": []
264
+ },
265
+ {
266
+ "run": 2,
267
+ "corrected": "",
268
+ "suggestions": []
269
+ },
270
+ {
271
+ "run": 3,
272
+ "corrected": "",
273
+ "suggestions": []
274
+ }
275
+ ],
276
+ "deterministic": true
277
+ },
278
+ {
279
+ "id": "C4-02",
280
+ "category": 4,
281
+ "input": "ذهب الى المدرسه وقابل المعلمه",
282
+ "runs": [
283
+ {
284
+ "run": 1,
285
+ "corrected": "ذهب إلى المدرسة وقابل المعلمة.",
286
+ "suggestions": [
287
+ {
288
+ "alternatives": [],
289
+ "confidence": 0.8,
290
+ "correction": "المعلمة.",
291
+ "end": 29,
292
+ "id": "3579ef5d-9295-46a9-8056-5a0b15dced2d",
293
+ "locked": true,
294
+ "original": "المعلمه",
295
+ "priority": 2,
296
+ "start": 22,
297
+ "type": "punctuation"
298
+ },
299
+ {
300
+ "alternatives": [],
301
+ "confidence": 1.0,
302
+ "correction": "المدرسة",
303
+ "end": 15,
304
+ "id": "af7b8dd8-f85a-4632-a7c9-b9b733d7e019",
305
+ "locked": true,
306
+ "original": "المدرسه",
307
+ "priority": 1,
308
+ "start": 8,
309
+ "type": "spelling"
310
+ },
311
+ {
312
+ "alternatives": [
313
+ "إلى",
314
+ "ال",
315
+ "الم",
316
+ "الى"
317
+ ],
318
+ "confidence": 0.9,
319
+ "correction": "إلى",
320
+ "end": 7,
321
+ "id": "cd3a78f0-afbc-42d0-8bba-c60ce884dfdf",
322
+ "locked": true,
323
+ "original": "الى",
324
+ "priority": 1,
325
+ "start": 4,
326
+ "type": "spelling"
327
+ }
328
+ ]
329
+ },
330
+ {
331
+ "run": 2,
332
+ "corrected": "ذهب إلى المدرسة وقابل المعلمة.",
333
+ "suggestions": [
334
+ {
335
+ "alternatives": [],
336
+ "confidence": 0.8,
337
+ "correction": "المعلمة.",
338
+ "end": 29,
339
+ "id": "4263a3c3-69cc-40a7-884f-a6e9bfd17eb1",
340
+ "locked": true,
341
+ "original": "المعلمه",
342
+ "priority": 2,
343
+ "start": 22,
344
+ "type": "punctuation"
345
+ },
346
+ {
347
+ "alternatives": [],
348
+ "confidence": 1.0,
349
+ "correction": "المدرسة",
350
+ "end": 15,
351
+ "id": "3c062f0a-95b6-4eee-bd80-36fc9b295206",
352
+ "locked": true,
353
+ "original": "المدرسه",
354
+ "priority": 1,
355
+ "start": 8,
356
+ "type": "spelling"
357
+ },
358
+ {
359
+ "alternatives": [
360
+ "إلى",
361
+ "ال",
362
+ "الم",
363
+ "الى"
364
+ ],
365
+ "confidence": 0.9,
366
+ "correction": "إلى",
367
+ "end": 7,
368
+ "id": "beb1ecbe-3278-47d5-bb14-d28f1eec5b47",
369
+ "locked": true,
370
+ "original": "الى",
371
+ "priority": 1,
372
+ "start": 4,
373
+ "type": "spelling"
374
+ }
375
+ ]
376
+ },
377
+ {
378
+ "run": 3,
379
+ "corrected": "ذهب إلى المدرسة وقابل المعلمة.",
380
+ "suggestions": [
381
+ {
382
+ "alternatives": [],
383
+ "confidence": 0.8,
384
+ "correction": "المعلمة.",
385
+ "end": 29,
386
+ "id": "5361ba1b-5c5f-4740-84be-1c4d96c665db",
387
+ "locked": true,
388
+ "original": "المعلمه",
389
+ "priority": 2,
390
+ "start": 22,
391
+ "type": "punctuation"
392
+ },
393
+ {
394
+ "alternatives": [],
395
+ "confidence": 1.0,
396
+ "correction": "المدرسة",
397
+ "end": 15,
398
+ "id": "f0450147-9d7a-4754-a4fe-403a07219c39",
399
+ "locked": true,
400
+ "original": "المدرسه",
401
+ "priority": 1,
402
+ "start": 8,
403
+ "type": "spelling"
404
+ },
405
+ {
406
+ "alternatives": [
407
+ "إلى",
408
+ "ال",
409
+ "الم",
410
+ "الى"
411
+ ],
412
+ "confidence": 0.9,
413
+ "correction": "إلى",
414
+ "end": 7,
415
+ "id": "a8278394-1555-4d01-ba94-1325efc0a97c",
416
+ "locked": true,
417
+ "original": "الى",
418
+ "priority": 1,
419
+ "start": 4,
420
+ "type": "spelling"
421
+ }
422
+ ]
423
+ }
424
+ ],
425
+ "deterministic": true
426
+ },
427
+ {
428
+ "id": "C4-03",
429
+ "category": 4,
430
+ "input": "ان الطالبات ذهبو الى الجامعه",
431
+ "runs": [
432
+ {
433
+ "run": 1,
434
+ "corrected": "إن الطالبات ذهبن ذه��وا الجامعة.",
435
+ "suggestions": [
436
+ {
437
+ "alternatives": [],
438
+ "confidence": 1.0,
439
+ "correction": "ذهبن",
440
+ "end": 16,
441
+ "id": "bc1d01e1-8d6b-4bda-bbe0-199e841d0f3d",
442
+ "locked": true,
443
+ "original": "ذهبو",
444
+ "priority": 3,
445
+ "start": 12,
446
+ "type": "grammar"
447
+ },
448
+ {
449
+ "alternatives": [],
450
+ "confidence": 0.8,
451
+ "correction": "الجامعة.",
452
+ "end": 28,
453
+ "id": "8cdb866c-0c6f-4cb1-a4ef-d00be9b455f7",
454
+ "locked": true,
455
+ "original": "الجامعه",
456
+ "priority": 2,
457
+ "start": 21,
458
+ "type": "punctuation"
459
+ },
460
+ {
461
+ "alternatives": [],
462
+ "confidence": 1.0,
463
+ "correction": "إن",
464
+ "end": 2,
465
+ "id": "027f98a7-668c-463f-9ecc-acaad6b959b2",
466
+ "locked": true,
467
+ "original": "ان",
468
+ "priority": 1,
469
+ "start": 0,
470
+ "type": "spelling"
471
+ },
472
+ {
473
+ "alternatives": [
474
+ "ذهبوا",
475
+ "ال",
476
+ "الم",
477
+ "الى"
478
+ ],
479
+ "confidence": 0.9,
480
+ "correction": "ذهبوا",
481
+ "end": 20,
482
+ "id": "8aee308b-6200-4c92-b6d1-95333a112ce0",
483
+ "locked": true,
484
+ "original": "الى",
485
+ "priority": 1,
486
+ "start": 17,
487
+ "type": "spelling"
488
+ }
489
+ ]
490
+ },
491
+ {
492
+ "run": 2,
493
+ "corrected": "إن الطالبات ذهبن ذهبوا الجامعة.",
494
+ "suggestions": [
495
+ {
496
+ "alternatives": [],
497
+ "confidence": 1.0,
498
+ "correction": "ذهبن",
499
+ "end": 16,
500
+ "id": "0c9ec931-ea50-423c-8429-89a100e1c226",
501
+ "locked": true,
502
+ "original": "ذهبو",
503
+ "priority": 3,
504
+ "start": 12,
505
+ "type": "grammar"
506
+ },
507
+ {
508
+ "alternatives": [],
509
+ "confidence": 0.8,
510
+ "correction": "الجامعة.",
511
+ "end": 28,
512
+ "id": "c67960b7-36f0-480a-8e85-716c57465107",
513
+ "locked": true,
514
+ "original": "الجامعه",
515
+ "priority": 2,
516
+ "start": 21,
517
+ "type": "punctuation"
518
+ },
519
+ {
520
+ "alternatives": [],
521
+ "confidence": 1.0,
522
+ "correction": "إن",
523
+ "end": 2,
524
+ "id": "787d7736-29aa-4625-90ad-e1248acb2d48",
525
+ "locked": true,
526
+ "original": "ان",
527
+ "priority": 1,
528
+ "start": 0,
529
+ "type": "spelling"
530
+ },
531
+ {
532
+ "alternatives": [
533
+ "ذهبوا",
534
+ "ال",
535
+ "الم",
536
+ "الى"
537
+ ],
538
+ "confidence": 0.9,
539
+ "correction": "ذهبوا",
540
+ "end": 20,
541
+ "id": "69c96488-d579-441c-89ea-3b66477f1f2d",
542
+ "locked": true,
543
+ "original": "الى",
544
+ "priority": 1,
545
+ "start": 17,
546
+ "type": "spelling"
547
+ }
548
+ ]
549
+ },
550
+ {
551
+ "run": 3,
552
+ "corrected": "إن الطالبات ذهبن ذهبوا الجامعة.",
553
+ "suggestions": [
554
+ {
555
+ "alternatives": [],
556
+ "confidence": 1.0,
557
+ "correction": "ذهبن",
558
+ "end": 16,
559
+ "id": "e9626053-e05b-4774-bd33-2155ee6d7fba",
560
+ "locked": true,
561
+ "original": "ذهبو",
562
+ "priority": 3,
563
+ "start": 12,
564
+ "type": "grammar"
565
+ },
566
+ {
567
+ "alternatives": [],
568
+ "confidence": 0.8,
569
+ "correction": "الجامعة.",
570
+ "end": 28,
571
+ "id": "4ecab998-db9d-47b5-a835-a4516a38b1ae",
572
+ "locked": true,
573
+ "original": "الجامعه",
574
+ "priority": 2,
575
+ "start": 21,
576
+ "type": "punctuation"
577
+ },
578
+ {
579
+ "alternatives": [],
580
+ "confidence": 1.0,
581
+ "correction": "إن",
582
+ "end": 2,
583
+ "id": "864a48a7-d61a-4c9e-8953-72826c279d48",
584
+ "locked": true,
585
+ "original": "ان",
586
+ "priority": 1,
587
+ "start": 0,
588
+ "type": "spelling"
589
+ },
590
+ {
591
+ "alternatives": [
592
+ "ذهبوا",
593
+ "ال",
594
+ "الم",
595
+ "الى"
596
+ ],
597
+ "confidence": 0.9,
598
+ "correction": "ذهبوا",
599
+ "end": 20,
600
+ "id": "05c65f7f-14f4-474c-bff8-0ce52ce5cf5b",
601
+ "locked": true,
602
+ "original": "الى",
603
+ "priority": 1,
604
+ "start": 17,
605
+ "type": "spelling"
606
+ }
607
+ ]
608
+ }
609
+ ],
610
+ "deterministic": true
611
+ }
612
+ ],
613
+ "boundary_tests": [
614
+ {
615
+ "id": "BOUND-299",
616
+ "category": 3,
617
+ "input_len": 299,
618
+ "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق في معالجة البيانات يستخدم الذكاء ال...",
619
+ "has_spelling_suggestions": false,
620
+ "total_suggestions": 6,
621
+ "timing": {
622
+ "grammar_ms": 5256,
623
+ "punctuation_ms": 5490,
624
+ "spelling_ms": 32835,
625
+ "total_ms": 43584
626
+ }
627
+ },
628
+ {
629
+ "id": "BOUND-300",
630
+ "category": 3,
631
+ "input_len": 300,
632
+ "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق في معالجة البيانات يستخدم الذكاء ال...",
633
+ "has_spelling_suggestions": false,
634
+ "total_suggestions": 9,
635
+ "timing": {
636
+ "grammar_ms": 11035,
637
+ "punctuation_ms": 5849,
638
+ "spelling_ms": 18786,
639
+ "total_ms": 35674
640
+ }
641
+ },
642
+ {
643
+ "id": "BOUND-301",
644
+ "category": 3,
645
+ "input_len": 301,
646
+ "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق في معالجة البيانات يستخدم الذكاء ال...",
647
+ "has_spelling_suggestions": false,
648
+ "total_suggestions": 9,
649
+ "timing": {
650
+ "grammar_ms": 12363,
651
+ "punctuation_ms": 6256,
652
+ "spelling_ms": 3209,
653
+ "total_ms": 21833
654
+ }
655
+ },
656
+ {
657
+ "id": "BOUND-500",
658
+ "category": 3,
659
+ "input_len": 500,
660
+ "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق في معالجة البيانات يستخدم الذكاء ال...",
661
+ "has_spelling_suggestions": false,
662
+ "total_suggestions": 23,
663
+ "timing": {
664
+ "grammar_ms": 18635,
665
+ "punctuation_ms": 12917,
666
+ "spelling_ms": 0,
667
+ "total_ms": 31560
668
+ }
669
+ }
670
+ ]
671
+ }
tests/deep_dive_test.py ADDED
@@ -0,0 +1,519 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ BAYAN Deep-Dive Test Harness — Track A (Raw Models via API) & Track B (Full Pipeline via API)
3
+
4
+ Uses the deployed HF Space API (bayan10/bayan-api) instead of loading models locally.
5
+ This avoids the 1GB model download hang and tests the ACTUAL production behavior.
6
+
7
+ Track A: /api/spelling, /api/grammar, /api/punctuation (individual model endpoints)
8
+ Track B: /api/analyze (full pipeline with StageLocker, OffsetMapper, PatchSet)
9
+
10
+ Usage:
11
+ python tests/deep_dive_test.py --stage spelling
12
+ python tests/deep_dive_test.py --stage grammar
13
+ python tests/deep_dive_test.py --stage punctuation
14
+ python tests/deep_dive_test.py --stage pipeline
15
+ python tests/deep_dive_test.py --stage all
16
+ """
17
+
18
+ import sys, os, re, json, time, argparse
19
+ from datetime import datetime, timezone
20
+
21
+ # ═══════════════════════════════════════════════════════════════════
22
+ # API CLIENT
23
+ # ═══════════════════════════════════════════════════════════════════
24
+
25
+ import requests
26
+
27
+ API_BASE = "https://bayan10-bayan-api.hf.space"
28
+ TIMEOUT = 60 # seconds per request
29
+
30
def api_call(endpoint, text, retries=2):
    """Call the deployed API with retry.

    POSTs ``{"text": text}`` to ``API_BASE + endpoint`` and returns the decoded
    JSON object with two extra keys: ``_elapsed_ms`` (request duration) and
    ``_timestamp`` (UTC ISO-8601 time of the response).

    Non-200 responses, timeouts and connection errors are retried up to
    ``retries`` more times with a 2 second pause.  This function never raises:
    a failure is returned as a dict with an ``"error"`` key.

    Args:
        endpoint: Path appended to ``API_BASE``, e.g. ``"/api/spelling"``.
        text: Text sent as the ``"text"`` field of the JSON body.
        retries: Extra attempts after the first; negative values count as 0.

    Returns:
        The response dict on success, otherwise ``{"error": ...}``.
    """
    url = f"{API_BASE}{endpoint}"
    attempts = max(retries, 0) + 1  # always try at least once
    for attempt in range(attempts):
        # perf_counter is monotonic, so the duration is immune to clock changes.
        t0 = time.perf_counter()
        try:
            resp = requests.post(url, json={"text": text}, timeout=TIMEOUT)
            elapsed = int((time.perf_counter() - t0) * 1000)
            if resp.status_code == 200:
                data = resp.json()
                if not isinstance(data, dict):
                    # Callers use .get() on the result, so a non-object body
                    # is reported as an error rather than returned.
                    return {
                        "error": f"Unexpected JSON payload of type {type(data).__name__}",
                        "_elapsed_ms": elapsed,
                    }
                data['_elapsed_ms'] = elapsed
                data['_timestamp'] = datetime.now(timezone.utc).isoformat()
                return data
            failure = {"error": f"HTTP {resp.status_code}: {resp.text[:200]}", "_elapsed_ms": elapsed}
        except requests.exceptions.Timeout:
            failure = {"error": f"Timeout after {TIMEOUT}s", "_elapsed_ms": TIMEOUT * 1000}
        except requests.exceptions.ConnectionError as e:
            # Connection failures are transient like timeouts, so retry them
            # instead of giving up on the first one.
            failure = {"error": str(e), "_elapsed_ms": int((time.perf_counter() - t0) * 1000)}
        except Exception as e:
            # Anything else (e.g. an undecodable body) is not retried.
            return {"error": str(e)}

        if attempt == attempts - 1:
            return failure
        time.sleep(2)
55
+
56
+ # ═══════════════════════════════════════════════════════════════════
57
+ # TRACK A — RAW MODEL CALLS (individual endpoints, no pipeline)
58
+ # ═══════════════════════════════════════════════════════════════════
59
+
60
def track_a_spelling(text):
    """Call /api/spelling — raw AraSpell output.

    Returns a summary dict with the input, the model output and a ``changed``
    flag.  On an API error the output equals the input, ``changed`` is False
    and the error message is included under ``"error"``.
    """
    response = api_call("/api/spelling", text)
    if "error" not in response:
        fixed = response.get("corrected_text", text)
        summary = {"input": text, "output": fixed, "changed": fixed != text}
        summary["elapsed_ms"] = response.get("_elapsed_ms")
        summary["timestamp"] = response.get("_timestamp")
        return summary
    return {"input": text, "output": text, "error": response["error"], "changed": False}
70
+
71
def track_a_grammar(text):
    """Call /api/grammar — raw grammar model output.

    Returns a summary dict with the input, the model output and a ``changed``
    flag.  On an API error the output equals the input, ``changed`` is False
    and the error message is included under ``"error"``.
    """
    response = api_call("/api/grammar", text)
    if "error" not in response:
        fixed = response.get("corrected_text", text)
        summary = {"input": text, "output": fixed, "changed": fixed != text}
        summary["elapsed_ms"] = response.get("_elapsed_ms")
        summary["timestamp"] = response.get("_timestamp")
        return summary
    return {"input": text, "output": text, "error": response["error"], "changed": False}
81
+
82
def track_a_punctuation(text):
    """Send *text* to ``/api/punctuation`` and summarise the reply.

    Besides ``input``/``output``/``changed`` the success result carries
    ``marks_added``: the count of punctuation characters in the output minus
    the count in the input (negative when marks were removed). On failure the
    output echoes the input and ``error`` is included.
    """
    response = api_call("/api/punctuation", text)

    if "error" not in response:
        fixed = response.get("corrected_text", text)
        tracked = '.,;:!?،؛؟'
        count_in = sum(ch in tracked for ch in text)
        count_out = sum(ch in tracked for ch in fixed)
        summary = {"input": text, "output": fixed}
        summary["changed"] = fixed != text
        summary["marks_added"] = count_out - count_in
        summary["elapsed_ms"] = response.get("_elapsed_ms")
        summary["timestamp"] = response.get("_timestamp")
        return summary

    # Failed call: hand the input back untouched and carry the error along.
    return {"input": text, "output": text, "error": response["error"], "changed": False}
95
+
96
+ # ═══════════════════════════════════════════════════════════════════
97
+ # TRACK B — FULL PIPELINE (/api/analyze)
98
+ # ═══════════════════════════════════════════════════════════════════
99
+
100
def track_b_analyze(text):
    """Send *text* to ``/api/analyze`` and normalise the reply.

    A reply counts as failed only when it has an ``error`` key and no
    ``status`` key; the result is then ``input``, ``error`` and an empty
    ``suggestions`` list. Otherwise the reply's fields are copied, with
    defaults for anything missing.
    """
    reply = api_call("/api/analyze", text)

    succeeded = "error" not in reply or "status" in reply
    if not succeeded:
        return {"input": text, "error": reply["error"], "suggestions": []}

    summary = {"input": text}
    summary["original"] = reply.get("original", text)
    summary["corrected"] = reply.get("corrected", text)
    summary["suggestions"] = reply.get("suggestions", [])
    summary["timing_ms"] = reply.get("timing_ms", {})
    summary["elapsed_ms"] = reply.get("_elapsed_ms")
    summary["timestamp"] = reply.get("_timestamp")
    return summary
114
+
115
+ # ═══════════════════════════════════════════════════════════════════
116
+ # TEST INPUTS — ALL CATEGORIES
117
+ # ═══════════════════════════════════════════════════════════════════
118
+
119
# Category 2 inputs, each tagged with a "domain" label.
# run_spelling_tests() flags a case as a false positive whenever the spelling
# endpoint changes the text at all, i.e. these inputs are expected to come
# back unchanged.
CAT2_OVERCORRECTION = [
    {"id": "C2-01", "input": "القاهرة عاصمة جمهورية مصر العربية وأكبر مدنها", "domain": "news"},
    {"id": "C2-02", "input": "يعد نهر النيل أطول أنهار العالم", "domain": "news"},
    {"id": "C2-03", "input": "بسم الله الرحمن الرحيم", "domain": "religious"},
    {"id": "C2-04", "input": "إنا لله وإنا إليه راجعون", "domain": "religious"},
    {"id": "C2-05", "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق", "domain": "technical"},
    {"id": "C2-06", "input": "سافر محمد إلى دبي للعمل في شركة جوجل", "domain": "proper_nouns"},
    {"id": "C2-07", "input": "الرئيس عبد الفتاح السيسي رئيس جمهورية مصر العربية", "domain": "proper_nouns"},
    {"id": "C2-08", "input": "استوقفني المشهد فتأملته مليا", "domain": "literary"},
    {"id": "C2-09", "input": "أضحى التعليم الإلكتروني ضرورة ملحة في عصرنا الحالي", "domain": "formal"},
    {"id": "C2-10", "input": "تتراوح درجات الحرارة بين خمس وعشرين وثلاثين درجة مئوية", "domain": "weather"},
]
131
+
132
# Category 8 roots: (root word, label describing the kind of root).
CAT8_CLITIC_ROOTS = [
    ('مدرسة', 'moon'),      # Moon letter
    ('شمس', 'sun'),         # Sun letter
    ('أمة', 'hamza'),       # Hamza-initial
    ('نافذة', 'long'),      # Long word
    ('علم', 'short'),       # Short 3-letter root
    ('اقتصاد', 'alef'),     # Alef-initial, long
]
# (prefix name used in the test id, prefix string prepended to the root).
CAT8_PREFIXES = [("bare", ""), ("wa", "و"), ("ba", "ب"), ("la", "ل"), ("ka", "ك")]
# Cross product of roots and prefixes: 6 roots x 5 prefixes = 30 test cases.
# "expected" equals the input, i.e. each prefixed word should pass unchanged.
CAT8_TESTS = []
for root, root_type in CAT8_CLITIC_ROOTS:
    for pfx_name, pfx in CAT8_PREFIXES:
        word = pfx + root
        CAT8_TESTS.append({
            "id": f"C8-{root}-{pfx_name}", "input": word, "root": root,
            "root_type": root_type, "prefix": pfx, "expected": word,
        })
149
+
150
# Category 9 inputs: frequently confused words, first alone ("isolation") and
# then inside short sentences ("sentence"). "concern" is a human-readable note
# on the expected outcome; run_spelling_tests() only records and logs it, it
# is not checked programmatically.
CAT9_CONFUSABLE = [
    # === Isolation tests ===
    {"id": "C9-01a", "input": "ان", "context": "isolation", "concern": "should→أن/إن NOT كان"},
    {"id": "C9-01b", "input": "كان", "context": "isolation", "concern": "stays كان"},
    {"id": "C9-02a", "input": "إلى", "context": "isolation", "concern": "stays إلى"},
    {"id": "C9-02b", "input": "على", "context": "isolation", "concern": "stays على"},
    {"id": "C9-03a", "input": "هذا", "context": "isolation", "concern": "stays هذا"},
    {"id": "C9-03b", "input": "هذه", "context": "isolation", "concern": "stays هذه"},
    {"id": "C9-03c", "input": "هذة", "context": "isolation", "concern": "misspelling→هذه"},
    {"id": "C9-04a", "input": "لكن", "context": "isolation", "concern": "stays لكن"},
    {"id": "C9-04b", "input": "لاكن", "context": "isolation", "concern": "misspelling→لكن"},
    {"id": "C9-05a", "input": "ذلك", "context": "isolation", "concern": "stays ذلك"},
    {"id": "C9-05b", "input": "ذالك", "context": "isolation", "concern": "misspelling→ذلك"},
    {"id": "C9-06a", "input": "الى", "context": "isolation", "concern": "should→إلى"},
    # === Sentence-context tests ===
    {"id": "C9-S01", "input": "ان الحياة جميلة", "context": "sentence", "concern": "ان→أن/إن NOT كان"},
    {"id": "C9-S02", "input": "كان الرجل طيبا", "context": "sentence", "concern": "كان stays"},
    {"id": "C9-S03", "input": "ذهب الى المدرسة", "context": "sentence", "concern": "الى→إلى"},
    {"id": "C9-S04", "input": "جلس على الكرسي", "context": "sentence", "concern": "على stays"},
    {"id": "C9-S05", "input": "هذة المدينة جميلة", "context": "sentence", "concern": "هذة→هذه"},
    {"id": "C9-S06", "input": "هو ذكي لاكن كسول", "context": "sentence", "concern": "لاكن→لكن"},
    {"id": "C9-S07", "input": "ذالك الكتاب مفيد", "context": "sentence", "concern": "ذالك→ذلك"},
    {"id": "C9-S08", "input": "هذا البيت كبير", "context": "sentence", "concern": "هذا stays"},
    {"id": "C9-S09", "input": "هذه السيارة سريعة", "context": "sentence", "concern": "هذه stays"},
    {"id": "C9-S10", "input": "سافر إلى القاهرة", "context": "sentence", "concern": "إلى stays"},
    {"id": "C9-S11", "input": "جلس على المقعد", "context": "sentence", "concern": "على stays"},
    {"id": "C9-S12", "input": "ان الذكاء مهم لكن الاجتهاد اهم", "context": "sentence", "concern": "ان→أن, لكن stays"},
]
178
+
179
# Category 10 inputs: one sentence per edge case, with "concern" naming the
# feature being probed. run_spelling_tests() records whether the spelling
# endpoint changed each input; it does not assert a specific expected output.
CAT10_EDGE_CASES = [
    {"id": "C10-01", "input": "كَتَبَ الطَّالِبُ الدَّرسَ", "concern": "tashkeel_present"},
    {"id": "C10-02", "input": "كتب الطالب الدرس", "concern": "tashkeel_absent"},
    {"id": "C10-03", "input": "قرأ إبراهيم آيات من القرآن", "concern": "alef_forms"},
    {"id": "C10-04", "input": "مشى الفتى إلى المستشفى", "concern": "ya_alef_maksura"},
    {"id": "C10-05", "input": "ذهبت إلى المدرسة", "concern": "ta_marbuta"},
    {"id": "C10-06", "input": "جاء ١٢٣ طالبا", "concern": "arabic_indic_digits"},
    {"id": "C10-07", "input": "جاء 123 طالبا", "concern": "western_digits"},
    {"id": "C10-08", "input": "يعمل في شركة Google في القاهرة", "concern": "latin_in_arabic"},
    {"id": "C10-09", "input": "انا رايح المدرسة النهارده", "concern": "egyptian_dialect"},
    {"id": "C10-10", "input": "الموضوع ده كويس جدااااا", "concern": "repeated_letters"},
    {"id": "C10-11", "input": "مسؤول عن الشؤون الداخلية", "concern": "hamza_on_waw"},
    {"id": "C10-12", "input": "بيئة العمل مليئة بالتحديات", "concern": "hamza_on_ya"},
    {"id": "C10-13", "input": "الكتاب الذى قرأته مفيد", "concern": "ya_in_الذي"},
    {"id": "C10-14", "input": "خطأ الطالب في الامتحان", "concern": "hamza_standalone"},
    {"id": "C10-15", "input": "الحمد لله رب العالمين الرحمن الرحيم مالك يوم الدين", "concern": "religious_long"},
]
196
+
197
# Category 5 inputs: unpunctuated sentences of varying size. "length" is a
# descriptive label that run_punctuation_tests() copies into the result; it is
# not computed from the input.
CAT5_PUNC_SANITY = [
    {"id": "C5-01", "input": "ذهب إلى المدرسة", "length": "short_3w"},
    {"id": "C5-02", "input": "هل تعلم أن الأرض تدور حول الشمس كل عام", "length": "medium_9w"},
    {"id": "C5-03", "input": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة في بناء كتلة عضلية قوية ويا له من التزام حديدي يثير الإعجاب", "length": "long_20w"},
    {"id": "C5-04", "input": "قال المعلم للطلاب ادرسوا جيدا فالامتحان قريب", "length": "medium_imperative"},
    {"id": "C5-05", "input": "كانت الفتيات يلعبن في الحديقة وفجأة سقطت إحداهن وبدأت تبكي بشدة", "length": "long_narrative"},
]
204
+
205
# Category 6 inputs: unpunctuated sentences. run_punctuation_tests() sends
# each one to both the raw punctuation endpoint and the full pipeline and
# records where each placed punctuation.
CAT6_PUNC_POSITION = [
    {"id": "C6-01", "input": "ذهب محمد إلى المدرسة ودرس جيدا ثم عاد إلى البيت"},
    {"id": "C6-02", "input": "إن الذكاء الاصطناعي يلعب دورا هاما لذلك يجب الاهتمام به"},
    {"id": "C6-03", "input": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة في بناء كتلة عضلية قوية ويا له من التزام حديدي يثير الإعجاب"},
    {"id": "C6-04", "input": "كانت الفتيات يلعبن في الحديقة وفجأة سقطت إحداهن وبدأت تبكي بشدة"},
    {"id": "C6-05", "input": "هل تعلم أن القاهرة هي عاصمة مصر وتقع على ضفاف نهر النيل"},
    {"id": "C6-06", "input": "قال المعلم للطلاب ادرسوا جيدا فالامتحان قريب"},
    {"id": "C6-07", "input": "يحب الأطفال اللعب في الحديقة وركوب الدراجات والجري بين الأشجار"},
    {"id": "C6-08", "input": "رغم صعوبة الامتحان إلا أن الطلاب حققوا نتائج مبهرة"},
    {"id": "C6-09", "input": "سافر العالم إلى عدة دول لحضور المؤتمرات العلمية ونشر أبحاثه"},
    {"id": "C6-10", "input": "يا بني اجتهد في دراستك فالعلم نور والجهل ظلام"},
]
217
+
218
+ # ═══════════════════════════════════════════════════════════════════
219
+ # RUNNERS
220
+ # ═══════════════════════════════════════════════════════════════════
221
+
222
def log(msg):
    """Print *msg* prefixed with the local ``[HH:MM:SS]`` time, flushing at once."""
    stamp = datetime.now().strftime('%H:%M:%S')
    line = f"[{stamp}] {msg}"
    print(line, flush=True)
224
+
225
def run_spelling_tests():
    """Run the spelling categories (2, 8, 9, 10) and return their result dicts.

    Every case is sent to the raw spelling endpoint (Track A); Category 2 is
    also sent to the full pipeline (Track B).

    Track A echoes the input with ``changed=False`` when the API call fails,
    which would otherwise be indistinguishable from "the model left the text
    alone". Failed calls are therefore recorded under ``track_a_error`` /
    ``track_b_error`` and logged as errors rather than as passes. Those keys
    are only present when a call failed.

    Returns:
        list[dict]: one result per test case, in execution order.
    """
    results = []

    log("=== Category 2: Overcorrection (10 tests) ===")
    for test in CAT2_OVERCORRECTION:
        log(f" {test['id']}: {test['input'][:50]}...")
        a = track_a_spelling(test['input'])
        b = track_b_analyze(test['input'])
        # Any change to these inputs counts as a false positive.
        fp = a.get('changed', False)
        result = {
            "id": test['id'], "category": 2, "input": test['input'],
            "domain": test['domain'],
            "track_a_spelling": a['output'],
            "track_a_changed": a.get('changed', False),
            "track_b_suggestions": len(b.get('suggestions', [])),
            "track_b_corrected": b.get('corrected', ''),
            "is_false_positive": fp,
        }
        if a.get('error'):
            result["track_a_error"] = a['error']
        if b.get('error'):
            result["track_b_error"] = b['error']
        if a.get('error'):
            status = "✗ ERROR"
        else:
            status = "⚠ FP" if fp else "✓"
        log(f" {status} A:'{a['output'][:60]}' B_sugg:{len(b.get('suggestions',[]))}")
        results.append(result)

    log("\n=== Category 8: Clitic/Prefix (30 tests) ===")
    for test in CAT8_TESTS:
        a = track_a_spelling(test['input'])
        changed = a.get('changed', False)
        if a.get('error'):
            classification = "error"
        elif changed:
            # Classify: did it preserve root or mangle it?
            output = a['output']
            # The root minus its last letter is enough to count as preserved;
            # a full-root match implies this one, so a single test suffices.
            root_preserved = test['root'][:-1] in output
            classification = "root_fixed" if root_preserved else "prefix_mangled"
        else:
            classification = "correct"
        result = {
            "id": test['id'], "category": 8, "input": test['input'],
            "root": test['root'], "root_type": test['root_type'],
            "prefix": test['prefix'],
            "track_a_spelling": a['output'], "changed": changed,
            "classification": classification,
        }
        if a.get('error'):
            result["track_a_error"] = a['error']
            log(f" ✗ {test['id']}: API error: {a['error']}")
        elif changed:
            log(f" ⚠ {test['id']}: '{test['input']}' → '{a['output']}' [{classification}]")
        results.append(result)

    log("\n=== Category 9: Confusable Words (24 tests) ===")
    for test in CAT9_CONFUSABLE:
        a = track_a_spelling(test['input'])
        result = {
            "id": test['id'], "category": 9, "input": test['input'],
            "context": test['context'], "concern": test['concern'],
            "track_a_spelling": a['output'], "changed": a.get('changed', False),
        }
        if a.get('error'):
            result["track_a_error"] = a['error']
            log(f" ✗ {test['id']}: API error: {a['error']}")
        elif a.get('changed'):
            log(f" ⚠ {test['id']}: '{test['input']}' → '{a['output']}' (concern: {test['concern']})")
        else:
            log(f" ✓ {test['id']}: no change")
        results.append(result)

    log("\n=== Category 10: Arabic Edge Cases (15 tests) ===")
    for test in CAT10_EDGE_CASES:
        a = track_a_spelling(test['input'])
        result = {
            "id": test['id'], "category": 10, "input": test['input'],
            "concern": test['concern'],
            "track_a_spelling": a['output'], "changed": a.get('changed', False),
        }
        if a.get('error'):
            result["track_a_error"] = a['error']
            log(f" ✗ {test['id']}: API error: {a['error']}")
        elif a.get('changed'):
            log(f" ⚠ {test['id']}: '{test['input']}' → '{a['output']}' [{test['concern']}]")
        else:
            log(f" ✓ {test['id']}: no change [{test['concern']}]")
        results.append(result)

    return results
301
+
302
def run_punctuation_tests():
    """Run the punctuation categories (5 and 6) and return their result dicts.

    Category 5 records how many marks the raw punctuation endpoint added.
    Category 6 records where the raw endpoint (Track A) and the full pipeline
    (Track B) each placed punctuation, for side-by-side comparison.
    """
    collected = []

    log("=== Category 5: Punctuation Sanity (5 tests) ===")
    for case in CAT5_PUNC_SANITY:
        sentence = case['input']
        log(f" {case['id']}: {sentence[:50]}...")
        raw = track_a_punctuation(sentence)
        added = raw.get('marks_added', 0)
        entry = {"id": case['id'], "category": 5, "input": sentence}
        entry["length"] = case['length']
        entry["track_a_punc"] = raw['output']
        entry["marks_added"] = added
        entry["changed"] = raw.get('changed', False)
        log(f" Marks: +{added} | Output: {raw['output'][:80]}")
        collected.append(entry)

    log("\n=== Category 6: Punctuation Position (10 tests) ===")
    for case in CAT6_PUNC_POSITION:
        sentence = case['input']
        log(f" {case['id']}: {sentence[:50]}...")
        # Track A first (raw punctuation on the original text), then Track B.
        raw = track_a_punctuation(sentence)
        pipeline = track_b_analyze(sentence)

        # Where Track A placed marks, derived by diffing input and output.
        raw_marks = _find_punct_positions(sentence, raw['output'])

        # Where Track B placed marks, read from its punctuation suggestions.
        pipeline_punc = []
        for suggestion in pipeline.get('suggestions', []):
            if suggestion.get('type') == 'punctuation':
                pipeline_punc.append(suggestion)
        pipeline_marks = []
        for suggestion in pipeline_punc:
            pipeline_marks.append((
                suggestion.get('start', 0),
                suggestion.get('end', 0),
                suggestion.get('correction', ''),
            ))

        entry = {"id": case['id'], "category": 6, "input": sentence}
        entry["track_a_punc_output"] = raw['output']
        entry["track_a_marks"] = raw_marks
        entry["track_b_corrected"] = pipeline.get('corrected', '')
        entry["track_b_punc_suggestions"] = pipeline_punc
        entry["track_b_marks"] = pipeline_marks
        log(f" A marks: {raw_marks}")
        log(f" B marks: {pipeline_marks}")
        collected.append(entry)

    return collected
346
+
347
+ def _find_punct_positions(original, punctuated):
348
+ """Find where punctuation was added by comparing original vs punctuated."""
349
+ PUNC = set('.,;:!?،؛؟')
350
+ marks = []
351
+ # Word-level alignment
352
+ orig_words = original.split()
353
+ punc_words = punctuated.split()
354
+ oi, pi = 0, 0
355
+ char_pos = 0
356
+ while oi < len(orig_words) and pi < len(punc_words):
357
+ o_base = ''.join(c for c in orig_words[oi] if c not in PUNC)
358
+ p_base = ''.join(c for c in punc_words[pi] if c not in PUNC)
359
+ if o_base == p_base:
360
+ # Same word — check for added punctuation
361
+ o_punc = set(c for c in orig_words[oi] if c in PUNC)
362
+ p_punc = set(c for c in punc_words[pi] if c in PUNC)
363
+ added = p_punc - o_punc
364
+ if added:
365
+ marks.append({
366
+ "word_index": oi, "word": orig_words[oi],
367
+ "after_word": orig_words[oi],
368
+ "marks_added": list(added),
369
+ "char_pos": char_pos,
370
+ })
371
+ char_pos += len(orig_words[oi]) + 1 # +1 for space
372
+ oi += 1
373
+ pi += 1
374
+ else:
375
+ # Mismatch — model changed the word
376
+ char_pos += len(orig_words[oi]) + 1
377
+ oi += 1
378
+ pi += 1
379
+ return marks
380
+
381
def run_pipeline_comparison():
    """Run the pipeline comparison categories (3 and 4) and return the results.

    Category 3 sends each input to the three raw endpoints (Track A) and to
    the full pipeline (Track B) so their outputs can be compared.

    Category 4 sends each input to the pipeline three times and reports
    whether the corrected text was identical every time. A run that failed
    has no corrected text, so three failures would look identical; the
    ``deterministic`` flag is therefore only True when all runs succeeded
    AND agree. Failed runs carry an ``error`` key.

    Returns:
        list[dict]: Category 3 results followed by Category 4 results.
    """
    results = []

    # Cat 3: Integration-only — test where raw models work but pipeline might not
    log("=== Category 3: Integration-Only (5 tests) ===")
    integration_inputs = [
        {"id": "C3-01", "input": "كانت الفتيات يلعبون في الحديقه وفجأه سقطت احداهن وبدءت تبكي بشده"},
        {"id": "C3-02", "input": "ان الذكاء الاصطناعي يلعب دورا هاما ولذالك يجب الاهتمام بة"},
        {"id": "C3-03", "input": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة في بناء كتلة عضلية قوية ويا له من التزام حديدي يثير الإعجاب"},
        {"id": "C3-04", "input": "هذة المدينه جميله جدا ومناخها معتدل طوال العام"},
        {"id": "C3-05", "input": "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه في الامتحانات"},
    ]
    for test in integration_inputs:
        log(f" {test['id']}: {test['input'][:50]}...")
        a_spell = track_a_spelling(test['input'])
        a_gram = track_a_grammar(test['input'])
        a_punc = track_a_punctuation(test['input'])
        b = track_b_analyze(test['input'])
        result = {
            "id": test['id'], "category": 3, "input": test['input'],
            "track_a": {
                "spelling": a_spell['output'], "spelling_changed": a_spell.get('changed'),
                "grammar": a_gram['output'], "grammar_changed": a_gram.get('changed'),
                "punctuation": a_punc['output'], "punctuation_changed": a_punc.get('changed'),
            },
            "track_b": {
                "corrected": b.get('corrected', ''),
                "suggestions": b.get('suggestions', []),
                "timing_ms": b.get('timing_ms', {}),
            }
        }
        log(f" A_spell: {a_spell['output'][:60]}")
        log(f" A_gram: {a_gram['output'][:60]}")
        log(f" A_punc: {a_punc['output'][:60]}")
        log(f" B_final: {b.get('corrected','')[:60]}")
        log(f" B_sugg: {len(b.get('suggestions',[]))}")
        results.append(result)

    # Cat 4: Overlap — run 3x for determinism
    log("\n=== Category 4: Overlap Resolution (3 tests × 3 runs) ===")
    overlap_inputs = [
        {"id": "C4-01", "input": "كانت الفتيات يلعبون في الحديقه"},
        {"id": "C4-02", "input": "ذهب الى المدرسه وقابل المعلمه"},
        {"id": "C4-03", "input": "ان الطالبات ذهبو الى الجامعه"},
    ]
    for test in overlap_inputs:
        runs = []
        for run_idx in range(3):
            b = track_b_analyze(test['input'])
            run = {
                "run": run_idx + 1,
                "corrected": b.get('corrected', ''),
                "suggestions": b.get('suggestions', []),
            }
            if b.get('error'):
                run["error"] = b['error']
            runs.append(run)
        # Check determinism; identical empty outputs from failed calls do not count.
        any_failed = any('error' in r for r in runs)
        all_same = all(r['corrected'] == runs[0]['corrected'] for r in runs)
        deterministic = all_same and not any_failed
        result = {
            "id": test['id'], "category": 4, "input": test['input'],
            "runs": runs, "deterministic": deterministic,
        }
        log(f" {test['id']}: deterministic={deterministic}")
        if any_failed:
            log(f" ⚠ {test['id']}: at least one run failed; determinism not established")
        for r in runs:
            log(f" Run {r['run']}: {r['corrected'][:60]} ({len(r['suggestions'])} sugg)")
        results.append(result)

    return results
448
+
449
+ # Boundary tests for spelling 300-char cutoff
450
def run_boundary_tests():
    """Probe the pipeline with inputs of 299, 300, 301 and 500 characters.

    Each input is a repeated base sentence cut to the target length. For each
    one the result records whether the pipeline returned any suggestion of
    type ``spelling``, the total suggestion count and the reported timing.
    """
    log("\n=== Boundary: Spelling 300-char cutoff ===")
    base = "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق في معالجة البيانات "
    collected = []
    for target_len in (299, 300, 301, 500):
        text = (base * 10)[:target_len]
        b = track_b_analyze(text)
        kinds = (s.get('type') for s in b.get('suggestions', []))
        has_spelling = 'spelling' in kinds
        entry = {"id": f"BOUND-{target_len}", "category": 3, "input_len": target_len}
        entry["input"] = text[:80] + "..."
        entry["has_spelling_suggestions"] = has_spelling
        entry["total_suggestions"] = len(b.get('suggestions', []))
        entry["timing"] = b.get('timing_ms', {})
        log(f" len={target_len}: spelling_active={has_spelling} suggestions={len(b.get('suggestions',[]))}")
        collected.append(entry)
    return collected
468
+
469
+ # ═══════════════════════════════════════════════════════════════════
470
+ # MAIN
471
+ # ═══════════════════════════════════════════════════════════════════
472
+
473
def main():
    """Parse ``--stage``, run the selected test groups and save the results.

    Steps: query ``/api/health`` (failure is recorded, not fatal), run the
    runners matching ``--stage``, write everything to
    ``deep_dive_output.json`` next to this file, then log a per-group count
    of tests that reported a change or a false positive.
    """
    parser = argparse.ArgumentParser(description='BAYAN Deep-Dive Test Harness')
    parser.add_argument('--stage', choices=['spelling', 'grammar', 'punctuation', 'pipeline', 'all'],
                        default='spelling')
    args = parser.parse_args()

    all_results = {"timestamp": datetime.now(timezone.utc).isoformat(), "api_base": API_BASE}

    # Health check
    log(f"Checking API health at {API_BASE}...")
    try:
        resp = requests.get(f"{API_BASE}/api/health", timeout=10)
        health = resp.json()  # decode once; reused for the log line and the report
        log(f" Health: {resp.status_code} — {health}")
        all_results['health'] = health
    except Exception as e:
        log(f" ⚠ API unreachable: {e}")
        all_results['health'] = {"error": str(e)}

    if args.stage == 'grammar':
        # 'grammar' is an accepted choice but no runner is wired to it below;
        # say so instead of silently writing a report with no tests in it.
        log(" ⚠ No stand-alone grammar tests are defined; nothing to run for --stage grammar")

    if args.stage in ('spelling', 'all'):
        log("\n══════ SPELLING TESTS (Cat 2, 8, 9, 10) ══════")
        all_results['spelling_tests'] = run_spelling_tests()

    if args.stage in ('punctuation', 'all'):
        log("\n══════ PUNCTUATION TESTS (Cat 5, 6) ══════")
        all_results['punctuation_tests'] = run_punctuation_tests()

    if args.stage in ('pipeline', 'all'):
        log("\n══════ PIPELINE TESTS (Cat 3, 4) ══════")
        all_results['pipeline_tests'] = run_pipeline_comparison()
        all_results['boundary_tests'] = run_boundary_tests()

    # Save
    output_path = os.path.join(os.path.dirname(__file__), 'deep_dive_output.json')
    with open(output_path, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps the Arabic text readable in the report.
        json.dump(all_results, f, ensure_ascii=False, indent=2)
    log(f"\nResults saved to {output_path}")

    # Summary
    for key in ['spelling_tests', 'punctuation_tests', 'pipeline_tests', 'boundary_tests']:
        if key in all_results:
            tests = all_results[key]
            if isinstance(tests, list):
                changed = sum(1 for t in tests if t.get('changed') or t.get('is_false_positive'))
                log(f" {key}: {len(tests)} tests, {changed} with changes")
517
+
518
+ if __name__ == '__main__':
519
+ main()
tests/gap_filling_results.json ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "phase_1_3": [
3
+ {
4
+ "input": "لكن الأمر مختلف",
5
+ "corrected": "لكن الأمر مختلف.",
6
+ "check": "لكن",
7
+ "status": "✅ PRESERVED",
8
+ "suggestions": 1
9
+ },
10
+ {
11
+ "input": "ذلك الكتاب جميل",
12
+ "corrected": "ذلك الكتاب جميل.",
13
+ "check": "ذلك",
14
+ "status": "✅ PRESERVED",
15
+ "suggestions": 1
16
+ },
17
+ {
18
+ "input": "إلى المدرسة",
19
+ "corrected": "إلى المدرسة.",
20
+ "check": "إلى",
21
+ "status": "✅ PRESERVED",
22
+ "suggestions": 1
23
+ },
24
+ {
25
+ "input": "على الطاولة",
26
+ "corrected": "على الطاولة.",
27
+ "check": "على",
28
+ "status": "✅ PRESERVED",
29
+ "suggestions": 1
30
+ },
31
+ {
32
+ "input": "هذه المدينة جميلة",
33
+ "corrected": "هذه المدينة جميلة.",
34
+ "check": "هذه",
35
+ "status": "✅ PRESERVED",
36
+ "suggestions": 1
37
+ },
38
+ {
39
+ "input": "كان الجو حارا",
40
+ "corrected": "كان الجو حارا.",
41
+ "check": "كان",
42
+ "status": "✅ PRESERVED",
43
+ "suggestions": 1
44
+ },
45
+ {
46
+ "input": "لاكن الأمر مختلف",
47
+ "corrected": "لكن الأمر مختلف.",
48
+ "check": "لاكن→لكن",
49
+ "status": "✅ CORRECTED",
50
+ "suggestions": 2
51
+ },
52
+ {
53
+ "input": "ذالك الكتاب جميل",
54
+ "corrected": "ذلك الكتاب جميل.",
55
+ "check": "ذالك→ذلك",
56
+ "status": "✅ CORRECTED",
57
+ "suggestions": 2
58
+ }
59
+ ],
60
+ "phase_2": {
61
+ "total": 10,
62
+ "raw_fp_count": 5,
63
+ "raw_fp_rate": "50%",
64
+ "pipeline_fp_count": 1,
65
+ "pipeline_fp_rate": "10%",
66
+ "results": [
67
+ {
68
+ "id": "R-01",
69
+ "word": "عصماء",
70
+ "raw_changed": true,
71
+ "pipeline_changed": false,
72
+ "pipeline_targeted": false,
73
+ "is_false_positive": false
74
+ },
75
+ {
76
+ "id": "R-02",
77
+ "word": "يستشف",
78
+ "raw_changed": true,
79
+ "pipeline_changed": false,
80
+ "pipeline_targeted": false,
81
+ "is_false_positive": false
82
+ },
83
+ {
84
+ "id": "R-03",
85
+ "word": "المسغبة",
86
+ "raw_changed": true,
87
+ "pipeline_changed": false,
88
+ "pipeline_targeted": false,
89
+ "is_false_positive": false
90
+ },
91
+ {
92
+ "id": "R-04",
93
+ "word": "التقاعس",
94
+ "raw_changed": true,
95
+ "pipeline_changed": false,
96
+ "pipeline_targeted": false,
97
+ "is_false_positive": false
98
+ },
99
+ {
100
+ "id": "R-05",
101
+ "word": "استئثار",
102
+ "raw_changed": false,
103
+ "pipeline_changed": false,
104
+ "pipeline_targeted": false,
105
+ "is_false_positive": false
106
+ },
107
+ {
108
+ "id": "R-06",
109
+ "word": "تبجيل",
110
+ "raw_changed": false,
111
+ "pipeline_changed": false,
112
+ "pipeline_targeted": false,
113
+ "is_false_positive": false
114
+ },
115
+ {
116
+ "id": "R-07",
117
+ "word": "الدمث",
118
+ "raw_changed": true,
119
+ "pipeline_changed": true,
120
+ "pipeline_targeted": true,
121
+ "is_false_positive": true
122
+ },
123
+ {
124
+ "id": "R-08",
125
+ "word": "استقصاء",
126
+ "raw_changed": false,
127
+ "pipeline_changed": false,
128
+ "pipeline_targeted": false,
129
+ "is_false_positive": false
130
+ },
131
+ {
132
+ "id": "R-09",
133
+ "word": "التواني",
134
+ "raw_changed": false,
135
+ "pipeline_changed": false,
136
+ "pipeline_targeted": false,
137
+ "is_false_positive": false
138
+ },
139
+ {
140
+ "id": "R-10",
141
+ "word": "مستطرف",
142
+ "raw_changed": false,
143
+ "pipeline_changed": false,
144
+ "pipeline_targeted": false,
145
+ "is_false_positive": false
146
+ }
147
+ ]
148
+ },
149
+ "phase_3_2": [
150
+ {
151
+ "input": "ولذالك قررت السفر",
152
+ "corrected": "ولذالك قررت السفر.",
153
+ "bad_split_present": false,
154
+ "good_correction_present": false
155
+ },
156
+ {
157
+ "input": "المستشفياتهم كبيرة",
158
+ "corrected": "المستشفيات هم كبيرة.",
159
+ "bad_split_present": false,
160
+ "good_correction_present": false
161
+ }
162
+ ],
163
+ "phase_5_5": [
164
+ {
165
+ "input": "الطالبه كتبو الوجبات",
166
+ "corrected": "الطالبة كتبو الوجبات.",
167
+ "suggestions": 2,
168
+ "has_duplicate": false,
169
+ "word_count_diff": 0
170
+ },
171
+ {
172
+ "input": "هو ذهبو الي البيت",
173
+ "corrected": "هو ذهب إلى البيت.",
174
+ "suggestions": 3,
175
+ "has_duplicate": false,
176
+ "word_count_diff": 0
177
+ },
178
+ {
179
+ "input": "الطلاب اجتهدو في امتحانتهم",
180
+ "corrected": "الطلاب اجتهدو في امتحانتهم.",
181
+ "suggestions": 1,
182
+ "has_duplicate": false,
183
+ "word_count_diff": 0
184
+ }
185
+ ],
186
+ "phase_6_3": {
187
+ "empty_count": 0,
188
+ "error_count": 0,
189
+ "results": [
190
+ {
191
+ "attempt": 1,
192
+ "corrected": "الحديقة جميلة والأزهار متفتحة.",
193
+ "suggestions": 2,
194
+ "status": "success",
195
+ "warnings": {},
196
+ "is_empty": false,
197
+ "is_error": false
198
+ },
199
+ {
200
+ "attempt": 2,
201
+ "corrected": "الحديقة جميلة والأزهار متفتحة.",
202
+ "suggestions": 2,
203
+ "status": "success",
204
+ "warnings": {},
205
+ "is_empty": false,
206
+ "is_error": false
207
+ },
208
+ {
209
+ "attempt": 3,
210
+ "corrected": "الحديقة جميلة والأزهار متفتحة.",
211
+ "suggestions": 2,
212
+ "status": "success",
213
+ "warnings": {},
214
+ "is_empty": false,
215
+ "is_error": false
216
+ },
217
+ {
218
+ "attempt": 4,
219
+ "corrected": "الحديقة جميلة والأزهار متفتحة.",
220
+ "suggestions": 2,
221
+ "status": "success",
222
+ "warnings": {},
223
+ "is_empty": false,
224
+ "is_error": false
225
+ },
226
+ {
227
+ "attempt": 5,
228
+ "corrected": "الحديقة جميلة والأزهار متفتحة.",
229
+ "suggestions": 2,
230
+ "status": "success",
231
+ "warnings": {},
232
+ "is_empty": false,
233
+ "is_error": false
234
+ }
235
+ ]
236
+ },
237
+ "phase_6_4": {
238
+ "input_chars": 982,
239
+ "input_words": 159,
240
+ "status": "success",
241
+ "suggestions": 4,
242
+ "warnings": {},
243
+ "timing": {
244
+ "grammar_ms": 12196,
245
+ "punctuation_ms": 14448,
246
+ "spelling_ms": 0,
247
+ "total_ms": 26649
248
+ },
249
+ "elapsed_ms": 27615,
250
+ "is_silently_empty": false
251
+ },
252
+ "phase_7_1": {
253
+ "input": "قال المعلم للطلاب ادرسوا جيدا فالامتحان قريب",
254
+ "raw_output": "قال المعلم للطلاب ادرسوا: جيدا فالامتحان قريب؛",
255
+ "pipeline_output": "قال المعلم للطلاب ادرسوا: جيدا فالامتحين قريب",
256
+ "has_semicolon_raw": true,
257
+ "has_semicolon_pipeline": false,
258
+ "diagnosis": "StageLocker or validate_punctuation_diff rejection",
259
+ "pipeline_punc_count": 1
260
+ }
261
+ }
tests/gap_filling_tests.py ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gap-filling live tests for all missing items from the Fix-Everything prompt.
3
+ Covers:
4
+ Phase 1.3 — Category 9 pairs: لكن/لاكن, ذلك/ذالك, الى/إلى live verification
5
+ Phase 2 — R-01→R-10 rare vocabulary FP measurement
6
+ Phase 3.2 — ولذالك and مستشفياتهم specific cases
7
+ Phase 5.5 — Constructed dual-correction cases
8
+ Phase 6.3 — BUG-017 re-test
9
+ Phase 6.4 — 187-word input regression
10
+ Phase 7.1 — BUG-018 precise tracing
11
+ """
12
+ import sys, os, json, time, requests
13
+
14
+ API_BASE = "https://bayan10-bayan-api.hf.space"
15
+ TIMEOUT = 90
16
+
17
def api_call(endpoint, text, timeout=TIMEOUT):
    """POST ``{"text": text}`` to ``API_BASE + endpoint`` once and return a dict.

    On HTTP 200 the decoded JSON is returned with ``_elapsed_ms`` added.
    Any other status yields ``{"error": "HTTP <code>", "_elapsed_ms": ...}``.
    Any exception yields ``{"error": "<ExceptionType>: <message>"}``.
    """
    target = f"{API_BASE}{endpoint}"
    try:
        started = time.time()
        response = requests.post(target, json={"text": text}, timeout=timeout)
        duration_ms = int((time.time() - started) * 1000)
        if response.status_code != 200:
            return {"error": f"HTTP {response.status_code}", "_elapsed_ms": duration_ms}
        payload = response.json()
        payload['_elapsed_ms'] = duration_ms
        return payload
    except Exception as exc:
        return {"error": f"{type(exc).__name__}: {exc}"}
30
+
31
+
32
+ all_results = {}
33
+
34
+
35
+ # ══════════════════════════════════════════════════════════════════════
36
+ # Phase 1.3 — Category 9 Pairs Live Verification
37
+ # ══════════════════════════════════════════════════════════════════════
38
def test_category9_live():
    """Check confusable-word handling against the live ``/api/analyze`` endpoint.

    Each case is ``(input_text, check, description)``:

    * a plain ``check`` word must still appear in the corrected text;
    * a ``check`` of the form ``wrong→right`` is a misspelling: ``right``
      must appear in the corrected text and ``wrong`` must not.

    The corrected text defaults to the input when the reply has none. If the
    call failed, that default would make every "must not change" case look
    preserved, so a failed call is reported as ``⚠ API ERROR`` (with the
    message under ``error``) and is not judged.

    Returns:
        list[dict]: one result per case with ``input``, ``corrected``,
        ``check``, ``status`` and ``suggestions`` (a count).
    """
    print("=" * 70)
    print("PHASE 1.3 — Category 9 Pairs Live Verification")
    print("=" * 70)

    pairs = [
        # (input_text, word_that_must_NOT_change, description)
        ("لكن الأمر مختلف", "لكن", "لكن must NOT become لاكن"),
        ("ذلك الكتاب جميل", "ذلك", "ذلك must NOT become ذالك"),
        ("إلى المدرسة", "إلى", "إلى must NOT become على"),
        ("على الطاولة", "على", "على must NOT become إلى"),
        ("هذه المدينة جميلة", "هذه", "هذه must NOT become هذة"),
        ("كان الجو حارا", "كان", "كان must NOT become كأن"),
        # Reverse direction: misspellings SHOULD be corrected
        ("لاكن الأمر مختلف", "لاكن→لكن", "لاكن should become لكن"),
        ("ذالك الكتاب جميل", "ذالك→ذلك", "ذالك should become ذلك"),
    ]

    results = []
    for text, check, desc in pairs:
        r = api_call("/api/analyze", text)
        corrected = r.get("corrected", text)
        suggestions = r.get("suggestions", [])

        # No corrected text plus an error means the pipeline never judged
        # this input; `corrected` is only the echoed input at this point.
        call_failed = "error" in r and "corrected" not in r

        is_reverse = "→" in check
        if call_failed:
            status = "⚠ API ERROR"
        elif is_reverse:
            # For misspellings, check that correction happened
            orig, expected = check.split("→")
            if expected in corrected and orig not in corrected:
                status = "✅ CORRECTED"
            elif orig in corrected:
                status = "⚠ NOT corrected (pipeline didn't fix misspelling)"
            else:
                status = "⚠ UNCLEAR"
        else:
            # For correct words, check they weren't corrupted
            if check in corrected:
                status = "✅ PRESERVED"
            else:
                status = "❌ CORRUPTED"

        result = {
            "input": text, "corrected": corrected,
            "check": check, "status": status,
            "suggestions": len(suggestions),
        }
        if call_failed:
            result["error"] = r["error"]
        results.append(result)
        print(f"\n {desc}")
        print(f" Input: '{text}'")
        print(f" Corrected: '{corrected}'")
        print(f" {status}")

    return results
91
+
92
+
93
+ # ══════════════════════════════════════════════════════════════════════
94
+ # Phase 2 — R-01→R-10 Rare Vocabulary FP Measurement
95
+ # ══════════════════════════════════════════════════════════════════════
96
def test_rare_vocabulary():
    """Measure how often valid but uncommon Arabic words are wrongly "corrected".

    Every sentence is sent to ``/api/spelling`` (raw model) and to
    ``/api/analyze`` (full pipeline).  A pipeline false positive is counted
    when the rare word no longer appears in the pipeline output, or when a
    suggestion's ``original`` equals the rare word.

    Failed API calls are excluded from the rates instead of being counted as
    "preserved".  The affected fields are ``None`` in the per-item result.

    Returns:
        dict: ``total``, ``raw_fp_count``, ``raw_fp_rate``,
        ``pipeline_fp_count``, ``pipeline_fp_rate``, ``results`` plus the
        additional ``raw_error_count`` and ``pipeline_error_count``.
    """
    print("\n" + "=" * 70)
    print("PHASE 2 — R-01→R-10 Rare Vocabulary FP Measurement")
    print("=" * 70)

    # R-01 through R-10: valid but uncommon Arabic words
    rare_words = [
        {"id": "R-01", "word": "عصماء", "sentence": "المدينة العصماء تحتضن آلاف السكان",
         "desc": "عصماء = impeccable (feminine)"},
        {"id": "R-02", "word": "يستشف", "sentence": "يستشف الباحث نتائج الدراسة بعناية",
         "desc": "يستشف = to discern/perceive"},
        {"id": "R-03", "word": "المسغبة", "sentence": "أرهقته المسغبة والعطش الشديد",
         "desc": "المسغبة = severe hunger"},
        {"id": "R-04", "word": "التقاعس", "sentence": "التقاعس عن العمل يؤدي إلى الفشل",
         "desc": "التقاعس = negligence/laziness"},
        {"id": "R-05", "word": "استئثار", "sentence": "استئثار السلطة يهدد الديمقراطية",
         "desc": "استئثار = monopolization"},
        {"id": "R-06", "word": "تبجيل", "sentence": "تبجيل العلماء واجب على المجتمع",
         "desc": "تبجيل = veneration"},
        {"id": "R-07", "word": "الدمث", "sentence": "الرجل الدمث يحبه الجميع",
         "desc": "الدمث = gentle/affable person"},
        {"id": "R-08", "word": "استقصاء", "sentence": "استقصاء الحقائق مهم في الصحافة",
         "desc": "استقصاء = investigation/inquiry"},
        {"id": "R-09", "word": "التواني", "sentence": "لا يجوز التواني في طلب العلم",
         "desc": "التواني = procrastination"},
        {"id": "R-10", "word": "مستطرف", "sentence": "كتاب المستطرف من أمهات الكتب العربية",
         "desc": "مستطرف = novel/curious (literary term)"},
    ]

    def rate(count, measured):
        # Avoid ZeroDivisionError when every call for a track failed.
        return f"{count/measured*100:.0f}%" if measured else "n/a"

    false_positives = 0
    total = len(rare_words)
    results = []

    for item in rare_words:
        word, sentence = item["word"], item["sentence"]

        # Track A: Raw spelling
        a = api_call("/api/spelling", sentence)
        raw_failed = "error" in a
        a_out = a.get("corrected_text", sentence)
        a_changed_word = None if raw_failed else word not in a_out

        # Track B: Pipeline
        b = api_call("/api/analyze", sentence)
        pipeline_failed = "error" in b and "status" not in b
        b_out = b.get("corrected", sentence)
        b_suggestions = b.get("suggestions", [])

        targeting_suggestion = None
        if pipeline_failed:
            # Unknown outcome: do not let the input-text fallback pass as "preserved".
            b_changed_word = word_targeted = is_fp = None
        else:
            b_changed_word = word not in b_out
            # Check if any suggestion targets the rare word
            targeting_suggestion = next(
                (s for s in b_suggestions if s.get("original", "") == word), None
            )
            word_targeted = targeting_suggestion is not None
            is_fp = b_changed_word or word_targeted
            if is_fp:
                false_positives += 1

        results.append({
            "id": item["id"],
            "word": word,
            "raw_changed": a_changed_word,
            "pipeline_changed": b_changed_word,
            "pipeline_targeted": word_targeted,
            "is_false_positive": is_fp,
        })

        if pipeline_failed:
            status = f"⚠ API ERROR ({b['error']})"
        else:
            status = "❌ FALSE POSITIVE" if is_fp else "✅ PRESERVED"
        print(f"\n {item['id']}: {item['desc']}")
        print(f" Input: '{sentence[:60]}...'")
        if raw_failed:
            print(f" Raw spell: API ERROR ({a['error']})")
        else:
            print(f" Raw spell: changed={a_changed_word}")
        if a_changed_word:
            print(f" Raw output: '{a_out[:60]}...'")
        print(f" Pipeline: changed={b_changed_word}, targeted={word_targeted}")
        if b_changed_word:
            print(f" Pipeline: '{b_out[:60]}...'")
        if targeting_suggestion:
            print(f" Suggestion: '{targeting_suggestion.get('original','')}' → '{targeting_suggestion.get('correction','')}' (conf={targeting_suggestion.get('confidence', '?')})")
        print(f" {status}")

    raw_measured = sum(1 for r in results if r["raw_changed"] is not None)
    pipeline_measured = sum(1 for r in results if r["is_false_positive"] is not None)
    raw_fp_count = sum(1 for r in results if r["raw_changed"])
    pipeline_fp_count = false_positives
    raw_fp_rate = rate(raw_fp_count, raw_measured)
    pipeline_fp_rate = rate(pipeline_fp_count, pipeline_measured)

    print(f"\n{'=' * 50}")
    print(f" Raw model FP rate: {raw_fp_count}/{raw_measured} = {raw_fp_rate}")
    print(f" Pipeline FP rate: {pipeline_fp_count}/{pipeline_measured} = {pipeline_fp_rate}")
    if raw_measured < total or pipeline_measured < total:
        print(f" ⚠ API errors: raw={total - raw_measured}, pipeline={total - pipeline_measured} (excluded from rates)")

    return {
        "total": total,
        "raw_fp_count": raw_fp_count,
        "raw_fp_rate": raw_fp_rate,
        "pipeline_fp_count": pipeline_fp_count,
        "pipeline_fp_rate": pipeline_fp_rate,
        "raw_error_count": total - raw_measured,
        "pipeline_error_count": total - pipeline_measured,
        "results": results,
    }
191
+
192
+
193
+ # ══════════════════════════════════════════════════════════════════════
194
+ # Phase 3.2 — Specific Word-split Cases
195
+ # ══════════════════════════════════════════════════════════════════════
196
def test_word_splits():
    """Verify that specific malformed words are fixed without being split apart.

    For each case the text is sent to ``/api/analyze``.  The corrected text is
    checked for the known bad split (substring match, because it spans several
    words) and for the expected correction (whole-word match).

    The expected correction is matched against whole tokens because
    ``"مستشفياتهم"`` is a substring of the uncorrected ``"المستشفياتهم"``.
    A substring test would report an unchanged output as correctly fixed.

    Returns:
        list[dict]: one entry per case with ``input``, ``corrected``,
        ``bad_split_present`` and ``good_correction_present``.
    """
    print("\n" + "=" * 70)
    print("PHASE 3.2 — Specific Word-split Verification")
    print("=" * 70)

    cases = [
        {
            "input": "ولذالك قررت السفر",
            "target_word": "ولذالك",
            "expected_correct": "ولذلك",
            "bad_split": "ولذا ذلك",
            "desc": "ولذالك should become ولذلك, NOT 'ولذا ذلك'"
        },
        {
            "input": "المستشفياتهم كبيرة",
            "target_word": "المستشفياتهم",
            "expected_correct": "مستشفياتهم",
            "bad_split": "في مستشفيات هم",
            "desc": "مستشفياتهم should NOT be split into 'في مستشفيات هم'"
        },
    ]

    # Punctuation the pipeline may attach to a word (Latin and Arabic marks).
    strip_chars = ".,!?:;\"'()[]«»،؛؟"

    results = []
    for case in cases:
        r = api_call("/api/analyze", case["input"])
        api_failed = "error" in r and "status" not in r
        corrected = r.get("corrected", case["input"])

        tokens = [w.strip(strip_chars) for w in corrected.split()]
        has_bad_split = case["bad_split"] in corrected
        # A failed call falls back to the input text and proves nothing.
        has_good_correction = not api_failed and case["expected_correct"] in tokens

        results.append({
            "input": case["input"],
            "corrected": corrected,
            "bad_split_present": has_bad_split,
            "good_correction_present": has_good_correction,
        })

        print(f"\n {case['desc']}")
        print(f" Input: '{case['input']}'")
        print(f" Corrected: '{corrected}'")
        if api_failed:
            print(f" ❌ API ERROR: {r['error']}")
        elif has_bad_split:
            print(f" ❌ BAD SPLIT detected: '{case['bad_split']}'")
        elif has_good_correction:
            print(f" ✅ Correctly fixed to '{case['expected_correct']}'")
        else:
            print(f" ⚠ Neither expected correction nor bad split found")

    return results
246
+
247
+
248
+ # ══════════════════════════════════════════════════════════════════════
249
+ # Phase 5.5 — Constructed Dual-correction Cases
250
+ # ══════════════════════════════════════════════════════════════════════
251
def test_dual_corrections():
    """Run sentences that need both a spelling and a grammar fix through the pipeline.

    The corrected text from ``/api/analyze`` is checked for two symptoms:
    adjacent identical words, and a word count that differs from the input
    by more than two.

    Returns:
        list[dict]: one entry per case with ``input``, ``corrected``,
        ``suggestions`` (count), ``has_duplicate`` and ``word_count_diff``.
    """
    print("\n" + "=" * 70)
    print("PHASE 5.5 — Constructed Dual-correction Cases")
    print("=" * 70)

    # Cases where spelling AND grammar would both want to change words
    cases = [
        {
            "input": "الطالبه كتبو الوجبات",
            "desc": "Spelling: الطالبه→الطالبة, Grammar: كتبو→كتبوا + possibly الوجبات→الواجبات",
        },
        {
            "input": "هو ذهبو الي البيت",
            "desc": "Spelling: الي→إلى, Grammar: ذهبو→ذهب (singular subject هو)",
        },
        {
            "input": "الطلاب اجتهدو في امتحانتهم",
            "desc": "Spelling: امتحانتهم→امتحاناتهم, Grammar: اجتهدو→اجتهدوا",
        },
    ]

    results = []
    for case in cases:
        source_text = case["input"]
        response = api_call("/api/analyze", source_text)
        corrected = response.get("corrected", source_text)
        suggestions = response.get("suggestions", [])

        # Text duplication shows up as the same word twice in a row.
        out_words = corrected.split()
        has_duplicate = any(prev == cur for prev, cur in zip(out_words, out_words[1:]))

        # Dropped/duplicated words: output should have about the same word
        # count as the input (a difference of up to 2 is tolerated below).
        word_diff = len(out_words) - len(source_text.split())

        results.append({
            "input": source_text,
            "corrected": corrected,
            "suggestions": len(suggestions),
            "has_duplicate": has_duplicate,
            "word_count_diff": word_diff,
        })

        print(f"\n {case['desc']}")
        print(f" Input: '{source_text}'")
        print(f" Corrected: '{corrected}'")
        print(f" Suggestions: {len(suggestions)}")
        if not has_duplicate:
            print(f" ✅ No duplicate words")
        else:
            print(f" ❌ DUPLICATE WORDS detected in output!")
        if abs(word_diff) <= 2:
            print(f" ✅ Word count reasonable (diff={word_diff})")
        else:
            print(f" ⚠ Word count diff: {word_diff} (possible drop/duplication)")

        for sugg in suggestions:
            print(f" [{sugg.get('start')}:{sugg.get('end')}] {sugg.get('type')}: '{sugg.get('original','')}' → '{sugg.get('correction','')}'")

    return results
312
+
313
+
314
+ # ══════════════════════════════════════════════════════════════════════
315
+ # Phase 6.3 — BUG-017 Re-test (Intermittent Empty Response)
316
+ # ══════════════════════════════════════════════════════════════════════
317
def test_bug017():
    """Re-test BUG-017 by sending the same input several times in a row.

    A response counts as *empty* when the call did not fail, carries no
    suggestions, and its ``corrected`` text is either missing/blank or
    identical to the input.  A response counts as an *error* when it has an
    ``error`` key and no ``status`` key.

    Returns:
        dict: ``empty_count``, ``error_count`` and the per-attempt ``results``.
    """
    print("\n" + "=" * 70)
    print("PHASE 6.3 — BUG-017 Re-test (Intermittent Empty Response)")
    print("=" * 70)

    # Send the same input 5 times rapidly and check for empty responses
    test_input = "الحديقه جميله والأزهار متفتحه"
    attempts = 5
    empty_count = 0
    error_count = 0
    results = []

    for attempt in range(1, attempts + 1):
        r = api_call("/api/analyze", test_input, timeout=30)
        corrected = r.get("corrected", "")
        suggestions = r.get("suggestions") or []
        status = r.get("status", "")
        warnings = r.get("warnings", {})

        is_error = "error" in r and "status" not in r
        # ``corrected`` defaults to "" above, so a payload with no corrected
        # text never equals the input.  Treat blank as empty too, otherwise
        # the very response this test hunts for is reported as OK.
        is_empty = (
            not is_error
            and not suggestions
            and corrected in ("", test_input)
        )

        if is_empty:
            empty_count += 1
        if is_error:
            error_count += 1

        results.append({
            "attempt": attempt,
            "corrected": corrected,
            "suggestions": len(suggestions),
            "status": status,
            "warnings": warnings,
            "is_empty": is_empty,
            "is_error": is_error,
        })

        status_str = "❌ EMPTY" if is_empty else ("❌ ERROR" if is_error else "✅ OK")
        print(f" Attempt {attempt}: {status_str} — suggestions={len(suggestions)}, status='{status}'")
        if warnings:
            print(f" Warnings: {warnings}")
        if is_error:
            print(f" Error: {r.get('error', '?')}")

    print(f"\n Empty responses: {empty_count}/{attempts}")
    print(f" Error responses: {error_count}/{attempts}")
    if empty_count > 0:
        print(f" ⚠ BUG-017 may still be present!")
    else:
        print(f" ✅ No empty responses detected")

    return {
        "empty_count": empty_count,
        "error_count": error_count,
        "results": results,
    }
373
+
374
+
375
+ # ══════════════════════════════════════════════════════════════════════
376
+ # Phase 6.4 — 187-word Long Input Regression
377
+ # ══════════════════════════════════════════════════════════════════════
378
def test_long_input_regression():
    """Send one long Arabic passage through ``/api/analyze`` and classify the outcome.

    The outcome is ``error`` (an ``error`` key without ``status``),
    ``partial``, ``success`` or ``unknown``.  The run is also flagged as
    "silently empty" when the status is ``success`` but the text is unchanged
    and there are no suggestions.

    Returns:
        dict: ``input_chars``, ``input_words``, ``status``, ``suggestions``
        (count), ``warnings``, ``timing``, ``elapsed_ms`` and
        ``is_silently_empty``.
    """
    print("\n" + "=" * 70)
    print("PHASE 6.4 — 187-word Long Input Regression")
    print("=" * 70)

    long_text = (
        "في ظل التطورات التكنولوجية المتسارعة التي يشهدها العالم اليوم أصبح من الضروري "
        "أن نواكب هذه التغييرات ونتكيف معها بشكل فعال حيث تلعب التكنولوجيا دورا محوريا "
        "في مختلف جوانب حياتنا اليومية بدءا من التعليم والصحة وصولا إلى الاقتصاد والسياسة "
        "ولقد أدى الذكاء الاصطناعي إلى تحولات جذرية في طريقة عمل المؤسسات والشركات حيث "
        "باتت الآلات قادرة على أداء مهام كانت حكرا على البشر مما يطرح تساؤلات عديدة حول "
        "مستقبل سوق العمل والوظائف التقليدية كما أن التحول الرقمي فرض على الحكومات والمجتمعات "
        "إعادة النظر في سياساتها التعليمية والاقتصادية لضمان مواكبة هذا التطور السريع وفي هذا "
        "السياق يبرز دور البحث العلمي والابتكار كعاملين أساسيين في دفع عجلة التنمية المستدامة "
        "وتحقيق الرفاهية للمجتمعات البشرية إذ لا يمكن لأي دولة أن تحقق تقدما حقيقيا دون "
        "الاستثمار في العقول البشرية وتوفير بيئة محفزة للإبداع والابتكار ومن هنا تأتي أهمية "
        "التعاون الدولي في مجال البحث العلمي وتبادل الخبرات والمعارف بين الدول المتقدمة والنامية "
        "على حد سواء لتحقيق التنمية الشاملة والمستدامة التي تعود بالنفع على جميع شعوب العالم"
    )
    char_count = len(long_text)
    word_count = len(long_text.split())
    print(f" Input: {char_count} chars, {word_count} words")

    r = api_call("/api/analyze", long_text, timeout=120)
    status = r.get("status", "")
    corrected = r.get("corrected", "")
    suggestions = r.get("suggestions", [])
    warnings = r.get("warnings", {})
    timing = r.get("timing_ms", {})

    # Classify first, then report.
    if "error" in r and "status" not in r:
        result_status = "error"
    elif status in ("partial", "success"):
        result_status = status
    else:
        result_status = "unknown"

    if result_status == "error":
        print(f" ❌ ERROR: {r['error']}")
    elif result_status == "partial":
        print(f" ⚠ PARTIAL: some stages failed")
        print(f" Warnings: {warnings}")
    elif result_status == "success":
        print(f" ✅ SUCCESS")
    else:
        print(f" ⚠ UNKNOWN STATUS: '{status}'")

    unchanged = corrected == long_text
    print(f" Elapsed: {r.get('_elapsed_ms', '?')}ms")
    print(f" Timing: {timing}")
    print(f" Suggestions: {len(suggestions)}")
    print(f" Corrected == Original: {unchanged}")

    # Key check: response should NOT be silently empty
    is_silently_empty = status == "success" and unchanged and len(suggestions) == 0
    if not is_silently_empty:
        print(f" ✅ Response is either successful with results or properly flagged as partial/error")
    else:
        print(f" ⚠ Silently empty! This is the BUG-032 behavior we're preventing.")

    return {
        "input_chars": char_count,
        "input_words": word_count,
        "status": result_status,
        "suggestions": len(suggestions),
        "warnings": warnings,
        "timing": timing,
        "elapsed_ms": r.get("_elapsed_ms"),
        "is_silently_empty": is_silently_empty,
    }
442
+
443
+
444
+ # ══════════════════════════════════════════════════════════════════════
445
+ # Phase 7.1 — BUG-018 Precise Tracing
446
+ # ══════════════════════════════════════════════════════════════════════
447
def test_bug018_tracing():
    """Trace where the Arabic semicolon (؛) is lost for BUG-018.

    The same sentence is sent to ``/api/punctuation`` and to ``/api/analyze``.
    Presence of ``؛`` in each output selects one of three diagnoses.

    Returns:
        dict: ``input``, ``raw_output``, ``pipeline_output``,
        ``has_semicolon_raw``, ``has_semicolon_pipeline``, ``diagnosis`` and
        ``pipeline_punc_count``.
    """
    print("\n" + "=" * 70)
    print("PHASE 7.1 — BUG-018 Precise Tracing (dropped ؛)")
    print("=" * 70)

    test_input = "قال المعلم للطلاب ادرسوا جيدا فالامتحان قريب"
    print(f" Input: '{test_input}'")

    # Track A: Raw punctuation only
    raw_resp = api_call("/api/punctuation", test_input)
    a_out = raw_resp.get("corrected_text", test_input)
    has_semicolon_raw = "؛" in a_out
    print(f"\n Raw punctuation output: '{a_out}'")
    print(f" Has ؛: {has_semicolon_raw}")

    # Track B: Full pipeline
    pipe_resp = api_call("/api/analyze", test_input)
    b_out = pipe_resp.get("corrected", test_input)
    b_sugg = pipe_resp.get("suggestions", [])
    has_semicolon_pipeline = "؛" in b_out
    print(f"\n Pipeline output: '{b_out}'")
    print(f" Has ؛: {has_semicolon_pipeline}")
    print(f" Suggestions: {len(b_sugg)}")

    for sugg in b_sugg:
        print(f" [{sugg.get('start')}:{sugg.get('end')}] {sugg.get('type')}: '{sugg.get('original','')}' → '{sugg.get('correction','')}'")

    punc_count = len([s for s in b_sugg if s.get('type') == 'punctuation'])

    # Determine drop cause
    if not has_semicolon_raw:
        cause = "Raw punctuation model did NOT produce ؛ at all"
        print(f"\n DIAGNOSIS: {cause} — not a pipeline bug")
    elif has_semicolon_pipeline:
        cause = "؛ present in both raw and pipeline — BUG-018 not reproduced"
        print(f"\n DIAGNOSIS: {cause}")
    else:
        # Raw produced it but pipeline dropped it
        if punc_count >= 3:
            cause = "MAX_PUNC_PATCHES_PER_RESPONSE cap (3 patches, ؛ was 4th+)"
        else:
            cause = "StageLocker or validate_punctuation_diff rejection"
        print(f"\n DIAGNOSIS: ؛ was produced by raw model but dropped by pipeline")
        print(f" Likely cause: {cause}")

    return {
        "input": test_input,
        "raw_output": a_out,
        "pipeline_output": b_out,
        "has_semicolon_raw": has_semicolon_raw,
        "has_semicolon_pipeline": has_semicolon_pipeline,
        "diagnosis": cause,
        "pipeline_punc_count": punc_count,
    }
502
+
503
+
504
+ # ══════════════════════════════════════════════════════════════════════
505
+ # MAIN
506
+ # ══════════════════════════════════════════════════════════════════════
507
if __name__ == "__main__":
    # Run every phase in order, collect the return values in the module-level
    # ``all_results`` dict, then dump them as JSON next to this script.
    print("BAYAN — Gap-filling Live Tests\n")

    phases = (
        ("phase_1_3", test_category9_live),
        ("phase_2", test_rare_vocabulary),
        ("phase_3_2", test_word_splits),
        ("phase_5_5", test_dual_corrections),
        ("phase_6_3", test_bug017),
        ("phase_6_4", test_long_input_regression),
        ("phase_7_1", test_bug018_tracing),
    )
    for key, run_phase in phases:
        all_results[key] = run_phase()

    # Save all results
    script_dir = os.path.dirname(__file__)
    output_path = os.path.join(script_dir, 'gap_filling_results.json')
    with open(output_path, 'w', encoding='utf-8') as out_file:
        json.dump(all_results, out_file, ensure_ascii=False, indent=2)
    print(f"\n\nAll results saved to {output_path}")
tests/phase0_investigation.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Phase 0 — Investigation Script
3
+ Tests:
4
+ 0.1 — ان→أن in sentence context vs isolation
5
+ 0.3 — BUG-032 (long text) with detailed error capture
6
+ 0.4 — BUG-031 sentence (already resolved: الطلاب = plural → اللذين is wrong)
7
+ """
8
+ import sys, os, json, time, requests
9
+
10
+ API_BASE = "https://bayan10-bayan-api.hf.space"
11
+ TIMEOUT = 90
12
+
13
def api_call(endpoint, text):
    """POST ``text`` as JSON to ``API_BASE + endpoint`` and return a dict.

    On HTTP 200 the decoded JSON body is returned with an added
    ``_elapsed_ms`` key.  Any other status yields
    ``{"error": "HTTP <code>", "_elapsed_ms": ...}``.  Any exception yields
    ``{"error": "<ExceptionType>: <message>"}`` and is never raised.
    """
    url = f"{API_BASE}{endpoint}"
    try:
        started = time.time()
        resp = requests.post(url, json={"text": text}, timeout=TIMEOUT)
        elapsed = int((time.time() - started) * 1000)
        if resp.status_code != 200:
            return {"error": f"HTTP {resp.status_code}", "_elapsed_ms": elapsed}
        payload = resp.json()
        payload['_elapsed_ms'] = elapsed
        return payload
    except Exception as exc:
        return {"error": f"{type(exc).__name__}: {exc}"}
26
+
27
def test_0_1():
    """0.1 — Does spelling correct ان→أن in sentence context?

    Sends ``ان`` alone and inside three sentences to ``/api/spelling`` and
    ``/api/analyze``.  The verdict compares the raw spelling result for the
    isolated word (first test) with the results for the sentences.

    Returns:
        dict: ``verdict`` (str) and ``results`` (one dict per test input).
    """
    print("=" * 70)
    print("PHASE 0.1 — ان→أن contradiction test")
    print("=" * 70)

    tests = [
        ("ان (isolation)", "ان"),
        ("ان الحياة جميلة (sentence)", "ان الحياة جميلة"),
        ("ان الذكاء مهم (sentence)", "ان الذكاء مهم"),
        ("قال ان الحق واضح (mid-sentence)", "قال ان الحق واضح"),
    ]

    def was_fixed(output):
        # "Fixed" means the output contains either hamza form of the particle.
        return 'أن' in output or 'إن' in output

    results = []
    for label, text in tests:
        # Track A: raw spelling model
        a = api_call("/api/spelling", text)
        a_out = a.get("corrected_text", text)
        a_changed = a_out != text

        # Track B: full pipeline
        b = api_call("/api/analyze", text)
        b_out = b.get("corrected", text)
        b_sugg = b.get("suggestions", [])

        results.append({
            "label": label, "input": text,
            "raw_spelling": a_out, "raw_changed": a_changed,
            "pipeline_corrected": b_out,
            "pipeline_suggestions": len(b_sugg),
        })

        print(f"\n {label}:")
        print(f" Input: '{text}'")
        print(f" Raw spell: '{a_out}' (changed={a_changed})")
        print(f" Pipeline: '{b_out}' (suggestions={len(b_sugg)})")

        # Check if ان was corrected to أن or إن
        if was_fixed(a_out):
            print(f" ✅ Raw spelling DID correct ان")
        elif a_changed:
            print(f" ⚠ Raw spelling changed but NOT to أن/إن")
        else:
            print(f" ❌ Raw spelling did NOT correct ان")

    # Verdict
    print("\n" + "-" * 50)
    iso_fixed = was_fixed(results[0]['raw_spelling'])
    sent_fixed = any(was_fixed(r['raw_spelling']) for r in results[1:])

    # All four combinations are spelled out.  The previous chain reported
    # "FAILS in both" whenever the isolated word was not fixed, even if a
    # sentence was, and its final ``else`` could never run.
    if iso_fixed and sent_fixed:
        verdict = "WORKS in both isolation AND sentence context"
    elif iso_fixed:
        verdict = "WORKS in isolation ONLY, FAILS in sentence context"
    elif sent_fixed:
        verdict = "FAILS in isolation, WORKS in sentence context"
    else:
        verdict = "FAILS in both isolation and sentence context"

    print(f" FINAL VERDICT: {verdict}")
    return {"verdict": verdict, "results": results}
91
+
92
+
93
def test_0_3():
    """0.3 — BUG-032: long Arabic text against every endpoint.

    Sends one long passage to ``/api/spelling``, ``/api/grammar`` and
    ``/api/punctuation``, then to the full ``/api/analyze`` pipeline, printing
    the error or timing of each call.  The actual character and word counts
    are printed and returned rather than assumed.

    Returns:
        dict: ``input_chars``, ``input_words``, one ``{"error", "elapsed"}``
        dict per raw endpoint (``spelling``, ``grammar``, ``punctuation``) and
        a ``pipeline`` dict with ``error``, ``status``, ``suggestions``
        (count), ``timing`` and ``elapsed``.
    """
    print("\n" + "=" * 70)
    print("PHASE 0.3 — BUG-032 long text test")
    print("=" * 70)

    # Long Arabic text (from deep-dive report)
    long_text = (
        "في ظل التطورات التكنولوجية المتسارعة التي يشهدها العالم اليوم أصبح من الضروري "
        "أن نواكب هذه التغييرات ونتكيف معها بشكل فعال حيث تلعب التكنولوجيا دورا محوريا "
        "في مختلف جوانب حياتنا اليومية بدءا من التعليم والصحة وصولا إلى الاقتصاد والسياسة "
        "ولقد أدى الذكاء الاصطناعي إلى تحولات جذرية في طريقة عمل المؤسسات والشركات حيث "
        "باتت الآلات قادرة على أداء مهام كانت حكرا على البشر مما يطرح تساؤلات عديدة حول "
        "مستقبل سوق العمل والوظائف التقليدية كما أن التحول الرقمي فرض على الحكومات والمجتمعات "
        "إعادة النظر في سياساتها التعليمية والاقتصادية لضمان مواكبة هذا التطور السريع وفي هذا "
        "السياق يبرز دور البحث العلمي والابتكار كعاملين أساسيين في دفع عجلة التنمية المستدامة "
        "وتحقيق الرفاهية للمجتمعات البشرية إذ لا يمكن لأي دولة أن تحقق تقدما حقيقيا دون "
        # The word "البشرية" below was stored with a mis-encoded first letter.
        "الاستثمار في العقول البشرية وتوفير بيئة محفزة للإبداع والابتكار ومن هنا تأتي أهمية "
        "التعاون الدولي في مجال البحث العلمي وتبادل الخبرات والمعارف بين الدول المتقدمة والنامية "
        "على حد سواء لتحقيق التنمية الشاملة والمستدامة التي تعود بالنفع على جميع شعوب العالم"
    )
    print(f" Input length: {len(long_text)} chars, {len(long_text.split())} words")

    # Test all three individual endpoints
    raw = {}
    for name in ("spelling", "grammar", "punctuation"):
        endpoint = f"/api/{name}"
        print(f"\n Testing {endpoint}...")
        resp = api_call(endpoint, long_text)
        raw[name] = resp
        print(f" Status: {'error' if 'error' in resp else 'OK'}")
        if 'error' in resp:
            print(f" Error: {resp['error']}")
        else:
            print(f" Elapsed: {resp.get('_elapsed_ms', '?')}ms")
            print(f" Changed: {resp.get('corrected_text', '') != long_text}")

    print("\n Testing /api/analyze (full pipeline)...")
    b = api_call("/api/analyze", long_text)
    # The pipeline may report an ``error`` field alongside a ``status``.
    # Only an error without a status is treated as a failed call.
    pipeline_failed = 'error' in b and 'status' not in b
    print(f" Status: {'error' if pipeline_failed else b.get('status', '?')}")
    if pipeline_failed:
        print(f" Error: {b['error']}")
    else:
        print(f" Elapsed: {b.get('_elapsed_ms', '?')}ms")
        print(f" Suggestions: {len(b.get('suggestions', []))}")
        print(f" Timing: {b.get('timing_ms', {})}")
        if b.get('corrected') == long_text:
            print(f" ⚠ corrected == original (no changes or silent failure?)")

    summary = {
        "input_chars": len(long_text),
        "input_words": len(long_text.split()),
    }
    for name, resp in raw.items():
        summary[name] = {"error": resp.get("error"), "elapsed": resp.get("_elapsed_ms")}
    summary["pipeline"] = {
        "error": b.get("error"),
        "status": b.get("status"),
        "suggestions": len(b.get("suggestions", [])),
        "timing": b.get("timing_ms", {}),
        "elapsed": b.get("_elapsed_ms"),
    }
    return summary
170
+
171
+
172
def test_0_4():
    """0.4 — BUG-031: اللذين vs الذين

    Sends one sentence with a plural subject and the dual relative pronoun to
    ``/api/grammar`` and reports whether the output still contains the dual
    form, contains the plural form instead, or neither.

    Returns:
        dict: ``sentence``, ``subject``, ``correct_form``, ``is_real_bug``,
        ``grammar_output`` and ``bug_status`` (``fixed_by_model``,
        ``still_broken`` or ``unclear``).
    """
    print("\n" + "=" * 70)
    print("PHASE 0.4 — BUG-031 (اللذين vs الذين)")
    print("=" * 70)

    sentence = "الطلاب اللذين اجتهدو في دراستهم حققو نتائج ممتازه في الأمتحانات الصعبه"
    print(f" Test sentence: '{sentence}'")
    print(f" Subject: الطلاب (PLURAL, not dual)")
    print(f" Therefore: اللذين (dual) is WRONG, الذين (plural) is CORRECT")
    print(f" Verdict: BUG-031 IS a real bug — grammar should correct اللذين→الذين")

    # Test it
    response = api_call("/api/grammar", sentence)
    a_out = response.get("corrected_text", sentence)
    print(f"\n Grammar model output: '{a_out}'")

    dual_remains = 'اللذين' in a_out
    plural_present = 'الذين' in a_out
    if dual_remains:
        print(f" ❌ Grammar did NOT correct اللذين (left as dual)")
        bug_status = "still_broken"
    elif plural_present:
        print(f" ✅ Grammar DID correct اللذين→الذين")
        bug_status = "fixed_by_model"
    else:
        print(f" ⚠ Unexpected output")
        bug_status = "unclear"

    return {
        "sentence": sentence,
        "subject": "الطلاب (PLURAL)",
        "correct_form": "الذين (plural)",
        "is_real_bug": True,
        "grammar_output": a_out,
        "bug_status": bug_status,
    }
206
+
207
+
208
if __name__ == "__main__":
    # Run the three investigations in order and dump their return values as
    # JSON next to this script.
    print("BAYAN Phase 0 — Investigation\n")

    all_results = {
        "phase_0_1": test_0_1(),
        "phase_0_3": test_0_3(),
        "phase_0_4": test_0_4(),
    }

    # Save results
    script_dir = os.path.dirname(__file__)
    output_path = os.path.join(script_dir, 'phase0_results.json')
    with open(output_path, 'w', encoding='utf-8') as out_file:
        json.dump(all_results, out_file, ensure_ascii=False, indent=2)
    print(f"\nResults saved to {output_path}")
tests/phase0_results.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "phase_0_1": {
3
+ "verdict": "WORKS in isolation ONLY, FAILS in sentence context",
4
+ "results": [
5
+ {
6
+ "label": "ان (isolation)",
7
+ "input": "ان",
8
+ "raw_spelling": "أن",
9
+ "raw_changed": true,
10
+ "pipeline_corrected": "إن.",
11
+ "pipeline_suggestions": 1
12
+ },
13
+ {
14
+ "label": "ان الحياة جميلة (sentence)",
15
+ "input": "ان الحياة جميلة",
16
+ "raw_spelling": "ان الحياة جميلة",
17
+ "raw_changed": false,
18
+ "pipeline_corrected": "إن الحياة جميلة!",
19
+ "pipeline_suggestions": 2
20
+ },
21
+ {
22
+ "label": "ان الذكاء مهم (sentence)",
23
+ "input": "ان الذكاء مهم",
24
+ "raw_spelling": "ان الذكاء مهم",
25
+ "raw_changed": false,
26
+ "pipeline_corrected": "إن الذكاء مهم.",
27
+ "pipeline_suggestions": 2
28
+ },
29
+ {
30
+ "label": "قال ان الحق واضح (mid-sentence)",
31
+ "input": "قال ان الحق واضح",
32
+ "raw_spelling": "قال ان الحق واضح",
33
+ "raw_changed": false,
34
+ "pipeline_corrected": "قال ان: الحق واضح",
35
+ "pipeline_suggestions": 1
36
+ }
37
+ ]
38
+ },
39
+ "phase_0_3": {
40
+ "input_chars": 982,
41
+ "input_words": 159,
42
+ "spelling": {
43
+ "error": "ReadTimeout: HTTPSConnectionPool(host='bayan10-bayan-api.hf.space', port=443): Read timed out. (read timeout=90)",
44
+ "elapsed": null
45
+ },
46
+ "grammar": {
47
+ "error": "ReadTimeout: HTTPSConnectionPool(host='bayan10-bayan-api.hf.space', port=443): Read timed out. (read timeout=90)",
48
+ "elapsed": null
49
+ },
50
+ "punctuation": {
51
+ "error": "ReadTimeout: HTTPSConnectionPool(host='bayan10-bayan-api.hf.space', port=443): Read timed out. (read timeout=90)",
52
+ "elapsed": null
53
+ },
54
+ "pipeline": {
55
+ "error": null,
56
+ "status": "success",
57
+ "suggestions": 4,
58
+ "timing": {
59
+ "grammar_ms": 12179,
60
+ "punctuation_ms": 12237,
61
+ "spelling_ms": 0,
62
+ "total_ms": 24420
63
+ },
64
+ "elapsed": 54892
65
+ }
66
+ },
67
+ "phase_0_4": {
68
+ "sentence": "الطلاب اللذين اجتهدو في دراستهم حققو نتائج ممتازه في الأمتحانات الصعبه",
69
+ "subject": "الطلاب (PLURAL)",
70
+ "correct_form": "الذين (plural)",
71
+ "is_real_bug": true,
72
+ "grammar_output": "الطلاب الذين اجتهدو في دراستهم حققوا نتائج ممتازة في الامتحانات الصعبة",
73
+ "bug_status": "fixed_by_model"
74
+ }
75
+ }
tests/phase5_investigation.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Phase 5 — BUG-016/027 Text Duplication Investigation
3
+
4
+ Reproduce exact case: ان الطالبات ذهبو الى الجامعه
5
+ Log every patch produced by spelling and grammar with full ORIGINAL coordinates.
6
+ Determine: overlapping coords (PatchSet bug) vs non-overlapping (coord computation bug).
7
+ Also check: does الى get silently dropped?
8
+ """
9
+ import sys, os, json, time, requests
10
+
11
+ API_BASE = "https://bayan10-bayan-api.hf.space"
12
+ TIMEOUT = 60
13
+
14
def api_call(endpoint, text):
    """Send ``text`` to the API endpoint and return the response as a dict.

    A 200 response returns its JSON body plus ``_elapsed_ms``.  Other status
    codes return ``{"error": "HTTP <code>", "_elapsed_ms": ...}``.  Exceptions
    are caught and returned as ``{"error": "<ExceptionType>: <message>"}``.
    """
    url = f"{API_BASE}{endpoint}"
    try:
        t_start = time.time()
        response = requests.post(url, json={"text": text}, timeout=TIMEOUT)
        elapsed_ms = int((time.time() - t_start) * 1000)
        ok = response.status_code == 200
        if not ok:
            return {"error": f"HTTP {response.status_code}", "_elapsed_ms": elapsed_ms}
        body = response.json()
        body['_elapsed_ms'] = elapsed_ms
        return body
    except Exception as err:
        return {"error": f"{type(err).__name__}: {err}"}
27
+
28
+
29
def investigate_bug_016():
    """Full coordinate investigation for BUG-016.

    Prints the character span of every input word, the word-level diff of the
    raw ``/api/spelling`` and ``/api/grammar`` outputs, every suggestion from
    ``/api/analyze``, and then looks for adjacent duplicate words, input words
    missing from the output, and suggestions whose spans overlap.

    Returns:
        dict: ``input``, ``raw_spelling``, ``raw_grammar``,
        ``pipeline_corrected`` and ``suggestions``.
    """
    from difflib import SequenceMatcher

    def print_word_diff(after, label):
        # Word-level opcodes between the input and ``after``.  The heading
        # text is kept exactly as the script has always printed it.
        print(f"\n Character-level changes ({label}):")
        before_words = test_input.split()
        after_words = after.split()
        matcher = SequenceMatcher(None, before_words, after_words)
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag == 'equal':
                continue
            print(f" {tag}: [{i1}:{i2}] {before_words[i1:i2]} → [{j1}:{j2}] {after_words[j1:j2]}")

    print("=" * 70)
    print("PHASE 5 — BUG-016/027 Text Duplication Investigation")
    print("=" * 70)

    test_input = "ان الطالبات ذهبو الى الجامعه"
    input_words = test_input.split()
    print(f"\nInput: '{test_input}'")
    print(f"Words: {input_words}")

    # Compute char offsets with a running cursor past the previous word.
    cursor = 0
    for idx, word in enumerate(input_words):
        start = test_input.index(word, cursor)
        end = start + len(word)
        cursor = end + 1
        print(f" Word {idx}: '{word}' chars [{start}:{end}]")

    # Track A: Raw model outputs
    print("\n--- Track A: Raw Spelling ---")
    a_spell = api_call("/api/spelling", test_input)
    a_spell_out = a_spell.get("corrected_text", test_input)
    print(f" Input: '{test_input}'")
    print(f" Output: '{a_spell_out}'")
    print(f" Changed: {a_spell_out != test_input}")
    if a_spell_out != test_input:
        print_word_diff(a_spell_out, "spelling")

    print("\n--- Track A: Raw Grammar ---")
    a_gram = api_call("/api/grammar", test_input)
    a_gram_out = a_gram.get("corrected_text", test_input)
    print(f" Input: '{test_input}'")
    print(f" Output: '{a_gram_out}'")
    print(f" Changed: {a_gram_out != test_input}")
    if a_gram_out != test_input:
        print_word_diff(a_gram_out, "grammar")

    # Track B: Full pipeline
    print("\n--- Track B: Full Pipeline ---")
    b = api_call("/api/analyze", test_input)
    b_corrected = b.get("corrected", test_input)
    b_suggestions = b.get("suggestions", [])
    print(f" Input: '{test_input}'")
    print(f" Corrected: '{b_corrected}'")
    print(f" Suggestions: {len(b_suggestions)}")

    for sugg in b_suggestions:
        print(f"\n Suggestion [{sugg.get('start')}:{sugg.get('end')}]:")
        print(f" Type: {sugg.get('type')}")
        print(f" Original: '{sugg.get('original', '')}'")
        print(f" Correction: '{sugg.get('correction', '')}'")
        if 'confidence' in sugg:
            print(f" Confidence: {sugg.get('confidence')}")

    # Check for duplicates
    print("\n--- Duplicate / Drop Analysis ---")
    output_words = b_corrected.split()
    print(f" Input words: {input_words}")
    print(f" Output words: {output_words}")

    # Check for duplicated words
    for pos in range(1, len(output_words)):
        if output_words[pos] == output_words[pos - 1]:
            print(f" ⚠ DUPLICATE: '{output_words[pos]}' at positions {pos-1} and {pos}")

    # Check for dropped words (الى should appear as الى or إلى)
    corrections = {
        'ان': ['أن', 'إن', 'ان'],
        'الى': ['إلى', 'الى'],
        'الجامعه': ['الجامعة', 'الجامعه'],
        'ذهبو': ['ذهبوا', 'ذهبن', 'ذهبو'],
        'الطالبات': ['الطالبات'],
    }
    for word in input_words:
        # The word itself, or any known correction of it, counts as present.
        candidates = [word] + corrections.get(word, [word])
        if any(form in b_corrected for form in candidates):
            print(f" ✓ '{word}' present (or corrected variant)")
        else:
            print(f" ⚠ DROPPED: '{word}' not found in corrected output!")

    # Overlap analysis between suggestions
    print("\n--- Overlap Analysis ---")
    overlap_found = False
    for i in range(len(b_suggestions)):
        s1 = b_suggestions[i]
        s1_start, s1_end = s1.get('start', 0), s1.get('end', 0)
        for j in range(i + 1, len(b_suggestions)):
            s2 = b_suggestions[j]
            s2_start, s2_end = s2.get('start', 0), s2.get('end', 0)
            if s1_start < s2_end and s2_start < s1_end:
                overlap_found = True
                print(f" ⚠ OVERLAP: suggestion {i} [{s1_start}:{s1_end}] and suggestion {j} [{s2_start}:{s2_end}]")
                print(f" S{i}: '{s1.get('original','')}' → '{s1.get('correction','')}' ({s1.get('type')})")
                print(f" S{j}: '{s2.get('original','')}' → '{s2.get('correction','')}' ({s2.get('type')})")
    if not overlap_found:
        print(" ✓ No overlapping suggestions found")

    return {
        "input": test_input,
        "raw_spelling": a_spell_out,
        "raw_grammar": a_gram_out,
        "pipeline_corrected": b_corrected,
        "suggestions": b_suggestions,
    }
154
+
155
+
156
if __name__ == "__main__":
    # Run the investigation and store its return value as JSON next to this script.
    result = investigate_bug_016()
    script_dir = os.path.dirname(__file__)
    output_path = os.path.join(script_dir, 'phase5_results.json')
    with open(output_path, 'w', encoding='utf-8') as out_file:
        json.dump(result, out_file, ensure_ascii=False, indent=2)
    print(f"\nResults saved to {output_path}")
tests/phase5_results.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "input": "ان الطالبات ذهبو الى الجامعه",
3
+ "raw_spelling": "ان الط ابت ذهبوا إلى الجامعه",
4
+ "raw_grammar": "إن الطالبات ذهبن إلى الجامعة",
5
+ "pipeline_corrected": "إن الطالبات ذهبن ذهبوا الجامعة.",
6
+ "suggestions": [
7
+ {
8
+ "alternatives": [],
9
+ "confidence": 1.0,
10
+ "correction": "ذهبن",
11
+ "end": 16,
12
+ "id": "f303a4d8-1369-43f7-8ad5-209c03d4af24",
13
+ "locked": true,
14
+ "original": "ذهبو",
15
+ "priority": 3,
16
+ "start": 12,
17
+ "type": "grammar"
18
+ },
19
+ {
20
+ "alternatives": [],
21
+ "confidence": 0.8,
22
+ "correction": "الجامعة.",
23
+ "end": 28,
24
+ "id": "ece1c300-e501-44dc-9ef2-907b47785145",
25
+ "locked": true,
26
+ "original": "الجامعه",
27
+ "priority": 2,
28
+ "start": 21,
29
+ "type": "punctuation"
30
+ },
31
+ {
32
+ "alternatives": [],
33
+ "confidence": 1.0,
34
+ "correction": "إن",
35
+ "end": 2,
36
+ "id": "aa123654-bb3a-46ab-aa3c-7cea6dc4955b",
37
+ "locked": true,
38
+ "original": "ان",
39
+ "priority": 1,
40
+ "start": 0,
41
+ "type": "spelling"
42
+ },
43
+ {
44
+ "alternatives": [
45
+ "ذهبوا",
46
+ "ال",
47
+ "الم",
48
+ "الى"
49
+ ],
50
+ "confidence": 0.9,
51
+ "correction": "ذهبوا",
52
+ "end": 20,
53
+ "id": "bf07637f-0432-4311-aab1-77f521718214",
54
+ "locked": true,
55
+ "original": "الى",
56
+ "priority": 1,
57
+ "start": 17,
58
+ "type": "spelling"
59
+ }
60
+ ]
61
+ }
tests/phase9_results.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_count": 5,
3
+ "all_pass": true,
4
+ "contamination_found": false,
5
+ "results": {
6
+ "CONC-3": {
7
+ "corrected": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة؛ في بناء كتلة عضلية قوية ويا له، من التزام حديدي يثير الإعجاب ولقد كان! أداؤه في المسابقات الأخيرة مبهرا للغاية",
8
+ "suggestions_count": 3
9
+ },
10
+ "CONC-1": {
11
+ "corrected": "الحديقه جميلة.",
12
+ "suggestions_count": 1
13
+ },
14
+ "CONC-2": {
15
+ "corrected": "الطلاب ذهبوا إلى المدرسة.",
16
+ "suggestions_count": 3
17
+ },
18
+ "CONC-5": {
19
+ "corrected": "هذه المدينة جميلة جدا ومناخها معتدل.",
20
+ "suggestions_count": 2
21
+ },
22
+ "CONC-4": {
23
+ "corrected": "القاهرة عاصمة مصر.",
24
+ "suggestions_count": 1
25
+ }
26
+ }
27
+ }
tests/round2_b2_b3_live.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "b2": [
3
+ {
4
+ "id": "BUG-006",
5
+ "word": "اهم",
6
+ "input": "هذا اهم شيء في الحياة",
7
+ "corrected": "هذا اهم شيء في الحياة.",
8
+ "status": "✅ PRESERVED",
9
+ "targeting_suggestions": 0
10
+ },
11
+ {
12
+ "id": "BUG-009",
13
+ "word": "قرأ",
14
+ "input": "قرأ الطالب الكتاب",
15
+ "corrected": "قرأ الطالب الكتاب.",
16
+ "status": "✅ PRESERVED",
17
+ "targeting_suggestions": 0
18
+ },
19
+ {
20
+ "id": "BUG-010",
21
+ "word": "مشى",
22
+ "input": "مشى الرجل إلى البيت",
23
+ "corrected": "مشى الرجل إلى البيت.",
24
+ "status": "✅ PRESERVED",
25
+ "targeting_suggestions": 0
26
+ },
27
+ {
28
+ "id": "BUG-013",
29
+ "word": "خطأ",
30
+ "input": "وقع في خطأ كبير",
31
+ "corrected": "وقع في خطأ كبير.",
32
+ "status": "✅ PRESERVED",
33
+ "targeting_suggestions": 0
34
+ }
35
+ ],
36
+ "b3_note": "Deployed API lacks Round 2 fixes"
37
+ }
tests/round2_b2_b3_live.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Round 2 — B2 Live API Test
3
+ Tests BUG-006/009/010/013 through the DEPLOYED pipeline to verify
4
+ whether the existing mechanism catches common-word substitutions.
5
+ """
6
+ import requests, json, time, os
7
+
8
+ API_BASE = "https://bayan10-bayan-api.hf.space"
9
+ TIMEOUT = 60
10
+
11
def api_call(endpoint, text):
    """POST ``text`` to a deployed endpoint and return the decoded JSON reply.

    Args:
        endpoint: Path appended to ``API_BASE`` (e.g. ``"/api/analyze"``).
        text: Text sent as the ``"text"`` field of the JSON request body.

    Returns:
        On HTTP 200, the response JSON with an added ``"_elapsed_ms"`` key.
        On any failure, a dict with an ``"error"`` message. ``"_elapsed_ms"``
        is present on every path so callers can always report timing.
    """
    # perf_counter is monotonic, so the measured latency cannot go negative
    # or jump when the system clock is adjusted mid-request.
    t0 = time.perf_counter()
    try:
        resp = requests.post(f"{API_BASE}{endpoint}", json={"text": text}, timeout=TIMEOUT)
        elapsed = int((time.perf_counter() - t0) * 1000)
        if resp.status_code == 200:
            data = resp.json()
            data['_elapsed_ms'] = elapsed
            return data
        return {"error": f"HTTP {resp.status_code}", "_elapsed_ms": elapsed}
    except Exception as e:
        # Deliberately broad: this is a best-effort probe of a live service
        # and the callers only ever inspect the returned dict. The exception
        # type is included because str(e) alone is empty for some errors.
        return {
            "error": f"{type(e).__name__}: {e}",
            "_elapsed_ms": int((time.perf_counter() - t0) * 1000),
        }
23
+
24
print("=" * 70)
print("B2 LIVE TEST: Common-word substitution via /api/analyze")
print("=" * 70)

# Each case places a target word in a sentence; the pipeline must keep the
# word and must not replace it with "bad_correction".
tests = [
    {"id": "BUG-006", "sentence": "هذا اهم شيء في الحياة", "word": "اهم",
     "bad_correction": "مهم", "concern": "اهم must NOT become مهم"},
    {"id": "BUG-009", "sentence": "قرأ الطالب الكتاب", "word": "قرأ",
     "bad_correction": "قرا", "concern": "قرأ must NOT become قرا"},
    {"id": "BUG-010", "sentence": "مشى الرجل إلى البيت", "word": "مشى",
     "bad_correction": "مضى", "concern": "مشى must NOT become مضى"},
    {"id": "BUG-013", "sentence": "وقع في خطأ كبير", "word": "خطأ",
     "bad_correction": "خطا", "concern": "خطأ must NOT become خطا"},
]

results = []
for t in tests:
    r = api_call("/api/analyze", t["sentence"])
    api_error = r.get("error")
    # The input is used as a display fallback only; a failed call must not be
    # scored as if the pipeline had returned the sentence untouched.
    corrected = r.get("corrected", t["sentence"])
    suggestions = r.get("suggestions", [])

    # Check if the target word was changed
    word_present = t["word"] in corrected
    bad_present = t["bad_correction"] in corrected and t["bad_correction"] not in t["sentence"]

    # Find suggestions targeting this word
    targeting = [s for s in suggestions if t["word"] in s.get("original", "")
                 or t["bad_correction"] in s.get("correction", "")]

    if api_error:
        status = f"⚠ API ERROR ({api_error})"
    elif bad_present:
        status = "❌ CORRUPTED"
    elif word_present:
        status = "✅ PRESERVED"
    else:
        status = "⚠ OTHER"

    result = {
        "id": t["id"], "word": t["word"], "input": t["sentence"],
        "corrected": corrected, "status": status,
        "targeting_suggestions": len(targeting),
    }
    results.append(result)

    print(f"\n {t['id']}: {t['concern']}")
    print(f" Input: '{t['sentence']}'")
    print(f" Corrected: '{corrected}'")
    print(f" Status: {status}")
    for s in targeting:
        conf = s.get('confidence', '?')
        print(f" Suggestion: '{s.get('original','')}' → '{s.get('correction','')}' (conf={conf})")
71
+
72
# Also test BUG-014/015 live
print("\n" + "=" * 70)
print("B3 LIVE TEST: Suffix corruption via /api/analyze")
print("=" * 70)

# Each case ends a word in the pronoun suffix "ه"; "bad" is the same word with
# the suffix turned into ta marbuta "ة", which the pipeline must not produce.
b3_tests = [
    {"id": "BUG-014", "sentence": "قرأته بسرعة", "word": "قرأته",
     "bad": "قرأتة", "concern": "قرأته must NOT become قرأتة"},
    {"id": "BUG-015", "sentence": "استوقفني المشهد فتأملته مليا", "word": "فتأملته",
     "bad": "فتأملتة", "concern": "فتأملته must NOT become فتأملتة"},
]

# Collected so the B3 outcomes are written to the JSON report instead of
# being visible on stdout only.
b3_results = []
for t in b3_tests:
    r = api_call("/api/analyze", t["sentence"])
    api_error = r.get("error")
    # Display fallback only; a failed call is reported as an error below
    # rather than as a preserved word.
    corrected = r.get("corrected", t["sentence"])
    bad_present = t["bad"] in corrected
    word_present = t["word"] in corrected
    if api_error:
        status = f"⚠ API ERROR ({api_error})"
    elif bad_present:
        status = "❌ CORRUPTED"
    elif word_present:
        status = "✅ PRESERVED"
    else:
        status = "⚠ OTHER"

    b3_results.append({
        "id": t["id"], "word": t["word"], "input": t["sentence"],
        "corrected": corrected, "status": status,
    })

    print(f"\n {t['id']}: {t['concern']}")
    print(f" Input: '{t['sentence']}'")
    print(f" Corrected: '{corrected}'")
    print(f" Status: {status}")
    print(f" NOTE: Deployed API does NOT have Round 2 fixes yet. "
          f"This tests the CURRENT deployed state.")

# Save
output_path = os.path.join(os.path.dirname(__file__), 'round2_b2_b3_live.json')
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(
        {"b2": results, "b3": b3_results, "b3_note": "Deployed API lacks Round 2 fixes"},
        f, ensure_ascii=False, indent=2,
    )
print(f"\nResults saved to {output_path}")
tests/round2_live_tests.py ADDED
@@ -0,0 +1,423 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ BAYAN Round 2 — Live API Tests
3
+ Covers: A1 (BUG-032 exact input), A2 (Appendix E FP rate), B1 (31 prefix cases),
4
+ B4 (300/301 boundary), B5 (shadda), B7 (brackets)
5
+ """
6
+ import sys, os, json, time, requests
7
+ from datetime import datetime, timezone
8
+
9
+ API_BASE = "https://bayan10-bayan-api.hf.space"
10
+ TIMEOUT = 120
11
+
12
def api_call(endpoint, text, timeout=TIMEOUT):
    """Send ``text`` to ``API_BASE + endpoint`` and return the reply as a dict.

    A 200 response yields its JSON body with ``"_elapsed_ms"`` added. Any
    other status yields ``{"error": "HTTP <code>", "_elapsed_ms": ...}``.
    Any exception yields ``{"error": "<ExceptionType>: <message>"}``.
    """
    target = f"{API_BASE}{endpoint}"
    try:
        started = time.time()
        response = requests.post(target, json={"text": text}, timeout=timeout)
        elapsed_ms = int((time.time() - started) * 1000)
        if response.status_code != 200:
            return {"error": f"HTTP {response.status_code}", "_elapsed_ms": elapsed_ms}
        payload = response.json()
        payload['_elapsed_ms'] = elapsed_ms
        return payload
    except Exception as exc:
        # Best-effort probe: every failure is reported through the dict.
        return {"error": f"{type(exc).__name__}: {exc}"}
25
+
26
def log(msg):
    """Print ``msg`` prefixed with the local time as ``[HH:MM:SS]``, flushing at once."""
    stamp = datetime.now().strftime('%H:%M:%S')
    print(f"[{stamp}] {msg}", flush=True)
28
+
29
+ results = {"timestamp": datetime.now(timezone.utc).isoformat()}
30
+
31
+ # ═══════════════════════════════════════════════════════════════
32
+ # A1: BUG-032 — Exact 1104-char/187-word Input Re-test
33
+ # ═══════════════════════════════════════════════════════════════
34
+ log("=" * 70)
35
+ log("A1: BUG-032 — Exact Original Input Re-test")
36
+ log("=" * 70)
37
+
38
+ # Exact original text from phase0_investigation.py L100-112
39
+ BUG032_TEXT = (
40
+ "في ظل التطورات التكنولوجية المتسارعة التي يشهدها العالم اليوم أصبح من الضروري "
41
+ "أن نواكب هذه التغييرات ونتكيف معها بشكل فعال حيث تلعب التكنولوجيا دورا محوريا "
42
+ "في مختلف جوانب حياتنا اليومية بدءا من التعليم والصحة وصولا إلى الاقتصاد والسياسة "
43
+ "ولقد أدى الذكاء الاصطناعي إلى تحولات جذرية في طريقة عمل المؤسسات والشركات حيث "
44
+ "باتت الآلات قادرة على أداء مهام كانت حكرا على البشر مما يطرح تساؤلات عديدة حول "
45
+ "مستقبل سوق العمل والوظائف التقليدية كما أن التحول الرقمي فرض على الحكومات والمجتمعات "
46
+ "إعادة النظر في سياساتها التعليمية والاقتصادية لضمان مواكبة هذا التطور السريع وفي هذا "
47
+ "السياق يبرز دور البحث العلمي والابتكار كعاملين أساسيين في دفع عجلة التنمية المستدامة "
48
+ "وتحقيق الرفاهية للمجتمعات البشرية إذ لا يمكن لأي دولة أن تحقق تقدما حقيقيا دون "
49
+ "الاستثمار في العقول البشرية وتوفير بيئة محفزة للإبداع والابتكار ومن هنا تأتي أهمية "
50
+ "التعاون الدولي في مجال البحث العلمي وتبادل الخبرات والمعارف بين الدول المتقدمة والنامية "
51
+ "على حد سواء لتحقيق التنمية الشاملة والمستدامة التي تعود بالنفع على جميع شعوب العالم"
52
+ )
53
+
54
log(f" Input: {len(BUG032_TEXT)} chars, {len(BUG032_TEXT.split())} words")

# Test 1: Full pipeline
log(" Running /api/analyze...")
a1_pipeline = api_call("/api/analyze", BUG032_TEXT)
log(f" Pipeline: status={a1_pipeline.get('status', 'N/A')}, "
    f"suggestions={len(a1_pipeline.get('suggestions', []))}, "
    f"elapsed={a1_pipeline.get('_elapsed_ms', 'N/A')}ms, "
    f"error={a1_pipeline.get('error', 'none')}")
if 'timing_ms' in a1_pipeline:
    log(f" Timing: {a1_pipeline['timing_ms']}")
if 'warnings' in a1_pipeline:
    log(f" Warnings: {a1_pipeline['warnings']}")

# Test 2: Individual endpoints for timing reconciliation.
# Every endpoint logs and records its "error" field: without it a failed call
# looks the same as a successful call that changed nothing.
log(" Running /api/spelling (expect timeout or skip)...")
a1_spell = api_call("/api/spelling", BUG032_TEXT, timeout=120)
log(f" Spelling: elapsed={a1_spell.get('_elapsed_ms', 'N/A')}ms, "
    f"error={a1_spell.get('error', 'none')}")

log(" Running /api/grammar...")
a1_gram = api_call("/api/grammar", BUG032_TEXT, timeout=120)
log(f" Grammar: elapsed={a1_gram.get('_elapsed_ms', 'N/A')}ms, "
    f"changed={a1_gram.get('corrected_text', BUG032_TEXT) != BUG032_TEXT}, "
    f"error={a1_gram.get('error', 'none')}")

log(" Running /api/punctuation...")
a1_punc = api_call("/api/punctuation", BUG032_TEXT, timeout=120)
log(f" Punctuation: elapsed={a1_punc.get('_elapsed_ms', 'N/A')}ms, "
    f"changed={a1_punc.get('corrected_text', BUG032_TEXT) != BUG032_TEXT}, "
    f"error={a1_punc.get('error', 'none')}")

results['a1_bug032'] = {
    'input_chars': len(BUG032_TEXT),
    'input_words': len(BUG032_TEXT.split()),
    'pipeline': {
        'status': a1_pipeline.get('status'),
        'suggestions': len(a1_pipeline.get('suggestions', [])),
        'elapsed_ms': a1_pipeline.get('_elapsed_ms'),
        'timing_ms': a1_pipeline.get('timing_ms'),
        'warnings': a1_pipeline.get('warnings'),
        'error': a1_pipeline.get('error'),
    },
    'spelling': {'elapsed_ms': a1_spell.get('_elapsed_ms'), 'error': a1_spell.get('error')},
    'grammar': {'elapsed_ms': a1_gram.get('_elapsed_ms'), 'error': a1_gram.get('error')},
    'punctuation': {'elapsed_ms': a1_punc.get('_elapsed_ms'), 'error': a1_punc.get('error')},
}
97
+
98
+ # ═══════════════════════════════════════════════════════════════
99
+ # A2: Appendix E FP Rate — EXACT Original Sentences
100
+ # ═══════════════════════════════════════════════════════════════
101
+ log("\n" + "=" * 70)
102
+ log("A2: Appendix E — Exact Original Sentences FP Rate")
103
+ log("=" * 70)
104
+
105
+ # EXACT sentences from deep_dive_gaps.py L260-271
106
+ APPENDIX_E_ORIGINAL = [
107
+ {"id": "R-01", "sentence": "استوقفني المشهد فتأملته مليا", "domain": "literary"},
108
+ {"id": "R-02", "sentence": "تستأثر القوى العظمى بالنفوذ الدولي", "domain": "political"},
109
+ {"id": "R-03", "sentence": "استقطب المؤتمر ثلة من العلماء الأفذاذ", "domain": "formal"},
110
+ {"id": "R-04", "sentence": "يتسنى للمرء أن يستشف الحقيقة من بين السطور", "domain": "literary_verb"},
111
+ {"id": "R-05", "sentence": "ألقى المحاضر خطبة عصماء استحوذت على إعجاب الحاضرين", "domain": "oratory"},
112
+ {"id": "R-06", "sentence": "تمخض الاجتماع عن قرارات مصيرية", "domain": "formal_verb"},
113
+ {"id": "R-07", "sentence": "أرهقته المسغبة فاستكان للقدر", "domain": "classical"},
114
+ {"id": "R-08", "sentence": "نستشرف آفاق المستقبل بثقة واقتدار", "domain": "formal_speech"},
115
+ {"id": "R-09", "sentence": "اعتراه القلق فتملكه الأرق", "domain": "literary_psych"},
116
+ {"id": "R-10", "sentence": "استأنف العمل بعد فترة من التقاعس", "domain": "formal_verb"},
117
+ ]
118
+
119
+ # Constructed sentences from gap_filling_tests.py (for comparison)
120
+ CONSTRUCTED_SENTENCES = [
121
+ {"id": "R-01c", "sentence": "المدينة العصماء تحتضن آلاف السكان", "word": "العصماء"},
122
+ {"id": "R-02c", "sentence": "يستشف الباحث نتائج الدراسة بعناية", "word": "يستشف"},
123
+ {"id": "R-03c", "sentence": "أرهقته المسغبة والعطش الشديد", "word": "المسغبة"},
124
+ {"id": "R-04c", "sentence": "التقاعس عن العمل يؤدي إلى الفشل", "word": "التقاعس"},
125
+ {"id": "R-05c", "sentence": "استئثار السلطة يهدد الديمقراطية", "word": "استئثار"},
126
+ {"id": "R-06c", "sentence": "تبجيل العلماء واجب على المجتمع", "word": "تبجيل"},
127
+ {"id": "R-07c", "sentence": "الرجل الدمث يحبه الجميع", "word": "الدمث"},
128
+ {"id": "R-08c", "sentence": "استقصاء الحقائق مهم في الصحافة", "word": "استقصاء"},
129
+ {"id": "R-09c", "sentence": "لا يجوز التواني في طلب العلم", "word": "التواني"},
130
+ {"id": "R-10c", "sentence": "كتاب المستطرف من أمهات الكتب العربية", "word": "المستطرف"},
131
+ ]
132
+
133
def test_fp_set(name, items):
    """Run each sentence through ``/api/analyze`` and report the false-positive rate.

    Any difference between a sentence and the pipeline's ``"corrected"``
    output counts as one false positive.

    Args:
        name: Label used in the summary log line.
        items: Iterable-with-length of dicts carrying ``"id"`` and ``"sentence"``.

    Returns:
        Dict with ``fp_count``, ``total``, ``fp_rate`` (integer percentage as
        a string), ``error_count`` (calls that returned an ``"error"``), and
        the per-sentence ``results``.
    """
    total = len(items)
    fp_count = 0
    error_count = 0
    item_results = []
    for item in items:
        text = item["sentence"]
        r = api_call("/api/analyze", text)
        api_error = r.get("error")
        corrected = r.get("corrected", text)
        suggestions = r.get("suggestions", [])
        changed = corrected != text
        if changed:
            fp_count += 1
        result = {
            "id": item["id"], "input": text, "corrected": corrected,
            "changed": changed, "suggestion_count": len(suggestions),
        }
        if api_error:
            # A failed call falls back to the input text, so it would
            # otherwise be indistinguishable from a preserved sentence.
            error_count += 1
            result["error"] = api_error
            status = f"⚠ API ERROR ({api_error})"
        elif changed:
            status = "❌ CHANGED"
        else:
            status = "✅ PRESERVED"
        item_results.append(result)
        log(f" {item['id']}: {status}")
        if changed:
            log(f" Input: '{text[:60]}'")
            log(f" Corrected: '{corrected[:60]}'")
            for s in suggestions:
                log(f" [{s.get('type','')}] '{s.get('original','')}' → '{s.get('correction','')}'")
    # Guard the percentage so an empty set reports 0% instead of raising.
    rate = fp_count * 100 // total if total else 0
    log(f"\n {name} FP rate: {fp_count}/{total} = {rate}%")
    if error_count:
        log(f" {name}: {error_count} API error(s) — those sentences were not evaluated")
    return {
        "fp_count": fp_count, "total": total, "fp_rate": f"{rate}%",
        "error_count": error_count, "results": item_results,
    }


# The name starts with "test_", so pytest would try to collect this helper as
# a test and fail on its parameters; opt it out of collection explicitly.
test_fp_set.__test__ = False
158
+
159
# Run both sentence sets through the same false-positive check: first the
# original Appendix E sentences, then the constructed comparison set.
a2_runs = {}
for a2_key, a2_banner, a2_title, a2_items in (
    ("original", "\n --- Original Appendix E sentences ---",
     "Original Appendix E", APPENDIX_E_ORIGINAL),
    ("constructed", "\n --- Constructed sentences (comparison) ---",
     "Constructed", CONSTRUCTED_SENTENCES),
):
    log(a2_banner)
    a2_runs[a2_key] = test_fp_set(a2_title, a2_items)

a2_original = a2_runs["original"]
a2_constructed = a2_runs["constructed"]

results['a2_fp_rate'] = {
    'original_appendix_e': a2_original,
    'constructed_sentences': a2_constructed,
}
169
+
170
+ # ═══════════════════════════════════════════════════════════════
171
+ # B1: All 31 Prefix/Clitic Cases through Pipeline
172
+ # ═══════════════════════════════════════════════════════════════
173
log("\n" + "=" * 70)
log("B1: All 31 Prefix/Clitic Cases through Pipeline")
log("=" * 70)

CAT8_ROOTS = ['مدرسة', 'شمس', 'أمة', 'نافذة', 'علم', 'اقتصاد']
CAT8_PREFIXES = [("bare", ""), ("wa", "و"), ("ba", "ب"), ("la", "ل"), ("ka", "ك")]


def _parse_confidence(value):
    """Return ``value`` as a float, or ``None`` when it is missing or non-numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


b1_total = 0
b1_blocked = 0
b1_leaked = 0
b1_unchanged = 0
b1_errors = 0
b1_results = []

for root in CAT8_ROOTS:
    for pfx_name, pfx in CAT8_PREFIXES:
        word = pfx + root
        # Put word in a minimal sentence context
        sentence = f"{word} مهم جدا"
        r = api_call("/api/analyze", sentence)
        api_error = r.get("error")
        corrected = r.get("corrected", sentence)
        suggestions = r.get("suggestions", [])

        # Check if the word was changed
        word_in_corrected = word in corrected
        word_changed = not word_in_corrected

        # Suggestions whose original span contains the word (this also covers
        # the exact-match case).
        targeting = [s for s in suggestions if word in s.get("original", "")]

        b1_total += 1
        if api_error:
            # A failed call falls back to the input sentence; count it
            # separately instead of scoring it as a preserved word.
            b1_errors += 1
            status = f"⚠ API ERROR ({api_error})"
        elif word_changed:
            b1_leaked += 1
            status = "❌ CHANGED"
        elif targeting:
            # Suggestion exists but wasn't applied (dampened?)
            conf = targeting[0].get('confidence', '?')
            conf_value = _parse_confidence(conf)
            # A missing or non-numeric confidence cannot prove dampening, so
            # it is treated as leaked. A confidence of 0 is a valid "dampened".
            if conf_value is not None and conf_value < 0.9:
                b1_blocked += 1
                status = f"✅ DAMPENED (conf={conf})"
            else:
                b1_leaked += 1
                status = f"⚠ LEAKED (conf={conf})"
        else:
            b1_unchanged += 1
            status = "✅ PRESERVED"

        result = {
            "word": word, "root": root, "prefix": pfx_name,
            "input": sentence, "corrected": corrected,
            "word_preserved": word_in_corrected,
            "targeting_suggestions": len(targeting),
            "status": status,
        }
        b1_results.append(result)
        log(f" {word:12s} ({pfx_name:4s}+{root}): {status}")
        if word_changed:
            log(f" Input: '{sentence}'")
            log(f" Corrected: '{corrected}'")

# BUG-021: ولذالك (case 31)
sentence_31 = "ولذالك يجب الاهتمام"
r31 = api_call("/api/analyze", sentence_31)
corrected_31 = r31.get("corrected", sentence_31)
word_31 = "ولذالك"
word_31_ok = "ولذلك" in corrected_31
bad_split_31 = "ولذا ذلك" in corrected_31
b1_total += 1

if r31.get("error"):
    status_31 = f"⚠ API ERROR ({r31['error']})"
    b1_errors += 1
elif word_31_ok:
    status_31 = "✅ CORRECTED (ولذالك→ولذلك)"
    b1_blocked += 1
elif bad_split_31:
    status_31 = "❌ BAD SPLIT (ولذا ذلك)"
    b1_leaked += 1
elif word_31 in corrected_31:
    status_31 = "⚠ UNCHANGED (misspelling preserved)"
    b1_unchanged += 1
else:
    status_31 = f"⚠ OTHER: '{corrected_31}'"
    b1_leaked += 1

b1_results.append({"word": word_31, "input": sentence_31, "corrected": corrected_31, "status": status_31})
log(f" {'ولذالك':12s} (BUG-021): {status_31}")

log(f"\n Total: {b1_total}, Preserved: {b1_unchanged}, Blocked/Dampened: {b1_blocked}, "
    f"Leaked: {b1_leaked}, API errors: {b1_errors}")

results['b1_prefix'] = {
    'total': b1_total, 'unchanged': b1_unchanged,
    'blocked': b1_blocked, 'leaked': b1_leaked,
    'errors': b1_errors,
    'results': b1_results,
}
268
+
269
+ # ═══════════════════════════════════════════════════════════════
270
+ # B4: 300/301 Boundary + Repetitive Text
271
+ # ═══════════════════════════════════════════════════════════════
272
log("\n" + "=" * 70)
log("B4: 300/301 Boundary + Repetitive Text")
log("=" * 70)

# Test 1: Exact boundary (300 chars vs 301 chars)
base = "ذهب الولد الى المدرسه وقابل المعلمه "  # ~37 chars with errors
repeated_base = base * 10
text_300 = repeated_base[:300]
text_301 = repeated_base[:301]
log(f" 300-char text: {len(text_300)} chars")
log(f" 301-char text: {len(text_301)} chars")

r300 = api_call("/api/analyze", text_300)
r301 = api_call("/api/analyze", text_301)

s300 = r300.get("suggestions", [])
s301 = r301.get("suggestions", [])
s300_types = {s.get('type') for s in s300}
s301_types = {s.get('type') for s in s301}

log(f" 300 chars: {len(s300)} suggestions, types={s300_types}")
log(f" 301 chars: {len(s301)} suggestions, types={s301_types}")

# Check if spelling suggestions differ
s300_spell = [s for s in s300 if s.get('type') == 'spelling']
s301_spell = [s for s in s301 if s.get('type') == 'spelling']
log(f" 300 chars spelling: {len(s300_spell)} suggestions")
log(f" 301 chars spelling: {len(s301_spell)} suggestions")

# A failed call yields zero suggestions on that side, which would otherwise
# be read as evidence about the boundary; report it as inconclusive instead.
boundary_errors = [str(r["error"]) for r in (r300, r301) if r.get("error")]
if boundary_errors:
    error_summary = "; ".join(boundary_errors)
    log(f" ⚠ API error — boundary check inconclusive: {error_summary}")
    boundary_explanation = f"Inconclusive — API error: {error_summary}"
elif len(s300_spell) > 0 and len(s301_spell) == 0:
    log(" ✅ AraSpell skip confirmed: spelling runs at 300, skipped at 301")
    boundary_explanation = "Character count: <=300 runs AraSpell, >300 skips it"
elif len(s300_spell) == len(s301_spell):
    log(" ⚠ Same spelling count at both — boundary may not work as expected")
    boundary_explanation = "Boundary NOT working as expected — same results at 300 and 301"
else:
    log(" ⚠ Different spelling counts but not the expected pattern")
    boundary_explanation = f"Partial: 300={len(s300_spell)} spell, 301={len(s301_spell)} spell"

# Test 2: Repetitive text (مرحبا × 100)
repetitive = "مرحبا " * 100
log(f"\n Repetitive text: '{repetitive[:30]}...' ({len(repetitive)} chars)")
r_rep = api_call("/api/analyze", repetitive)
rep_error = r_rep.get("error")
rep_corrected = r_rep.get("corrected", "")
rep_sugg = r_rep.get("suggestions", [])
rep_status = r_rep.get("status", "")

# Heuristic garbling check: any of the letters ص/ط/ن appears in the output
# and the repeated word is absent from the first 20 characters.
has_garble = any(c in rep_corrected for c in 'صطن') and 'مرحبا' not in rep_corrected[:20]
log(f" Status: {rep_status}, Suggestions: {len(rep_sugg)}")
log(f" Corrected starts with: '{rep_corrected[:60]}...'")
if rep_error:
    # No output to inspect, so do not claim the text came back clean.
    log(f" ⚠ API error — garbling check inconclusive: {rep_error}")
elif has_garble:
    log(" ❌ GARBLED output detected")
else:
    log(" ✅ No obvious garbling")

results['b4_boundary'] = {
    'boundary_explanation': boundary_explanation,
    # Record the measured lengths rather than assuming the slices were full.
    'test_300': {'chars': len(text_300), 'suggestions': len(s300), 'spelling': len(s300_spell),
                 'error': r300.get("error")},
    'test_301': {'chars': len(text_301), 'suggestions': len(s301), 'spelling': len(s301_spell),
                 'error': r301.get("error")},
    'repetitive': {
        'input_chars': len(repetitive),
        'status': rep_status,
        'suggestions': len(rep_sugg),
        'garbled': has_garble,
        'corrected_preview': rep_corrected[:100],
        'error': rep_error,
    },
}
339
+
340
+ # ═══════════════════════════════════════════════════════════════
341
+ # B5: Shadda Duplication Verification
342
+ # ═══════════════════════════════════════════════════════════════
343
log("\n" + "=" * 70)
log("B5: Shadda Duplication — Sentence Context")
log("=" * 70)

# Inputs containing إنّ / أنّ (with shadda), both inside a sentence and alone.
shadda_tests = [
    {"input": "إنّ العلم نور", "check": "إنّ", "desc": "إنّ in sentence"},
    {"input": "علمت أنّ الامتحان صعب", "check": "أنّ", "desc": "أنّ in sentence"},
    {"input": "إنّ", "check": "إنّ", "desc": "إنّ in isolation"},
    {"input": "أنّ", "check": "أنّ", "desc": "أنّ in isolation"},
]

b5_results = []
for t in shadda_tests:
    r = api_call("/api/spelling", t["input"])
    api_error = r.get("error")
    out = r.get("corrected_text", t["input"])
    # The particle appearing twice in the output is taken as duplication.
    duplicated = out.count("إن") >= 2 or out.count("أن") >= 2
    changed = out != t["input"]
    if api_error:
        # A failed call falls back to the input, which would otherwise be
        # reported as "✅ OK" without the endpoint having run.
        status = f"⚠ API ERROR ({api_error})"
    elif duplicated:
        status = "❌ DUPLICATED"
    elif changed:
        status = "⚠ CHANGED"
    else:
        status = "✅ OK"
    b5_results.append({"input": t["input"], "output": out, "status": status})
    log(f" {t['desc']}: '{t['input']}' → '{out}' {status}")

results['b5_shadda'] = b5_results
365
+
366
+ # ═══════════════════════════════════════════════════════════════
367
+ # B7: Unbalanced Brackets
368
+ # ═══════════════════════════════════════════════════════════════
369
log("\n" + "=" * 70)
log("B7: Unbalanced Brackets — E6")
log("=" * 70)

bracket_tests = [
    "(([{هذا النص}]))",
    "({هذا النص})",
    "(هذا النص)",
    "[هذا النص]",
]

_BRACKET_OPENER_FOR = {')': '(', ']': '[', '}': '{'}


def bracket_balance(s):
    """Return ``(opens, closes, balanced)`` for the brackets in ``s``.

    ``opens`` and ``closes`` are plain counts of ``([{`` and ``)]}``.
    ``balanced`` is True only when every closing bracket matches the most
    recent unmatched opener and nothing is left open, so ``")("`` and
    ``"(]"`` are unbalanced even though their counts agree.
    """
    opens = closes = 0
    pending = []
    well_formed = True
    for c in s:
        if c in '([{':
            opens += 1
            pending.append(c)
        elif c in _BRACKET_OPENER_FOR:
            closes += 1
            if pending and pending[-1] == _BRACKET_OPENER_FOR[c]:
                pending.pop()
            else:
                well_formed = False
    return opens, closes, well_formed and not pending


b7_results = []
for text in bracket_tests:
    r = api_call("/api/analyze", text)
    api_error = r.get("error")
    corrected = r.get("corrected", text)
    suggestions = r.get("suggestions", [])

    in_o, in_c, in_bal = bracket_balance(text)
    out_o, out_c, out_bal = bracket_balance(corrected)

    result = {
        "input": text, "corrected": corrected,
        "input_balanced": in_bal, "output_balanced": out_bal,
        "suggestions": len(suggestions),
    }
    if api_error:
        result["error"] = api_error
    b7_results.append(result)

    if api_error:
        # The fallback output is the input itself, so a balanced verdict
        # would say nothing about the pipeline.
        status = f"⚠ API ERROR ({api_error})"
    elif in_bal and not out_bal:
        status = "❌ BRACKETS UNBALANCED"
    elif out_bal and not in_bal:
        status = "✅ FIXED"
    elif out_bal:
        status = "✅ BRACKETS OK"
    else:
        status = "⚠ BOTH UNBALANCED"

    log(f" '{text}' → '{corrected}' {status}")
    log(f" Input: {in_o} opens, {in_c} closes, balanced={in_bal}")
    log(f" Output: {out_o} opens, {out_c} closes, balanced={out_bal}")

results['b7_brackets'] = b7_results
416
+
417
+ # ═══════════════════════════════════════════════════════════════
418
+ # SAVE
419
+ # ═══════════════════════════════════════════════════════════════
420
# Write the accumulated results as JSON in the same directory as this script.
results_dir = os.path.dirname(__file__)
output_path = os.path.join(results_dir, 'round2_results.json')
with open(output_path, 'w', encoding='utf-8') as out_file:
    # ensure_ascii=False keeps the Arabic text readable in the saved file.
    json.dump(results, out_file, ensure_ascii=False, indent=2)
log(f"\nAll results saved to {output_path}")
tests/round2_results.json ADDED
@@ -0,0 +1,571 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "2026-06-20T21:33:09.369155+00:00",
3
+ "a1_bug032": {
4
+ "input_chars": 982,
5
+ "input_words": 159,
6
+ "pipeline": {
7
+ "status": "success",
8
+ "suggestions": 4,
9
+ "elapsed_ms": 29556,
10
+ "timing_ms": {
11
+ "grammar_ms": 13132,
12
+ "punctuation_ms": 15452,
13
+ "spelling_ms": 0,
14
+ "total_ms": 28589
15
+ },
16
+ "warnings": null
17
+ },
18
+ "spelling": {
19
+ "elapsed_ms": 15980,
20
+ "error": null
21
+ },
22
+ "grammar": {
23
+ "elapsed_ms": 13068
24
+ },
25
+ "punctuation": {
26
+ "elapsed_ms": 21901
27
+ }
28
+ },
29
+ "a2_fp_rate": {
30
+ "original_appendix_e": {
31
+ "fp_count": 10,
32
+ "total": 10,
33
+ "fp_rate": "100%",
34
+ "results": [
35
+ {
36
+ "id": "R-01",
37
+ "input": "استوقفني المشهد فتأملته مليا",
38
+ "corrected": "استوقفني المشهد فتأملتة مليا",
39
+ "changed": true,
40
+ "suggestion_count": 1
41
+ },
42
+ {
43
+ "id": "R-02",
44
+ "input": "تستأثر القوى العظمى بالنفوذ الدولي",
45
+ "corrected": "تستأثر القوى العظمى بالنفوذ الدولي.",
46
+ "changed": true,
47
+ "suggestion_count": 1
48
+ },
49
+ {
50
+ "id": "R-03",
51
+ "input": "استقطب المؤتمر ثلة من العلماء الأفذاذ",
52
+ "corrected": "استقطب المؤتمر ثلة من العلماء الأفذاذ.",
53
+ "changed": true,
54
+ "suggestion_count": 1
55
+ },
56
+ {
57
+ "id": "R-04",
58
+ "input": "يتسنى للمرء أن يستشف الحقيقة من بين السطور",
59
+ "corrected": "يتسنى للمرء أن يستشفِ الحقيقة من بين السطور",
60
+ "changed": true,
61
+ "suggestion_count": 1
62
+ },
63
+ {
64
+ "id": "R-05",
65
+ "input": "ألقى المحاضر خطبة عصماء استحوذت على إعجاب الحاضرين",
66
+ "corrected": "ألقى المحاضر خطبة عصماء استحوذت؛ على إعجاب الحاضرين",
67
+ "changed": true,
68
+ "suggestion_count": 1
69
+ },
70
+ {
71
+ "id": "R-06",
72
+ "input": "تمخض الاجتماع عن قرارات مصيرية",
73
+ "corrected": "تمخض الاجتماع عن قرارات مصيرية.",
74
+ "changed": true,
75
+ "suggestion_count": 1
76
+ },
77
+ {
78
+ "id": "R-07",
79
+ "input": "أرهقته المسغبة فاستكان للقدر",
80
+ "corrected": "أرهقته المسغبة فاستكان للقدر.",
81
+ "changed": true,
82
+ "suggestion_count": 1
83
+ },
84
+ {
85
+ "id": "R-08",
86
+ "input": "نستشرف آفاق المستقبل بثقة واقتدار",
87
+ "corrected": "نستشرف آفاق المستقبل بثقة واقتدار.",
88
+ "changed": true,
89
+ "suggestion_count": 1
90
+ },
91
+ {
92
+ "id": "R-09",
93
+ "input": "اعتراه القلق فتملكه الأرق",
94
+ "corrected": "اعتراه القلق فتملكة الأرق.",
95
+ "changed": true,
96
+ "suggestion_count": 2
97
+ },
98
+ {
99
+ "id": "R-10",
100
+ "input": "استأنف العمل بعد فترة من التقاعس",
101
+ "corrected": "استأنف العمل بعد فترة من التقاعس.",
102
+ "changed": true,
103
+ "suggestion_count": 1
104
+ }
105
+ ]
106
+ },
107
+ "constructed_sentences": {
108
+ "fp_count": 10,
109
+ "total": 10,
110
+ "fp_rate": "100%",
111
+ "results": [
112
+ {
113
+ "id": "R-01c",
114
+ "input": "المدينة العصماء تحتضن آلاف السكان",
115
+ "corrected": "المدينة العصماء تحتضن آلاف السكان.",
116
+ "changed": true,
117
+ "suggestion_count": 1
118
+ },
119
+ {
120
+ "id": "R-02c",
121
+ "input": "يستشف الباحث نتائج الدراسة بعناية",
122
+ "corrected": "يستشف الباحث نتائج الدراسة بعناية.",
123
+ "changed": true,
124
+ "suggestion_count": 1
125
+ },
126
+ {
127
+ "id": "R-03c",
128
+ "input": "أرهقته المسغبة والعطش الشديد",
129
+ "corrected": "أرهقته المسغبة والعطش الشديد!",
130
+ "changed": true,
131
+ "suggestion_count": 1
132
+ },
133
+ {
134
+ "id": "R-04c",
135
+ "input": "التقاعس عن العمل يؤدي إلى الفشل",
136
+ "corrected": "التقاعس عن العمل يؤدي إلى الفشل.",
137
+ "changed": true,
138
+ "suggestion_count": 1
139
+ },
140
+ {
141
+ "id": "R-05c",
142
+ "input": "استئثار السلطة يهدد الديمقراطية",
143
+ "corrected": "استئثار السلطة يهدد الديمقراطية.",
144
+ "changed": true,
145
+ "suggestion_count": 1
146
+ },
147
+ {
148
+ "id": "R-06c",
149
+ "input": "تبجيل العلماء واجب على المجتمع",
150
+ "corrected": "تبجيل العلماء واجب على المجتمع.",
151
+ "changed": true,
152
+ "suggestion_count": 1
153
+ },
154
+ {
155
+ "id": "R-07c",
156
+ "input": "الرجل الدمث يحبه الجميع",
157
+ "corrected": "الرجل الدم يحبة الجميع.",
158
+ "changed": true,
159
+ "suggestion_count": 3
160
+ },
161
+ {
162
+ "id": "R-08c",
163
+ "input": "استقصاء الحقائق مهم في الصحافة",
164
+ "corrected": "استقصاء الحقائق مهم في الصحافة.",
165
+ "changed": true,
166
+ "suggestion_count": 1
167
+ },
168
+ {
169
+ "id": "R-09c",
170
+ "input": "لا يجوز التواني في طلب العلم",
171
+ "corrected": "لا يجوز التواني في طلب العلم.",
172
+ "changed": true,
173
+ "suggestion_count": 1
174
+ },
175
+ {
176
+ "id": "R-10c",
177
+ "input": "كتاب المستطرف من أمهات الكتب العربية",
178
+ "corrected": "كتاب المستطرف من أمهات الكتب العربية.",
179
+ "changed": true,
180
+ "suggestion_count": 1
181
+ }
182
+ ]
183
+ }
184
+ },
185
+ "b1_prefix": {
186
+ "total": 31,
187
+ "unchanged": 31,
188
+ "blocked": 0,
189
+ "leaked": 0,
190
+ "results": [
191
+ {
192
+ "word": "مدرسة",
193
+ "root": "مدرسة",
194
+ "prefix": "bare",
195
+ "input": "مدرسة مهم جدا",
196
+ "corrected": "مدرسة مهمة جدا.",
197
+ "word_preserved": true,
198
+ "targeting_suggestions": 0,
199
+ "status": "✅ PRESERVED"
200
+ },
201
+ {
202
+ "word": "ومدرسة",
203
+ "root": "مدرسة",
204
+ "prefix": "wa",
205
+ "input": "ومدرسة مهم جدا",
206
+ "corrected": "ومدرسة مهمة جدا.",
207
+ "word_preserved": true,
208
+ "targeting_suggestions": 0,
209
+ "status": "✅ PRESERVED"
210
+ },
211
+ {
212
+ "word": "بمدرسة",
213
+ "root": "مدرسة",
214
+ "prefix": "ba",
215
+ "input": "بمدرسة مهم جدا",
216
+ "corrected": "بمدرسة مهمة جدا.",
217
+ "word_preserved": true,
218
+ "targeting_suggestions": 0,
219
+ "status": "✅ PRESERVED"
220
+ },
221
+ {
222
+ "word": "لمدرسة",
223
+ "root": "مدرسة",
224
+ "prefix": "la",
225
+ "input": "لمدرسة مهم جدا",
226
+ "corrected": "لمدرسة مهمة جدا.",
227
+ "word_preserved": true,
228
+ "targeting_suggestions": 0,
229
+ "status": "✅ PRESERVED"
230
+ },
231
+ {
232
+ "word": "كمدرسة",
233
+ "root": "مدرسة",
234
+ "prefix": "ka",
235
+ "input": "كمدرسة مهم جدا",
236
+ "corrected": "كمدرسة مهمة جدا.",
237
+ "word_preserved": true,
238
+ "targeting_suggestions": 0,
239
+ "status": "✅ PRESERVED"
240
+ },
241
+ {
242
+ "word": "شمس",
243
+ "root": "شمس",
244
+ "prefix": "bare",
245
+ "input": "شمس مهم جدا",
246
+ "corrected": "شمس مهمة جدا.",
247
+ "word_preserved": true,
248
+ "targeting_suggestions": 0,
249
+ "status": "✅ PRESERVED"
250
+ },
251
+ {
252
+ "word": "وشمس",
253
+ "root": "شمس",
254
+ "prefix": "wa",
255
+ "input": "وشمس مهم جدا",
256
+ "corrected": "وشمس مهمة جدا.",
257
+ "word_preserved": true,
258
+ "targeting_suggestions": 0,
259
+ "status": "✅ PRESERVED"
260
+ },
261
+ {
262
+ "word": "بشمس",
263
+ "root": "شمس",
264
+ "prefix": "ba",
265
+ "input": "بشمس مهم جدا",
266
+ "corrected": "بشمس مهمة جدا.",
267
+ "word_preserved": true,
268
+ "targeting_suggestions": 0,
269
+ "status": "✅ PRESERVED"
270
+ },
271
+ {
272
+ "word": "لشمس",
273
+ "root": "شمس",
274
+ "prefix": "la",
275
+ "input": "لشمس مهم جدا",
276
+ "corrected": "لشمس مهم جدا.",
277
+ "word_preserved": true,
278
+ "targeting_suggestions": 0,
279
+ "status": "✅ PRESERVED"
280
+ },
281
+ {
282
+ "word": "كشمس",
283
+ "root": "شمس",
284
+ "prefix": "ka",
285
+ "input": "كشمس مهم جدا",
286
+ "corrected": "كشمس مهمة جدا.",
287
+ "word_preserved": true,
288
+ "targeting_suggestions": 0,
289
+ "status": "✅ PRESERVED"
290
+ },
291
+ {
292
+ "word": "أمة",
293
+ "root": "أمة",
294
+ "prefix": "bare",
295
+ "input": "أمة مهم جدا",
296
+ "corrected": "أمة مهمة جدا؟",
297
+ "word_preserved": true,
298
+ "targeting_suggestions": 0,
299
+ "status": "✅ PRESERVED"
300
+ },
301
+ {
302
+ "word": "وأمة",
303
+ "root": "أمة",
304
+ "prefix": "wa",
305
+ "input": "وأمة مهم جدا",
306
+ "corrected": "وأمة مهمة جدا.",
307
+ "word_preserved": true,
308
+ "targeting_suggestions": 0,
309
+ "status": "✅ PRESERVED"
310
+ },
311
+ {
312
+ "word": "بأمة",
313
+ "root": "أمة",
314
+ "prefix": "ba",
315
+ "input": "بأمة مهم جدا",
316
+ "corrected": "بأمة مهمة جدا؟",
317
+ "word_preserved": true,
318
+ "targeting_suggestions": 0,
319
+ "status": "✅ PRESERVED"
320
+ },
321
+ {
322
+ "word": "لأمة",
323
+ "root": "أمة",
324
+ "prefix": "la",
325
+ "input": "لأمة مهم جدا",
326
+ "corrected": "لأمة مهمة جدا.",
327
+ "word_preserved": true,
328
+ "targeting_suggestions": 0,
329
+ "status": "✅ PRESERVED"
330
+ },
331
+ {
332
+ "word": "كأمة",
333
+ "root": "أمة",
334
+ "prefix": "ka",
335
+ "input": "كأمة مهم جدا",
336
+ "corrected": "كأمة مهمة جدا.",
337
+ "word_preserved": true,
338
+ "targeting_suggestions": 0,
339
+ "status": "✅ PRESERVED"
340
+ },
341
+ {
342
+ "word": "نافذة",
343
+ "root": "نافذة",
344
+ "prefix": "bare",
345
+ "input": "نافذة مهم جدا",
346
+ "corrected": "نافذة مهمة جدا.",
347
+ "word_preserved": true,
348
+ "targeting_suggestions": 0,
349
+ "status": "✅ PRESERVED"
350
+ },
351
+ {
352
+ "word": "ونافذة",
353
+ "root": "نافذة",
354
+ "prefix": "wa",
355
+ "input": "ونافذة مهم جدا",
356
+ "corrected": "ونافذة مهمة جدا.",
357
+ "word_preserved": true,
358
+ "targeting_suggestions": 0,
359
+ "status": "✅ PRESERVED"
360
+ },
361
+ {
362
+ "word": "بنافذة",
363
+ "root": "نافذة",
364
+ "prefix": "ba",
365
+ "input": "بنافذة مهم جدا",
366
+ "corrected": "بنافذة مهمة جدا.",
367
+ "word_preserved": true,
368
+ "targeting_suggestions": 0,
369
+ "status": "✅ PRESERVED"
370
+ },
371
+ {
372
+ "word": "لنافذة",
373
+ "root": "نافذة",
374
+ "prefix": "la",
375
+ "input": "لنافذة مهم جدا",
376
+ "corrected": "لنافذة مهمة جدا.",
377
+ "word_preserved": true,
378
+ "targeting_suggestions": 0,
379
+ "status": "✅ PRESERVED"
380
+ },
381
+ {
382
+ "word": "كنافذة",
383
+ "root": "نافذة",
384
+ "prefix": "ka",
385
+ "input": "كنافذة مهم جدا",
386
+ "corrected": "كنافذة مهمة جدا.",
387
+ "word_preserved": true,
388
+ "targeting_suggestions": 0,
389
+ "status": "✅ PRESERVED"
390
+ },
391
+ {
392
+ "word": "علم",
393
+ "root": "علم",
394
+ "prefix": "bare",
395
+ "input": "علم مهم جدا",
396
+ "corrected": "علم مهم جدا.",
397
+ "word_preserved": true,
398
+ "targeting_suggestions": 0,
399
+ "status": "✅ PRESERVED"
400
+ },
401
+ {
402
+ "word": "وعلم",
403
+ "root": "علم",
404
+ "prefix": "wa",
405
+ "input": "وعلم مهم جدا",
406
+ "corrected": "وعلم مهم جدا.",
407
+ "word_preserved": true,
408
+ "targeting_suggestions": 0,
409
+ "status": "✅ PRESERVED"
410
+ },
411
+ {
412
+ "word": "بعلم",
413
+ "root": "علم",
414
+ "prefix": "ba",
415
+ "input": "بعلم مهم جدا",
416
+ "corrected": "بعلم مهم جدا.",
417
+ "word_preserved": true,
418
+ "targeting_suggestions": 0,
419
+ "status": "✅ PRESERVED"
420
+ },
421
+ {
422
+ "word": "لعلم",
423
+ "root": "علم",
424
+ "prefix": "la",
425
+ "input": "لعلم مهم جدا",
426
+ "corrected": "لعلم مهم جدا.",
427
+ "word_preserved": true,
428
+ "targeting_suggestions": 0,
429
+ "status": "✅ PRESERVED"
430
+ },
431
+ {
432
+ "word": "كعلم",
433
+ "root": "علم",
434
+ "prefix": "ka",
435
+ "input": "كعلم مهم جدا",
436
+ "corrected": "كعلم مهم جدا.",
437
+ "word_preserved": true,
438
+ "targeting_suggestions": 0,
439
+ "status": "✅ PRESERVED"
440
+ },
441
+ {
442
+ "word": "اقتصاد",
443
+ "root": "اقتصاد",
444
+ "prefix": "bare",
445
+ "input": "اقتصاد مهم جدا",
446
+ "corrected": "اقتصاد مهم جدا.",
447
+ "word_preserved": true,
448
+ "targeting_suggestions": 0,
449
+ "status": "✅ PRESERVED"
450
+ },
451
+ {
452
+ "word": "واقتصاد",
453
+ "root": "اقتصاد",
454
+ "prefix": "wa",
455
+ "input": "واقتصاد مهم جدا",
456
+ "corrected": "واقتصاد مهم جدا.",
457
+ "word_preserved": true,
458
+ "targeting_suggestions": 0,
459
+ "status": "✅ PRESERVED"
460
+ },
461
+ {
462
+ "word": "باقتصاد",
463
+ "root": "اقتصاد",
464
+ "prefix": "ba",
465
+ "input": "باقتصاد مهم جدا",
466
+ "corrected": "باقتصاد مهم جدا.",
467
+ "word_preserved": true,
468
+ "targeting_suggestions": 0,
469
+ "status": "✅ PRESERVED"
470
+ },
471
+ {
472
+ "word": "لاقتصاد",
473
+ "root": "اقتصاد",
474
+ "prefix": "la",
475
+ "input": "لاقتصاد مهم جدا",
476
+ "corrected": "لاقتصاد مهم جدا.",
477
+ "word_preserved": true,
478
+ "targeting_suggestions": 0,
479
+ "status": "✅ PRESERVED"
480
+ },
481
+ {
482
+ "word": "كاقتصاد",
483
+ "root": "اقتصاد",
484
+ "prefix": "ka",
485
+ "input": "كاقتصاد مهم جدا",
486
+ "corrected": "كاقتصاد مهم جدا.",
487
+ "word_preserved": true,
488
+ "targeting_suggestions": 0,
489
+ "status": "✅ PRESERVED"
490
+ },
491
+ {
492
+ "word": "ولذالك",
493
+ "input": "ولذالك يجب الاهتمام",
494
+ "corrected": "ولذالك يجب الاهتمام.",
495
+ "status": "⚠ UNCHANGED (misspelling preserved)"
496
+ }
497
+ ]
498
+ },
499
+ "b4_boundary": {
500
+ "boundary_explanation": "Partial: 300=23 spell, 301=2 spell",
501
+ "test_300": {
502
+ "chars": 300,
503
+ "suggestions": 31,
504
+ "spelling": 23
505
+ },
506
+ "test_301": {
507
+ "chars": 301,
508
+ "suggestions": 12,
509
+ "spelling": 2
510
+ },
511
+ "repetitive": {
512
+ "input_chars": 600,
513
+ "status": "success",
514
+ "suggestions": 4,
515
+ "garbled": false,
516
+ "corrected_preview": "مرحبا مرحبا مرحبا ومرحبا مرحبا مرحبامرحبا مرحبا مرحبا مرحب مرحبا مرحبا وسهلا مرحبا مرحبا ترحيبا مرحب"
517
+ }
518
+ },
519
+ "b5_shadda": [
520
+ {
521
+ "input": "إنّ العلم نور",
522
+ "output": "إن العلم نور",
523
+ "status": "⚠ CHANGED"
524
+ },
525
+ {
526
+ "input": "علمت أنّ الامتحان صعب",
527
+ "output": "علمت أن الامتحان صعب",
528
+ "status": "⚠ CHANGED"
529
+ },
530
+ {
531
+ "input": "إنّ",
532
+ "output": "إن إن",
533
+ "status": "❌ DUPLICATED"
534
+ },
535
+ {
536
+ "input": "أنّ",
537
+ "output": "أن أن",
538
+ "status": "❌ DUPLICATED"
539
+ }
540
+ ],
541
+ "b7_brackets": [
542
+ {
543
+ "input": "(([{هذا النص}]))",
544
+ "corrected": "( ( [ { هذا النص } ] ، و",
545
+ "input_balanced": true,
546
+ "output_balanced": false,
547
+ "suggestions": 1
548
+ },
549
+ {
550
+ "input": "({هذا النص})",
551
+ "corrected": "يُعدَّ هذا النصا",
552
+ "input_balanced": true,
553
+ "output_balanced": true,
554
+ "suggestions": 1
555
+ },
556
+ {
557
+ "input": "(هذا النص)",
558
+ "corrected": "هذا النص",
559
+ "input_balanced": true,
560
+ "output_balanced": true,
561
+ "suggestions": 1
562
+ },
563
+ {
564
+ "input": "[هذا النص]",
565
+ "corrected": "هذا النص",
566
+ "input_balanced": true,
567
+ "output_balanced": true,
568
+ "suggestions": 1
569
+ }
570
+ ]
571
+ }
tests/spelling_output.txt ADDED
Binary file (1.79 kB). View file
 
tests/test_bug_fixes.py CHANGED
@@ -657,5 +657,186 @@ class TestSuffixCorruption(unittest.TestCase):
657
  "Verb+pronoun كتبته→كتبتة must be blocked")
658
 
659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
660
  if __name__ == '__main__':
661
  unittest.main()
 
657
  "Verb+pronoun كتبته→كتبتة must be blocked")
658
 
659
 
660
+ # ═══════════════════════════════════════════════════════════════
661
+ # P2: Ta Marbuta Fix Tests
662
+ # ═══════════════════════════════════════════════════════════════
663
class TestTaMarbutaFix(unittest.TestCase):
    """Exercise the ``_fix_ta_marbuta`` pipeline helper exposed by ``app``.

    The helper is called with a sentence and is expected to return a
    ``(corrected_text, changes)`` pair, where every entry of ``changes`` is a
    mapping that carries at least ``'original'`` and ``'correction'`` keys.
    """

    @classmethod
    def setUpClass(cls):
        # Imported lazily so that merely collecting this module does not pull
        # in the application package.
        from app import _TA_MARBUTA_DICT, _fix_ta_marbuta

        # staticmethod() keeps the plain function from being bound to the test
        # instance when it is reached through ``self.fix``.
        cls.fix = staticmethod(_fix_ta_marbuta)
        cls.dict = _TA_MARBUTA_DICT

    def test_basic_fix(self):
        """المدرسه should be corrected to المدرسة."""
        fixed, edits = self.fix('ذهبت الى المدرسه')
        self.assertIn('المدرسة', fixed)
        self.assertEqual(len(edits), 1)
        only_edit = edits[0]
        self.assertEqual(only_edit['original'], 'المدرسه')
        self.assertEqual(only_edit['correction'], 'المدرسة')

    def test_multiple_fixes(self):
        """Multiple ta marbuta errors in one sentence."""
        fixed, edits = self.fix('الحياه في المدينه جميله')
        for expected_word in ('الحياة', 'المدينة'):
            self.assertIn(expected_word, fixed)
        self.assertGreaterEqual(len(edits), 2)

    def test_no_false_positives(self):
        """Words ending in ه that are NOT ta marbuta should be left alone."""
        sentence = 'الله أكبر'
        fixed, edits = self.fix(sentence)
        self.assertEqual(fixed, sentence)
        self.assertEqual(len(edits), 0)

    def test_correct_text_untouched(self):
        """Already correct text should not be changed."""
        sentence = 'ذهبت إلى المدرسة'
        fixed, edits = self.fix(sentence)
        self.assertEqual(fixed, sentence)
        self.assertEqual(len(edits), 0)

    def test_without_alef_lam(self):
        """Bare words without ال should also be fixed."""
        fixed, _edits = self.fix('هذه مدرسه كبيره')
        self.assertIn('مدرسة', fixed)

    def test_dict_coverage(self):
        """Dictionary should have significant coverage."""
        entry_count = len(self.dict)
        self.assertGreater(entry_count, 50)
707
+
708
+
709
+ # ═══════════════════════════════════════════════════════════════
710
+ # P2: Hamza Whitelist Tests
711
+ # ═══════════════════════════════════════════════════════════════
712
class TestHamzaWhitelist(unittest.TestCase):
    """Check ``AraSpellPostProcessor.fix_common_hamza`` on common hamza slips.

    Every test is skipped when the post-processor cannot be imported.
    """

    @classmethod
    def setUpClass(cls):
        try:
            from nlp.spelling.araspell_rules import AraSpellPostProcessor
            cls.fix = staticmethod(AraSpellPostProcessor.fix_common_hamza)
        except Exception:
            # Best effort: any failure while loading the optional spelling
            # stack turns into a skip instead of an error.
            cls.available = False
        else:
            cls.available = True

    def setUp(self):
        # One shared guard instead of repeating the availability check in
        # each test method.
        if not self.available:
            self.skipTest("AraSpellPostProcessor not available")

    def test_anta_fix(self):
        """انت should become أنت."""
        self.assertIn('أنت', self.fix('انت طالب'))

    def test_ana_fix(self):
        """انا should become أنا."""
        self.assertIn('أنا', self.fix('انا ذاهب'))

    def test_alaan_fix(self):
        """الان should become الآن."""
        self.assertIn('الآن', self.fix('اذهب الان'))

    def test_correct_hamza_untouched(self):
        """Already correct hamza should not be changed."""
        sentence = 'أنت ذاهب إلى المدرسة'
        self.assertEqual(self.fix(sentence), sentence)
751
+
752
+
753
+ # ═══════════════════════════════════════════════════════════════
754
+ # P3: Caching & Rate Limiting Tests
755
+ # ═══════════════════════════════════════════════════════════════
756
class TestCachingAndRateLimiting(unittest.TestCase):
    """Cover the response-cache and rate-limit helpers exposed by ``app``."""

    @classmethod
    def setUpClass(cls):
        from app import _analyze_cache as cache_store
        from app import _check_rate_limit as check_rate
        from app import _get_cache_key as make_key
        from app import _get_cached_response as read_cache
        from app import _rate_limit_store as rate_store
        from app import _set_cached_response as write_cache

        # staticmethod() stops the helpers from receiving ``self`` when they
        # are called through the test instance.
        cls._get_cache_key = staticmethod(make_key)
        cls._get_cached = staticmethod(read_cache)
        cls._set_cached = staticmethod(write_cache)
        cls._check_rate = staticmethod(check_rate)
        cls._cache = cache_store
        cls._rate_store = rate_store

    def setUp(self):
        # Both stores are module-level objects in ``app``; empty them so no
        # test observes entries left behind by a previous one.
        for store in (self._cache, self._rate_store):
            store.clear()

    def test_cache_key_deterministic(self):
        """Same text should produce same cache key."""
        text = 'مرحبا'
        self.assertEqual(self._get_cache_key(text), self._get_cache_key(text))

    def test_cache_key_different(self):
        """Different texts should produce different keys."""
        first_key = self._get_cache_key('مرحبا')
        second_key = self._get_cache_key('أهلا')
        self.assertNotEqual(first_key, second_key)

    def test_cache_store_and_retrieve(self):
        """Cached response should be retrievable."""
        payload = {'original': 'test', 'corrected': 'test', 'suggestions': []}
        self._set_cached('مرحبا', payload)
        fetched = self._get_cached('مرحبا')
        self.assertIsNotNone(fetched)
        self.assertEqual(fetched['original'], 'test')

    def test_cache_miss(self):
        """Non-cached text should return None."""
        self.assertIsNone(self._get_cached('نص جديد'))

    def test_rate_limit_allows(self):
        """First request should be allowed."""
        allowed = self._check_rate('127.0.0.1')
        self.assertTrue(allowed)

    def test_rate_limit_blocks(self):
        """Should block after exceeding limit."""
        client = 'test_ip'
        # 30 warm-up calls, then the next call is expected to be refused.
        # NOTE(review): 30 presumably mirrors the per-client limit configured
        # in app — confirm against the application settings.
        attempts = 0
        while attempts < 30:
            self._check_rate(client)
            attempts += 1
        self.assertFalse(self._check_rate(client))
811
+
812
+
813
+ # ═══════════════════════════════════════════════════════════════
814
+ # P2: Grammar Splitting Tests
815
+ # ═══════════════════════════════════════════════════════════════
816
class TestGrammarSplitting(unittest.TestCase):
    """Tests for grammar multi-word diff splitting logic."""

    def test_split_logic(self):
        """Multi-word grammar diffs should be split into individual words."""
        # Simulate the splitting logic from analyze_text: pair the words of
        # the original and corrected phrases positionally and keep only the
        # pairs that differ.
        orig_words = 'الي المدرسه الاستاذ'.split()
        corr_words = 'إلى المدرسة الأستاذ'.split()

        # Positional pairing is only meaningful when both sides have the same
        # number of words.
        self.assertEqual(len(orig_words), len(corr_words))

        diffs = [
            {'original': ow, 'correction': cw}
            for ow, cw in zip(orig_words, corr_words)
            if ow != cw
        ]

        # Compare the whole list so that every pair — including the last one,
        # which was previously counted but never inspected — is verified.
        self.assertEqual(diffs, [
            {'original': 'الي', 'correction': 'إلى'},
            {'original': 'المدرسه', 'correction': 'المدرسة'},
            {'original': 'الاستاذ', 'correction': 'الأستاذ'},
        ])
839
+
840
+
841
  if __name__ == '__main__':
842
  unittest.main()
tests/test_concurrency.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Phase 9 — Concurrency Re-verification
3
+
4
+ Send 5 genuinely different inputs simultaneously.
5
+ Verify each response correctly corresponds to its own input.
6
+ No mixed, swapped, or cross-contaminated suggestions.
7
+
8
+ If cross-contamination is found, treat as P0 bug.
9
+ """
10
+ import sys, os, json, time, requests
11
+ from concurrent.futures import ThreadPoolExecutor, as_completed
12
+
13
+ API_BASE = "https://bayan10-bayan-api.hf.space"
14
+ TIMEOUT = 60
15
+
16
def api_call(endpoint, text):
    """POST ``text`` as JSON to ``API_BASE + endpoint`` and return the reply.

    Args:
        endpoint: Path appended verbatim to ``API_BASE`` (e.g. ``"/api/analyze"``).
        text: Value sent as the ``"text"`` field of the JSON request body.

    Returns:
        On HTTP 200, the decoded JSON body with an added ``'_elapsed_ms'``
        entry (request latency in whole milliseconds). On any other status,
        or on any exception, a dict with an ``'error'`` message and
        ``'_elapsed_ms'``. This function never raises, so it is safe to run
        inside a thread pool.
    """
    url = f"{API_BASE}{endpoint}"
    # perf_counter() is monotonic, so the measured latency cannot be skewed
    # (or go negative) if the system clock is adjusted during the request.
    t0 = time.perf_counter()
    try:
        resp = requests.post(url, json={"text": text}, timeout=TIMEOUT)
        elapsed = int((time.perf_counter() - t0) * 1000)
        if resp.status_code == 200:
            data = resp.json()
            # A non-object JSON body makes this assignment raise; that is
            # reported through the generic error path below.
            data['_elapsed_ms'] = elapsed
            return data
        return {"error": f"HTTP {resp.status_code}", "_elapsed_ms": elapsed}
    except Exception as e:
        # Deliberately broad: a network failure, timeout or malformed body
        # must yield an error record rather than abort the calling run.
        return {
            "error": f"{type(e).__name__}: {e}",
            "_elapsed_ms": int((time.perf_counter() - t0) * 1000),
        }
29
+
30
+
31
# 5 genuinely different inputs — different lengths, different error types
#
# Keys of every entry:
#   id                           – label used when printing and storing results
#   text                         – sentence posted to the API
#   description                  – human-readable note shown in the report
#   expected_contains            – fragment that must appear in the corrected text
#   must_not_contain_from_others – fragments taken from the OTHER entries; finding
#                                  one in this entry's corrected text is treated
#                                  as cross-request contamination
CONCURRENT_INPUTS = [
    {
        "id": "CONC-1",
        "text": "الحديقه جميله",
        "description": "Short text with spelling error (ه→ة)",
        "expected_contains": "الحديق",  # at least part of the input
        "must_not_contain_from_others": ["المدرسة", "القاهرة", "مصر"],
    },
    {
        "id": "CONC-2",
        "text": "الطلاب ذهبو الى المدرسة",
        "description": "Medium text with grammar error (ذهبو→ذهبوا)",
        "expected_contains": "المدرسة",
        "must_not_contain_from_others": ["الحديق", "القاهرة عاصمة"],
    },
    {
        "id": "CONC-3",
        # NOTE(review): the description says "50+ words" but this sentence
        # contains 29 whitespace-separated words — confirm which is intended.
        "text": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة في بناء كتلة عضلية قوية ويا له من التزام حديدي يثير الإعجاب ولقد كان أداؤه في المسابقات الأخيرة مبهرا للغاية",
        "description": "Long text with punctuation needed (50+ words)",
        "expected_contains": "الرياضي",
        "must_not_contain_from_others": ["الحديق", "المدرسة"],
    },
    {
        "id": "CONC-4",
        "text": "القاهرة عاصمة مصر",
        "description": "Correct text (should return ~0 suggestions)",
        "expected_contains": "القاهرة",
        "must_not_contain_from_others": ["الحديق", "المدرسة", "الرياضي"],
    },
    {
        "id": "CONC-5",
        "text": "هذة المدينه جميله جدا ومناخها معتدل",
        "description": "Text with mixed errors (هذة→هذه, ه→ة)",
        "expected_contains": "المدين",  # prefix shared by the misspelt and the corrected word
        "must_not_contain_from_others": ["المدرسة", "القاهرة", "الرياضي"],
    },
]
69
+
70
+
71
def test_concurrent():
    """Fire every entry of ``CONCURRENT_INPUTS`` at once and verify isolation.

    Each response is checked against the input that produced it:

    1. the echoed ``original`` field equals the submitted text;
    2. the ``corrected`` text contains the entry's ``expected_contains``;
    3. the ``corrected`` text contains none of the fragments listed in
       ``must_not_contain_from_others``;
    4. every suggestion refers to text that is present in the input.

    Failures of checks 1 and 3 are reported as cross-contamination. Failed
    requests and failures of checks 2 and 4 clear ``all_pass`` without
    flagging contamination, so the final verdict distinguishes "clean",
    "contaminated" and "other problems".

    Returns:
        dict with ``test_count``, ``all_pass``, ``contamination_found`` and a
        per-id ``results`` mapping holding the corrected text and the number
        of suggestions.
    """
    print("=" * 70)
    print("PHASE 9 — Concurrency Re-verification")
    print("=" * 70)

    # Fire all requests simultaneously
    results = {}
    print(f"\nSending {len(CONCURRENT_INPUTS)} requests simultaneously...")

    # One worker per input so that no request waits for a free thread;
    # max(1, ...) keeps the executor constructible for an empty input list.
    with ThreadPoolExecutor(max_workers=max(1, len(CONCURRENT_INPUTS))) as executor:
        futures = {
            executor.submit(api_call, "/api/analyze", inp["text"]): inp
            for inp in CONCURRENT_INPUTS
        }

        for future in as_completed(futures):
            inp = futures[future]
            result = future.result()
            results[inp["id"]] = {
                "input": inp,
                "response": result,
            }
            print(f" {inp['id']} completed ({result.get('_elapsed_ms', '?')}ms)")

    # Verify each response corresponds to its own input
    print("\n--- Verification ---")
    all_pass = True
    contamination_found = False

    for test_id, data in sorted(results.items()):
        inp = data["input"]
        resp = data["response"]

        if "error" in resp and "status" not in resp:
            print(f"\n ⚠ {test_id}: ERROR — {resp['error']}")
            # A request that never produced a usable answer cannot count as a
            # pass; otherwise a fully failed run would report "ALL PASSED".
            all_pass = False
            continue

        corrected = resp.get("corrected", "")
        original = resp.get("original", "")
        suggestions = resp.get("suggestions", [])

        print(f"\n {test_id}: {inp['description']}")
        print(f" Input: '{inp['text'][:60]}...'")
        print(f" Original: '{original[:60]}...'")
        print(f" Corrected: '{corrected[:60]}...'")
        print(f" Suggestions: {len(suggestions)}")

        # Check 1: original field should match our input
        if original != inp["text"]:
            print(f" ❌ FAIL: original != input! (cross-contamination?)")
            contamination_found = True
            all_pass = False
        else:
            print(f" ✓ original matches input")

        # Check 2: corrected should contain expected content
        if inp["expected_contains"] in corrected:
            print(f" ✓ corrected contains '{inp['expected_contains']}'")
        else:
            print(f" ⚠ corrected missing '{inp['expected_contains']}'")
            all_pass = False

        # Check 3: corrected must NOT contain content from other inputs
        for foreign in inp["must_not_contain_from_others"]:
            if foreign in corrected:
                print(f" ❌ CONTAMINATION: corrected contains '{foreign}' from another input!")
                contamination_found = True
                all_pass = False

        # Check 4: suggestions should reference text in our input
        for s in suggestions:
            s_orig = s.get("original", "")
            s_start = s.get("start", 0)
            s_end = s.get("end", 0)
            # The suggestion's original text should be a substring of our input
            if s_orig and s_orig not in inp["text"]:
                # Check if it's a substring match (punc may include partial words)
                input_slice = inp["text"][s_start:s_end]
                if s_orig != input_slice:
                    print(f" ⚠ Suggestion '{s_orig}' [{s_start}:{s_end}] not in input")
                    all_pass = False

    print("\n" + "=" * 50)
    if contamination_found:
        print("🚨 P0: CROSS-CONTAMINATION DETECTED!")
        print(" PipelineContext state is leaking between requests.")
        print(" STOP ALL OTHER WORK AND FIX THIS FIRST.")
    elif all_pass:
        print("✅ ALL PASSED — No cross-contamination detected.")
    else:
        print("⚠ Some checks failed but no cross-contamination.")

    return {
        "test_count": len(CONCURRENT_INPUTS),
        "all_pass": all_pass,
        "contamination_found": contamination_found,
        "results": {k: {"corrected": v["response"].get("corrected", ""),
                        "suggestions_count": len(v["response"].get("suggestions", []))}
                    for k, v in results.items()},
    }
169
+
170
+
171
if __name__ == "__main__":
    # Run the check and keep a JSON copy of its summary next to this script.
    result = test_concurrent()
    script_dir = os.path.dirname(__file__)
    output_path = os.path.join(script_dir, 'phase9_results.json')
    # ensure_ascii=False keeps the Arabic text readable in the saved file.
    serialized = json.dumps(result, ensure_ascii=False, indent=2)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(serialized)
    print(f"\nResults saved to {output_path}")
trace_output.txt ADDED
File without changes
trace_punc.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Step-by-step trace of one sentence through the punctuation pipeline:
#   [A] raw PuncAra-v1 generation
#   [B] after PunctuationChecker._strip_non_punctuation_changes
#   [C] after arabic_postprocessing
#   [D] word diffs against the input, each marked PASS or BLOCKED
import sys, os, re

# Resolve "src" relative to this file rather than the current working
# directory, so the script behaves the same wherever it is launched from.
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'src'))
import logging
logging.basicConfig(level=logging.INFO)
print("Starting...")

import torch
print(f"CUDA available: {torch.cuda.is_available()}")

from transformers import EncoderDecoderModel, AutoTokenizer
print("Loading PuncAra-v1...")
model = EncoderDecoderModel.from_pretrained("bayan10/PuncAra-v1")
tokenizer = AutoTokenizer.from_pretrained("bayan10/PuncAra-v1")
model.eval()
print("Model loaded!")

inp = "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة في بناء كتلة عضلية قوية ويا له من التزام حديدي يثير الإعجاب"
print(f"\nINPUT: {inp}")

# Raw inference
from nlp.punctuation.punctuation_rules import arabic_preprocessing
processed = arabic_preprocessing(inp)
inputs = tokenizer(processed, return_tensors="pt", padding=True, truncation=True, max_length=128)
print("Running inference...")
with torch.no_grad():
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        decoder_start_token_id=tokenizer.cls_token_id,
        bos_token_id=tokenizer.cls_token_id,
        eos_token_id=tokenizer.sep_token_id,
        pad_token_id=tokenizer.pad_token_id,
        max_length=128, num_beams=3, repetition_penalty=1.2,
        length_penalty=1.0, early_stopping=True, do_sample=False
    )
raw = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f"[A] RAW MODEL: {raw}")

# Strip non-punc
from nlp.punctuation.punctuation_service import PunctuationChecker
checker = PunctuationChecker(model, tokenizer, torch.device('cpu'))
stripped = checker._strip_non_punctuation_changes(inp, raw)
print(f"[B] STRIPPED: {stripped}")
if stripped != raw:
    rw, sw = raw.split(), stripped.split()
    # Positional comparison; zip() stops at the shorter list, so words beyond
    # the common length are not reported here.
    for w1, w2 in zip(rw, sw):
        if w1 != w2:
            print(f" LOST: '{w1}' -> '{w2}'")

# Postprocess
from nlp.punctuation.punctuation_rules import arabic_postprocessing
final = arabic_postprocessing(stripped)
print(f"[C] FINAL: {final}")

# Diffs
from app import get_word_diffs
from nlp.punctuation.punctuation_rules import validate_punctuation_diff
if final != inp:
    diffs = get_word_diffs(inp, final)
    print(f"[D] DIFFS ({len(diffs)}):")
    for d in diffs:
        o, c = d.get('original',''), d.get('correction','')
        valid = validate_punctuation_diff(d)
        # Keep only Arabic-block and Latin letters; a punctuation-only change
        # must leave this letter skeleton identical on both sides.
        oa = re.sub(r'[^\u0600-\u06FFa-zA-Z]','',o)
        ca = re.sub(r'[^\u0600-\u06FFa-zA-Z]','',c)
        alpha_ok = oa == ca
        s = "PASS" if valid and alpha_ok else "BLOCKED"
        # r lists which check(s) rejected the diff
        r = ""
        if not valid: r += " safety"
        if not alpha_ok: r += " alpha"
        print(f" [{d['start']}:{d['end']}] '{o}' -> '{c}' [{s}{r}]")
else:
    print("[D] NO DIFFS!")
print("\nDONE")
trace_punctuation.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ BAYAN Punctuation Trace — Diagnose where punctuation marks get lost.
3
+
4
+ Compares:
5
+ A) Raw PuncAra model output (no pipeline)
6
+ B) After _strip_non_punctuation_changes (Fix P1)
7
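+ C) After arabic_postprocessing (final punctuation output)
8
+ D) After get_word_diffs, with each diff checked by validate_punctuation_diff
9
+ and by an alphabetic-content match (the lock-bypass condition)
10
+ Later stages (StageLocker, overlap resolver, patch cap) are only described in the closing summary, not run.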
11
+ """
12
+
13
+ import sys, os, re, difflib
14
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
15
+
16
+ # Suppress model loading noise
17
+ import logging
18
+ logging.basicConfig(level=logging.WARNING)
19
+
20
+ # ─── Test Sentences ─────────────────────────────────────────────
21
# Trace fixtures. Each case pairs an unpunctuated Arabic sentence ("input")
# with a hand-punctuated reference ("expected"). main() feeds "input" to the
# model and prints "expected" (and its mark count) purely for comparison.
TEST_SENTENCES = [
    {
        "input": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة في بناء كتلة عضلية قوية ويا له من التزام حديدي يثير الإعجاب",
        "expected": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة؛ رغبة في بناء كتلة عضلية قوية، ويا له من التزام حديدي يثير الإعجاب!",
    },
    {
        "input": "كانت الفتيات يلعبن في الحديقة وفجأة سقطت إحداهن وبدأت تبكي بشدة",
        "expected": "كانت الفتيات يلعبن في الحديقة، وفجأة سقطت إحداهن وبدأت تبكي بشدة.",
    },
    {
        "input": "إن الذكاء الاصطناعي يلعب دورا هاما لذلك يجب الاهتمام به",
        "expected": "إن الذكاء الاصطناعي يلعب دورا هاما؛ لذلك يجب الاهتمام به.",
    },
    {
        "input": "هل تعلم أن القاهرة هي عاصمة مصر وتقع على ضفاف نهر النيل",
        "expected": "هل تعلم أن القاهرة هي عاصمة مصر، وتقع على ضفاف نهر النيل؟",
    },
    {
        "input": "قال المعلم للطلاب ادرسوا جيدا فالامتحان قريب",
        "expected": "قال المعلم للطلاب: ادرسوا جيدا، فالامتحان قريب.",
    },
]
43
+
44
def count_punct(text):
    """Return how many characters of *text* are punctuation marks.

    The recognised marks are the ASCII characters . , ; : ! ? together with
    the Arabic comma, semicolon and question mark.
    """
    punctuation = frozenset('.,;:!?،؛؟')
    total = 0
    for ch in text:
        if ch in punctuation:
            total += 1
    return total
48
+
49
def diff_punct(before, after):
    """Locate the punctuation marks of two texts.

    Returns a pair ``(before_marks, after_marks)``; each element is a list of
    ``(index, character)`` tuples, in order of appearance, for every
    punctuation mark found in the corresponding text.
    """
    punctuation = frozenset('.,;:!?،؛؟')

    def locate(text):
        found = []
        for pos, ch in enumerate(text):
            if ch in punctuation:
                found.append((pos, ch))
        return found

    return locate(before), locate(after)
55
+
56
# Matches every character that is neither in the Arabic block U+0600-U+06FF
# nor an ASCII letter; deleting the matches leaves a word's alphabetic content.
_NON_ALPHA_RE = re.compile(r'[^\u0600-\u06FFa-zA-Z]')

# Static explanation printed after all sentences have been traced.
_LOSS_POINTS_SUMMARY = """
Where punctuation marks can be lost:

[A→B] _strip_non_punctuation_changes():
If model changes a word's spelling AND adds punctuation,
the punctuation transfer logic may fail.

[B→C] arabic_postprocessing():
Typographic cleanup may remove valid marks.

[C→D] get_word_diffs():
Word-level diff may merge/split changes incorrectly.

[D→E] StageLocker:
Locked ranges from spelling/grammar block nearby punctuation.
(Now relaxed: pure-punc changes pass through)

[D→E] validate_punctuation_diff():
Safety layer rejects diffs that change Arabic text.

[E→F] Overlap resolver:
Grammar/spelling patches take priority over punctuation.

[E→F] Patch cap:
Max 3 punctuation patches per response.
"""


def _diff_status(is_valid, alpha_match):
    """Build the status label printed next to a single word diff.

    Args:
        is_valid: result of validate_punctuation_diff for the diff.
        alpha_match: True when original and correction have identical
            alphabetic content once non-letters are removed.

    Returns:
        The applicable labels joined with " | ".
    """
    parts = []
    if not is_valid:
        parts.append("❌ SAFETY-REJECTED")
    if not alpha_match:
        parts.append("❌ LOCK-BLOCKED (alpha differs)")
    if is_valid and alpha_match:
        parts.append("✅ WOULD PASS")
    elif is_valid:
        parts.append("✅ valid-punc")
    return " | ".join(parts)


def main():
    """Trace every sentence in TEST_SENTENCES through the punctuation stages.

    For each sentence the following are printed:
      [A] the output of ``punc_checker._fix_punctuation``
      [B] the result of ``_strip_non_punctuation_changes`` on that output
      [C] the result of ``arabic_postprocessing`` on [B]
      [D] every diff from ``get_word_diffs(input, [C])`` with a status label
    A static summary of possible loss points is printed at the end.
    """
    from itertools import zip_longest

    print("=" * 80)
    print("BAYAN PUNCTUATION TRACE — Where do punctuation marks get lost?")
    print("=" * 80)

    # Project imports stay inside main() so each progress line is printed
    # before the corresponding import runs.
    print("\n[1/2] Loading PuncAra-v1 model...")
    from nlp.punctuation.punctuation_service import get_punctuation_model
    punc_checker = get_punctuation_model()
    print(" ✓ Model loaded\n")

    print("[2/2] Loading pipeline tools...")
    from app import get_word_diffs
    # arabic_postprocessing is imported once here instead of on every loop
    # iteration.
    from nlp.punctuation.punctuation_rules import (
        arabic_postprocessing,
        validate_punctuation_diff,
    )
    print(" ✓ Tools loaded\n")

    for number, test in enumerate(TEST_SENTENCES, start=1):
        inp = test["input"]
        expected = test["expected"]
        input_marks = count_punct(inp)

        print("─" * 80)
        print(f"TEST {number}")
        print(f" INPUT: {inp}")
        print(f" EXPECTED: {expected}")
        print(f" Expected marks: {count_punct(expected)}")
        print()

        # ─── Stage A: raw model output ─────────────────────────────
        raw_output = punc_checker._fix_punctuation(inp)
        print(f" [A] RAW MODEL: {raw_output}")
        print(f" Marks added: {count_punct(raw_output) - input_marks}")
        print()

        # ─── Stage B: after _strip_non_punctuation_changes ─────────
        stripped = punc_checker._strip_non_punctuation_changes(inp, raw_output)
        print(f" [B] STRIP NON-PUNC: {stripped}")
        if stripped != raw_output:
            print(" ⚠ Changes stripped! Diff from raw:")
            # zip_longest (not zip) so that words present on only one side
            # are reported when the two word counts differ.
            word_pairs = zip_longest(
                raw_output.split(), stripped.split(), fillvalue=""
            )
            for w1, w2 in word_pairs:
                if w1 != w2:
                    print(f" '{w1}' → '{w2}'")
        print(f" Marks added: {count_punct(stripped) - input_marks}")
        print()

        # ─── Stage C: arabic_postprocessing ────────────────────────
        # NOTE(review): the original comment says this equals what correct()
        # returns after postprocessing; that cannot be confirmed from this file.
        final_punc = arabic_postprocessing(stripped)
        print(f" [C] FINAL PUNC OUT: {final_punc}")
        print(f" Marks added: {count_punct(final_punc) - input_marks}")
        print()

        # ─── Stage D: word diffs + per-diff checks ─────────────────
        if final_punc != inp:
            diffs = get_word_diffs(inp, final_punc)
            print(f" [D] WORD DIFFS ({len(diffs)} found):")
            for d in diffs:
                orig = d.get('original', '')
                corr = d.get('correction', '')

                is_valid = validate_punctuation_diff(d)

                # Alpha match (lock bypass): the diff must leave the letters
                # untouched, i.e. differ only in non-alphabetic characters.
                alpha_match = (
                    _NON_ALPHA_RE.sub('', orig) == _NON_ALPHA_RE.sub('', corr)
                )

                status = _diff_status(is_valid, alpha_match)
                print(f" [{d['start']}:{d['end']}] '{orig}' → '{corr}' {status}")
        else:
            print(" [D] NO DIFFS — model returned same text as input!")

        print()

    # ─── Summary ───────────────────────────────────────────────────
    print("=" * 80)
    print("LOSS POINTS SUMMARY")
    print("=" * 80)
    print(_LOSS_POINTS_SUMMARY)
173
+
174
+
175
# Run the trace only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()