Spaces:

bayan10
/

bayan-api

Running

App Files Files Community

youssefreda9 committed 3 days ago

Commit

fe1e225

0 Parent(s):

HF Deploy: Fix syntax error with smart quotes in popup.js

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +2 -0
.github/workflows/deploy.yml +85 -0
.gitignore +0 -0
1.png +3 -0
BAYAN_COMPLETE_AUDIT.md +366 -0
Dockerfile +95 -0
LICENSE +21 -0
LOGOS/icon128.png +3 -0
LOGOS/icon16.png +3 -0
LOGOS/icon32.png +3 -0
LOGOS/icon48.png +3 -0
PROJECT_DESCRIPTION.md +217 -0
Procfile +1 -0
QUICKSTART.md +126 -0
README.md +41 -0
README_HF.md +8 -0
README_SETUP.md +172 -0
READMEquran.md +64 -0
add_divider.py +19 -0
add_extension_theme_toggle.py +124 -0
analyze_failures.py +31 -0
apply_locks.py +77 -0
archive/legacy_scripts/AraSpell.py +2224 -0
archive/legacy_scripts/Grammer_Rules.py +179 -0
archive/legacy_scripts/PuncAra.py +180 -0
archive/legacy_scripts/gradio Spelling.py +210 -0
archive/legacy_scripts/punctuation_rulesV2.py +257 -0
archive/old_tests/deep_dive_expanded.json +1323 -0
archive/old_tests/deep_dive_expanded.py +428 -0
archive/old_tests/deep_dive_gaps.json +260 -0
archive/old_tests/deep_dive_gaps.py +295 -0
archive/old_tests/deep_dive_output.json +671 -0
archive/old_tests/deep_dive_test.py +519 -0
archive/old_tests/gap_filling_results.json +261 -0
archive/old_tests/gap_filling_tests.py +522 -0
archive/old_tests/phase0_investigation.py +221 -0
archive/old_tests/phase0_results.json +75 -0
archive/old_tests/phase10_helpers/audit_output.txt +339 -0
archive/old_tests/phase10_helpers/audit_script.py +71 -0
archive/old_tests/phase10_helpers/extract_entity_results.py +16 -0
archive/old_tests/phase10_helpers/fetch_hf_logs.py +89 -0
archive/old_tests/phase10_helpers/generate_audit_md.py +165 -0
archive/old_tests/phase10_helpers/generate_collision_dataset.py +48 -0
archive/old_tests/phase10_helpers/generate_regression_audit.py +231 -0
archive/old_tests/phase10_helpers/show_samples.py +39 -0
archive/old_tests/phase5_investigation.py +161 -0
archive/old_tests/phase5_results.json +61 -0
archive/old_tests/phase8_adversarial_audit.py +678 -0
archive/old_tests/phase9_results.json +0 -0
archive/old_tests/phase9_validation.py +811 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ *.db filter=lfs diff=lfs merge=lfs -text
2	+ quran_master.db filter=lfs diff=lfs merge=lfs -text

.github/workflows/deploy.yml ADDED Viewed

	@@ -0,0 +1,85 @@

+# CI for Bayan: static validation on every push / pull request targeting
+# main, followed by a smoke test of the live backend on pushes to main.
+name: Bayan CI/CD
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+# Least privilege: no job in this workflow writes to the repository, so the
+# GITHUB_TOKEN only needs read access (required by actions/checkout).
+permissions:
+  contents: read
+jobs:
+  # Static sanity checks that need no network or model downloads: Python
+  # syntax, presence of critical files, API routes, Supabase meta tags and
+  # the Dockerfile's EXPOSE / startup command.
+  validate:
+    name: Validate Code
+    runs-on: ubuntu-latest
+    # Cap the run so a hung step cannot hold the runner for the 360-minute
+    # default job timeout.
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - name: Check Python syntax (all .py files)
+        run: |
+          echo "Checking Python syntax..."
+          # NUL-delimited names plus `IFS= read -r` keep paths containing
+          # spaces or backslashes intact. Feeding the loop through process
+          # substitution keeps it in the main shell, so `exit 1` fails the
+          # step directly instead of relying on the pipeline's exit status.
+          while IFS= read -r -d '' f; do
+            python -m py_compile "$f" 2>&1 && echo "  ✅ $f" || { echo "  ❌ $f"; exit 1; }
+          done < <(find . -name "*.py" -not -path "./.git/*" -not -path "./archive/*" -print0)
+          echo "✅ All Python files have valid syntax"
+      - name: Verify critical files exist
+        run: |
+          for f in src/app.py src/model_loader.py src/hf_inference.py src/index.html \
+                   src/nlp/__init__.py src/nlp/spelling/araspell_service.py \
+                   src/nlp/grammar/grammar_service.py src/nlp/punctuation/punctuation_service.py \
+                   Dockerfile Procfile requirements.txt; do
+            test -f "$f" && echo "  ✅ $f" || { echo "  ❌ MISSING: $f"; exit 1; }
+          done
+          echo "✅ All critical files present"
+      - name: Verify API routes defined in app.py
+        run: |
+          for route in "/api/health" "/api/analyze" "/api/summarize" "/api/spelling" \
+                       "/api/grammar" "/api/punctuation" "/api/quran"; do
+            # -F: match the route as a literal string, not a regular expression.
+            grep -qF -- "$route" src/app.py && echo "  ✅ $route" || { echo "  ❌ MISSING ROUTE: $route"; exit 1; }
+          done
+          echo "✅ All API routes defined"
+      - name: Validate Supabase meta tags in index.html
+        run: |
+          # Print which tag is missing so a failure is diagnosable from the log.
+          grep -q 'supabase-url' src/index.html && echo "  ✅ supabase-url meta tag" || { echo "  ❌ MISSING: supabase-url meta tag"; exit 1; }
+          grep -q 'supabase-anon-key' src/index.html && echo "  ✅ supabase-anon-key meta tag" || { echo "  ❌ MISSING: supabase-anon-key meta tag"; exit 1; }
+          echo "✅ Supabase tags present"
+      - name: Validate Dockerfile
+        run: |
+          grep -q 'EXPOSE' Dockerfile && echo "  ✅ EXPOSE directive" || { echo "  ❌ MISSING: EXPOSE directive"; exit 1; }
+          grep -q 'gunicorn\|CMD' Dockerfile && echo "  ✅ Startup command" || { echo "  ❌ MISSING: startup command (gunicorn/CMD)"; exit 1; }
+          echo "✅ Dockerfile valid"
+  # Smoke test of the deployed backend. Runs only for pushes to main, after
+  # `validate` succeeds. The deployment itself is not performed by this
+  # workflow; this job only waits and then probes /api/health.
+  health-check:
+    name: Post-Deploy Health Check
+    needs: validate
+    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
+    runs-on: ubuntu-latest
+    # Upper bound: 120 s initial wait + 5 probes (<= 30 s each) + 4 x 30 s pauses.
+    timeout-minutes: 10
+    steps:
+      - name: Wait for HuggingFace Spaces to deploy
+        run: sleep 120
+      - name: Check backend health
+        env:
+          # Pass the secret through the environment instead of expanding
+          # `${{ }}` inside the script, so its value is never parsed as shell.
+          BACKEND_URL: ${{ secrets.BACKEND_URL }}
+        run: |
+          # Fall back to the public Space URL when the secret is unset/empty;
+          # strip one trailing slash so the path never becomes "//api/health".
+          base_url="${BACKEND_URL:-https://bayan10-bayan-api.hf.space}"
+          HEALTH_URL="${base_url%/}/api/health"
+          echo "Checking: $HEALTH_URL"
+          body_file="$(mktemp)"
+          max_attempts=5
+          for attempt in $(seq 1 "$max_attempts"); do
+            : > "$body_file"
+            # `|| true`: a connection error must not abort the step under
+            # `bash -e`; curl still prints 000 as the status code and the
+            # loop retries.
+            http_code=$(curl -sS --max-time 30 -o "$body_file" -w '%{http_code}' "$HEALTH_URL" || true)
+            echo "Attempt $attempt/$max_attempts"
+            echo "Status: $http_code"
+            echo "Body: $(cat "$body_file")"
+            # 503 is accepted as "responding" (kept from the original check;
+            # presumably the API reports 503 while models are still loading).
+            if [ "$http_code" = "200" ] || [ "$http_code" = "503" ]; then
+              echo "✅ Backend is responding"
+              exit 0
+            fi
+            if [ "$attempt" -lt "$max_attempts" ]; then
+              sleep 30
+            fi
+          done
+          echo "❌ Backend health check failed"
+          exit 1

.gitignore ADDED Viewed

Binary file (556 Bytes). View file

1.png ADDED Viewed

Git LFS Details

SHA256: ac95bbea5577ea3ec66e96a64311220b40201ed0e17e1a084aea51f1d2b16336
Pointer size: 131 Bytes
Size of remote file: 695 kB

BAYAN_COMPLETE_AUDIT.md ADDED Viewed

	@@ -0,0 +1,366 @@

+# BAYAN — Complete Product, Codebase & Extension Deep Audit
+> **Audit Date:** 2026-06-26
+> **Auditor Perspective:** Product Manager + Senior Frontend + Backend Architect + Extension Engineer + SaaS Reviewer
+---
+## 1. Current System Overview
+### Architecture Map
+```
+┌──────────────────────────────────────────────────────┐
+│                   BAYAN ECOSYSTEM                     │
+│                                                       │
+│  ┌─────────┐    ┌──────────┐    ┌─────────────────┐  │
+│  │ Website  │───▶│ Flask API │───▶│  NLP Pipeline   │  │
+│  │ (SPA)    │    │ (app.py) │    │ Spell/Gram/Punct│  │
+│  └─────────┘    └──────────┘    └─────────────────┘  │
+│       │              │                    │           │
+│       │              │          ┌─────────────────┐  │
+│       │              ├─────────▶│  HF Models      │  │
+│       │              │          │  Summarization   │  │
+│       │              │          │  Grammar (Gradio)│  │
+│       │              │          └─────────────────┘  │
+│       │              │                               │
+│  ┌─────────┐    ┌──────────┐    ┌─────────────────┐  │
+│  │Supabase │◀───│  Auth    │───▶│  Documents DB   │  │
+│  │ (Cloud) │    │  Module  │    │  Settings Sync  │  │
+│  └─────────┘    └──────────┘    └─────────────────┘  │
+│                                                       │
+│  ┌────────────────────────────────────────────────┐   │
+│  │           Chrome Extension (MV3)               │   │
+│  │  ┌──────────┐ ┌──────────┐ ┌───────────────┐  │   │
+│  │  │ Content  │ │Background│ │  Side Panel   │  │   │
+│  │  │ Script   │ │  Worker  │ │  + Popup      │  │   │
+│  │  └──────────┘ └──────────┘ └───────────────┘  │   │
+│  └────────────────────────────────────────────────┘   │
+└──────────────────────────────────────────────────────┘
+```
+### Technology Stack
+| Layer | Technology | Notes |
+|-------|-----------|-------|
+| **Frontend** | Vanilla JS, HTML, CSS (Tailwind CDN) | Custom `contenteditable` editor engine |
+| **Backend** | Flask (Python) | Single monolith `app.py` — 2,844 lines |
+| **NLP Pipeline** | Custom Python modules | Spelling, Grammar, Punctuation, Autocomplete, Dialect |
+| **AI Models** | Transformer-based | Summarization (local), Grammar (Gradio proxy), Spelling (CAMeL + custom) |
+| **Database** | Supabase (PostgreSQL) | Documents, profiles, user settings |
+| **Auth** | Supabase Auth | Guest (anonymous), Google OAuth |
+| **Deployment** | HuggingFace Spaces (Docker) | CPU-only free tier |
+| **Extension** | Chrome MV3 | Background SW, Content Script, Side Panel, Popup |
+### File Structure Summary
+| Directory | Files | Purpose |
+|-----------|-------|---------|
+| `src/` | 6 core files | Backend + HTML + CSS |
+| `src/js/` | 8 JS files + 7 subdirs | Frontend logic |
+| `src/js/auth/` | 5 files | Supabase auth (client, session, UI) |
+| `src/js/documents/` | 4 files | Local doc management + export |
+| `src/js/documents-cloud/` | 3 files | Supabase CRUD for documents |
+| `src/js/sync/` | 3 files | Offline queue + conflict resolution |
+| `src/js/settings-sync/` | 2 files | User settings cloud persistence |
+| `src/nlp/` | 6 subdirs | All NLP processing modules |
+| `extension/` | 8 files + 4 subdirs | Chrome Extension |
+| `extension/shared/` | 9 files | Shared utilities (api, renderer, patches) |
+| `extension/sidepanel/` | 3 files | Side panel UI |
+| `tests/` | 16 test files | Backend unit tests |
+| `extension/tests/` | 8 files | Extension integration tests |
+---
+## 2. Feature Inventory
+### Core AI Features
+| Feature | Backend API | Website Frontend | Extension | Files |
+|---------|------------|-----------------|-----------|-------|
+| **Spelling Correction** | ✅ `/api/spelling` + `/api/analyze` | ✅ Full (highlights, suggestions, apply) | ✅ Inline overlay + Popup + SidePanel | `nlp/spelling/`, `editor.js`, `renderer.js` |
+| **Grammar Correction** | ✅ `/api/grammar` + `/api/analyze` | ✅ Full (via Gradio proxy to HF model) | ✅ Inline overlay + Popup + SidePanel | `nlp/grammar/`, `hf_inference.py` |
+| **Punctuation** | ✅ `/api/punctuation` + `/api/analyze` | ✅ Full (PuncAra-v1 model) | ✅ Inline overlay + Popup + SidePanel | `nlp/punctuation/` |
+| **Summarization** | ✅ `/api/summarize` | ✅ Full (tab in editor, length control) | ✅ Popup tab + SidePanel tab | `model_loader.py`, `summaries-api.js` |
+| **AutoComplete** | ✅ `/api/autocomplete` | ✅ Ghost text + dropdown in editor | ⚠️ SidePanel text-box only, NO inline ghost text | `autocomplete.js`, sidepanel `btnAutocomplete` |
+| **Dialect→MSA** | ✅ `/api/dialect` | ✅ Dedicated editor tab | ✅ SidePanel tab (basic text→text) | `nlp/dialect/` |
+| **Quran Verification** | ✅ `/api/quran` | ✅ Dedicated editor tab | ✅ SidePanel tab (basic text→text) | `quran.py`, `quran_master.db` |
+### Platform Features
+| Feature | Website | Extension (Popup) | Extension (SidePanel) | Extension (Content Script) |
+|---------|---------|-------------------|----------------------|--------------------------|
+| **Authentication** | ✅ Guest + Google | ❌ None | ⚠️ Partial (`initExtensionAuth()` exists but requires web page auth sync) | ⚠️ Listens for `BAYAN_AUTH_SYNC` message from web |
+| **Document Save** | ✅ Supabase CRUD | ❌ None | ⚠️ UI exists (`btnNewDocument`, `btnSaveSelection`) but depends on auth | ❌ None |
+| **Document Load/History** | ✅ Full panel | ❌ None | ⚠️ UI exists (`documentsList`, `historyList`) but depends on auth | ❌ None |
+| **Export (PDF/DOCX/TXT)** | ✅ Full (mammoth.js, docx.js) | ❌ None | ❌ None | ❌ None |
+| **Import (TXT/DOCX)** | ✅ Full | ❌ None | ❌ None | ❌ None |
+| **Settings Sync** | ✅ Supabase | ❌ None | ⚠️ Placeholder (`syncExtensionSettings()`) | ❌ None |
+| **Theme Toggle** | ✅ Full dark/light | ❌ Hardcoded dark | ❌ Dark only (no toggle) | N/A |
+| **Focus Mode** | ✅ Full | N/A | ❌ None | N/A |
+| **Score Ring** | ✅ Animated SVG | ✅ Simplified | ✅ Simplified | ❌ None |
+| **Writing Score History** | ✅ Sparkline chart | ❌ None | ❌ None | ❌ None |
+| **Error Donut Chart** | ✅ SVG donut | ❌ None | ❌ None | ❌ None |
+| **Offline Mode** | ✅ Graceful degradation | ❌ No offline handling | ❌ No offline handling | ❌ No offline handling |
+| **Keyboard Shortcuts** | ✅ Extensive (Alt+1-3, Ctrl+S, etc.) | ❌ None | ❌ None | ❌ None |
+---
+## 3. Website vs Extension Comparison
+### Authentication Flow
+| Aspect | Website | Extension | Gap |
+|--------|---------|-----------|-----|
+| Guest login | ✅ `signInAnonymously()` | ❌ | **Critical** — extension users can't persist anything |
+| Google OAuth | ✅ `signInWithOAuth()` | ❌ | **High** |
+| Session restore | ✅ `restoreSession()` via Supabase | ❌ | **High** |
+| Auth state sync | ✅ `onAuthStateChange()` | ⚠️ Listens for `BAYAN_AUTH_SYNC` postMessage but only works when user visits Bayan website with extension installed | **High** — unreliable |
+| Auth-gated features | ✅ Documents, sync, settings | ⚠️ UI elements exist but non-functional without auth | **High** |
+### AI Feature Comparison
+| Feature | Website UX | Extension UX | Parity? |
+|---------|-----------|-------------|---------|
+| Analyze (S+G+P) | Rich editor with inline highlights, suggestion sidebar, popover tooltip, apply/dismiss per-suggestion | **Content Script:** Overlay marks + tooltip. **Popup/SidePanel:** Textarea + suggestion cards | ⚠️ Functional but UX gap |
+| Summarize | Editor tab with radio buttons (short/medium/long) | Popup/SidePanel textarea with radio buttons | ✅ Near parity |
+| AutoComplete | **Ghost text** inside editor (Tab to accept) | SidePanel has a text box with "إكمال" button but NO inline ghost text on 3rd party sites | **Medium** — missing the core UX |
+| Dialect | Dedicated editor tab with "Convert" button | SidePanel tab with text box and "Convert" button | ✅ Near parity |
+| Quran | Dedicated editor tab with search | SidePanel tab with text box and search | ✅ Near parity |
+### Documents
+| Aspect | Website | Extension | Gap |
+|--------|---------|-----------|-----|
+| Create document | ✅ `createDocument()` | ⚠️ Button exists in SidePanel but blocked by no auth | **High** |
+| List documents | ✅ Desktop sidebar panel | ⚠️ `documentsList` in SidePanel workspace tab, blocked by no auth | **High** |
+| Save/auto-save | ✅ Debounced sync via `SyncManager` | ❌ | **High** |
+| Export PDF/DOCX | ✅ `export.js` | ❌ | **Medium** |
+| Import | ✅ `import.js` (TXT, DOCX) | ❌ | **Low** |
+---
+## 4. Missing Features
+### Critical (Blocks Production)
+| # | Issue | Impact | Solution |
+|---|-------|--------|----------|
+| C1 | **`.env` file committed to Git** | Supabase URL and anon key are in the repo. While anon key is safe for client use, this is a security anti-pattern and may expose the project URL. | Remove `.env` from Git history, use HF Spaces secrets exclusively. `.gitignore` has `.env` but it was committed before the rule was added. |
+| C2 | **CORS wildcard `origins: "*"`** | Any website can call `/api/analyze`, `/api/summarize`, etc. directly. Abusers can drain compute. | Restrict CORS to `bayan10-bayan-api.hf.space` + extension origin `chrome-extension://<id>`. |
+| C3 | **No rate limiting on API** | No throttle on any endpoint. A single user can overwhelm the free-tier HF Space. | Add Flask-Limiter or simple in-memory token bucket. |
+### High (Important Feature Gap)
+| # | Issue | Impact | Solution |
+|---|-------|--------|----------|
+| H1 | Extension has no auth | Users cannot access cloud docs, settings, or history from extension | Implement Supabase auth in extension via `chrome.identity` or shared session from Bayan website |
+| H2 | Extension content script lacks AutoComplete ghost text | The flagship "ghost text" feature doesn't work on 3rd-party sites | Port `autocomplete.js` logic into `content-inline.js` with `/api/autocomplete` calls |
+| H3 | Extension popup/sidepanel have no export | Users cannot export corrected text as PDF/DOCX | Add "Copy as formatted text" or lightweight export |
+| H4 | No `documents` table migration | `supabase/migrations/001_profiles.sql` exists but no migration creates the `documents` table that `documents-api.js` uses | Create `002_documents.sql` migration |
+| H5 | Backend monolith: `app.py` is 2,844 lines | Extremely difficult to maintain, test, or extend | Split into `routes/`, `services/`, `middleware/` modules |
+### Medium (Improvement Needed)
+| # | Issue | Impact | Solution |
+|---|-------|--------|----------|
+| M1 | `src/js/api.js` uses ES module `export` syntax but is loaded via `<script>` tag (not `type="module"`) | The `api.js` exports are **never importable** — the website uses inline `fetch()` calls instead | Either convert to `type="module"` or remove the dead `export` statements |
+| M2 | Extension content script overlay doesn't handle `<iframe>` editors | Rich text editors in iframes (e.g., WordPress Gutenberg, TinyMCE) are invisible to the content script | Use `all_frames: true` in manifest or detect iframe editors |
+| M3 | Duplicated suggestion rendering logic | `ui.js` (website) and `bayan-ui.js` (extension) implement the same card HTML generation | Extract to shared package |
+| M4 | Extension `popup.js` (498 lines) and `sidepanel.js` (702 lines) share ~60% identical code | Maintenance nightmare — fixing a bug requires changes in 2+ files | Refactor into shared modules with UI-specific wrappers |
+| M5 | Grammar model uses Gradio proxy with SSE streaming | Creates a hard dependency on external `mohammedahmedezz2004-bayan-arabic-grammarly-correction.hf.space`. If that Space goes down, grammar breaks. | Host the grammar model directly on the Bayan Space, or add fallback |
+| M6 | No i18n framework on website | All strings are hardcoded in Arabic HTML. Adding English support requires rewriting HTML | Add simple i18n JSON loader (extension already has `_locales/ar/`) |
+### Low (Nice to Have)
+| # | Issue | Impact | Solution |
+|---|-------|--------|----------|
+| L1 | Extension only has Arabic locale | Cannot be published on Chrome Web Store for non-Arabic users | Add `_locales/en/messages.json` |
+| L2 | No analytics or telemetry | No visibility into usage patterns, error rates, or feature adoption | Add lightweight event tracking (privacy-respecting) |
+| L3 | Heavy vendor libraries loaded synchronously | `mammoth.browser.min.js`, `docx.umd.js`, `html2canvas.min.js` block initial render | Lazy-load on first export action |
+| L4 | No service worker for website | No offline caching for the web app | Add basic SW for static assets |
+---
+## 5. Bugs Found
+| # | Bug | Severity | Location | Status |
+|---|-----|----------|----------|--------|
+| B1 | `ENABLE_AUTOCOMPLETE_MODEL = False` in `app.py:62` | Medium | `app.py` line 62 | AutoComplete model disabled by default — `/api/autocomplete` still works via lazy-loading, but the flag is misleading |
+| B2 | `src/js/api.js` uses `export` keyword but is not loaded as ES module | Low | `api.js` | Dead code — never actually imported anywhere |
+| B3 | Extension `bayan-api.js` missing functions `bayanAutocomplete`, `bayanDialect`, `bayanQuran` | High | `bayan-api.js` only defines `bayanAnalyze`, `bayanSummarize`, `bayanHealthCheck` | SidePanel calls these undefined functions — will throw `ReferenceError` |
+| B4 | Extension content script overlay position breaks on page scroll (absolute vs fixed positioning) | Medium | `content-inline.js:191` | Overlay uses `window.scrollY` but doesn't update on window resize |
+| B5 | Score sparkline renders with only 2 data points creating a meaningless line | Low | `format.js` | ✅ Fixed (raised minimum to 3 points) |
+| B6 | `dismissAllFiltered()` only removed DOM elements without updating `window.currentSuggestions` | Medium | `format.js` | ✅ Fixed |
+---
+## 6. Security Issues
+| # | Issue | Severity | Details |
+|---|-------|----------|---------|
+| S1 | **`.env` committed to repo** | **Critical** | Supabase URL + anon key visible in Git history. While anon keys are designed for client-side use, the URL+key combo allows anyone to make Supabase API calls. |
+| S2 | **CORS `origins: "*"`** | **Critical** | `app.py:94` — allows any origin to call all API endpoints. Enables: (a) compute theft, (b) DDoS via free proxy, (c) third-party scraping. |
+| S3 | **No API authentication** | **High** | No JWT, API key, or session check on any endpoint. Extension uses only `host_permissions` scoping. |
+| S4 | **XSS risk in editor** | **Medium** | `setEditorHTML()` injects HTML directly into contenteditable. While `renderer.js` escapes text, any upstream bug in suggestion rendering could inject arbitrary HTML. |
+| S5 | **Supabase RLS incomplete** | **Medium** | Only `profiles` has RLS policies. The `documents` table (if exists) needs RLS to prevent cross-user data access. |
+| S6 | **Extension Trusted Types partial** | **Low** | `content-inline.js` implements `trustedTypes.createPolicy()` with identity transform (`input => input`), which passes the CSP check but provides no actual sanitization. |
+| S7 | **Debug endpoint exposed** | **Low** | `/api/debug-models` is accessible in production and leaks internal model status, memory usage, and startup errors. |
+---
+## 7. Performance Issues
+| # | Issue | Severity | Details |
+|---|-------|----------|---------|
+| P1 | **`app.py` is 2,844 lines** | High | Single-file monolith. Every request loads all imports. Cold start on HF Spaces free tier takes ~60s. |
+| P2 | **Vendor JS loaded synchronously** | Medium | `mammoth.browser.min.js` (340KB), `docx.umd.js` (1.2MB), `html2canvas.min.js` (210KB) all load on page start even if never used. |
+| P3 | **Extension content script injected on ALL sites** | Medium | `matches: ["https://*/*", "http://*/*"]` — runs on every page. The `BayanController` module loads even on sites where user never types Arabic. |
+| P4 | **No API response caching on website** | Medium | Every keystroke after debounce triggers a full `/api/analyze` call. Extension has background worker caching, but website doesn't. |
+| P5 | **Grammar Gradio SSE dependency** | Medium | Grammar correction requires streaming from external HF Space. Average latency: 3-8 seconds. Adds significant delay to the analysis pipeline. |
+| P6 | **Quran DB is 23MB** | Low | `quran_master.db` (SQLite, 23MB) is loaded into the Docker container. Fine for now, but limits scaling. |
+| P7 | **No CSS/JS minification** | Low | All assets served unminified. `components.css` alone is 4,125+ lines (~90KB). |
+---
+## 8. UX Problems
+| # | Issue | Severity | Details |
+|---|-------|----------|---------|
+| U1 | **Extension content script tooltip clips at viewport edge** | Medium | Tooltip for highlighted errors can overflow off-screen on narrow viewports. No boundary detection. |
+| U2 | **No loading skeleton on website** | Medium | Editor page shows blank white space during model initialization. No skeleton/shimmer to indicate loading. |
+| U3 | **Extension popup has no dialect/quran/autocomplete** | Medium | Only "تصحيح" and "تلخيص" tabs. SidePanel has all features, but popup is the first surface users see. |
+| U4 | **Inconsistent branding between popup and sidepanel** | Low | Popup uses `.bayan-*` class prefix, SidePanel uses `.sp-*` prefix. Different color palettes. |
+| U5 | **No onboarding flow** | Low | First-time users see an empty editor with no guidance. No tooltips, walkthrough, or sample text. |
+| U6 | **Mobile responsiveness incomplete** | Low | Website has responsive breakpoints but bottom-sheet for suggestions lacks smooth gestures. |
+---
+## 9. Technical Debt
+### Backend
+| Item | Severity | Details |
+|------|----------|---------|
+| **Monolith `app.py`** | High | 2,844 lines. Contains routes, NLP logic, model loading, diffing algorithms, offset mapping, pipeline orchestration, Quran search integration, and CORS — all in one file. |
+| **Duplicated directional blocks** | Medium | `_DIRECTIONAL_BLOCKS` in `app.py` duplicates logic that also exists in `araspell_rules.py`. |
+| **12+ test files at project root** | Low | `test_proof.py`, `test_sv.py`, `test_pc.py`, etc. scattered in root instead of `tests/`. |
+| **Dead code** | Low | `ENABLE_DIALECT_MODEL = False`, `ENABLE_AUTOCOMPLETE_MODEL = False` flags in `app.py` — no code path checks them for these features since they use lazy-loading. |
+| **Archive directory** | Low | `archive/legacy_scripts/` contains old code that shouldn't ship in Docker image. |
+### Frontend (Website)
+| Item | Severity | Details |
+|------|----------|---------|
+| **`api.js` dead exports** | Medium | `export async function analyzeText()` — never imported. Website uses inline `fetch()` in `editor.js`. |
+| **Tight coupling in `editor.js`** | Medium | DOM manipulation, API calls, suggestion management, and UI updates all in one 29KB file. |
+| **No build system** | Low | No bundler, no tree-shaking, no code-splitting. All JS loaded via `<script>` tags. |
+| **CSS structure** | Low | Single `components.css` at 4,125+ lines. No CSS modules, no scoping. |
+### Extension
+| Item | Severity | Details |
+|------|----------|---------|
+| **`popup.js` and `sidepanel.js` code duplication** | High | ~60% identical code: `updateCounts()`, `markStale()`, `setLoading()`, `updateScore()`, `renderSuggestions()`, `showToast()`. |
+| **Missing API functions in `bayan-api.js`** | High | SidePanel calls `bayanAutocomplete()`, `bayanDialect()`, `bayanQuran()` which are not defined in `bayan-api.js`. These must be defined elsewhere or will throw. |
+| **No TypeScript / JSDoc validation** | Low | All extension code is plain JS with no compile-time checking. |
+---
+## 10. Recommended Roadmap
+### Phase 1: Security Hardening ⚡ (Critical — Before Any Growth)
+**Timeline: 1-2 days**
+1. **Remove `.env` from Git history** — use `git filter-repo` (the recommended successor to `git filter-branch`) or BFG Repo-Cleaner
+2. **Restrict CORS** — Change `origins: "*"` to allowlist `["https://bayan10-bayan-api.hf.space", "chrome-extension://<ext-id>"]`
+3. **Add rate limiting** — Flask-Limiter: 30 req/min per IP for `/api/analyze`, 10 req/min for `/api/summarize`
+4. **Disable debug endpoint in production** — Guard `/api/debug-models` behind `app.debug` flag
+5. **Add Supabase RLS for `documents` table** — `CREATE POLICY ... USING (auth.uid() = user_id)`
+### Phase 2: Extension Auth Unification 🔐 (High)
+**Timeline: 3-5 days**
+1. **Implement Supabase client in extension** — Add `@supabase/supabase-js` as UMD bundle in `shared/`
+2. **Auth flow**: Use `chrome.identity.launchWebAuthFlow()` for Google OAuth → receive tokens → init Supabase session
+3. **Session persistence**: Store refresh token in `chrome.storage.local`
+4. **Auth sync**: When user logs in on website, broadcast via `postMessage` → content script → `chrome.storage`
+5. **Result**: Extension users can access their documents, settings, and history
+### Phase 3: Extension Feature Parity 🔧 (High)
+**Timeline: 3-5 days**
+1. **Add missing API functions** to `bayan-api.js`: `bayanAutocomplete()`, `bayanDialect()`, `bayanQuran()`
+2. **Add autocomplete/dialect/quran tabs to popup** (currently SidePanel-only)
+3. **Inline ghost text for content script** — Port `autocomplete.js` logic for textareas on 3rd-party sites
+4. **Add basic export** — "Copy corrected text" button already exists; add "Download as TXT"
+### Phase 4: Backend Refactoring 🏗️ (Medium)
+**Timeline: 5-7 days**
+1. **Split `app.py`** into:
+   - `routes/analyze.py`, `routes/summarize.py`, `routes/dialect.py`, `routes/quran.py`
+   - `services/pipeline.py` (orchestration)
+   - `middleware/cors.py`, `middleware/rate_limit.py`
+2. **Create `002_documents.sql` migration** with proper RLS
+3. **Move root-level test files** into `tests/`
+4. **Remove `archive/` from Docker build** (add to `.dockerignore`)
+### Phase 5: Extension Code Quality 🧹 (Medium)
+**Timeline: 3-4 days**
+1. **Extract shared logic** from `popup.js` and `sidepanel.js` into `shared/bayan-core.js`
+2. **Add English locale** `_locales/en/messages.json`
+3. **Add `all_frames: true`** to manifest for iframe editor support
+4. **Add theme toggle** to popup and sidepanel
+### Phase 6: Performance & Polish ✨ (Low)
+**Timeline: 2-3 days**
+1. **Lazy-load vendor libs** (mammoth, docx, html2canvas) on first use
+2. **Add website-side API caching** (localStorage TTL cache like extension has)
+3. **Add CSS/JS minification** to Docker build
+4. **Add loading skeletons** for editor page
+5. **Add onboarding flow** — sample text + guided tooltips
+---
+## Summary Matrix
+| Category | Critical | High | Medium | Low | Total |
+|----------|---------|------|--------|-----|-------|
+| **Security** | 2 (S1, S2) | 1 (S3) | 2 (S4, S5) | 2 (S6, S7) | 7 |
+| **Missing Features** | 0 | 5 (H1-H5) | 6 (M1-M6) | 4 (L1-L4) | 15 |
+| **Bugs** | 0 | 1 (B3) | 2 (B1, B4) | 1 (B2) | 4 (+2 fixed) |
+| **Performance** | 0 | 1 (P1) | 4 (P2-P5) | 2 (P6, P7) | 7 |
+| **UX** | 0 | 0 | 3 (U1-U3) | 3 (U4-U6) | 6 |
+| **Tech Debt** | 0 | 3 | 3 | 6 | 12 |
+| **TOTAL** | **2** | **11** | **20** | **18** | **51** |
+---
+## Final Verdict
+Bayan is a technically impressive product with a solid NLP pipeline, a mature editor engine, and a well-architected extension. The core correction features (Spelling → Grammar → Punctuation) work end-to-end across both surfaces.
+**What Bayan does well:**
+- ✅ Custom contenteditable editor with proper cursor handling
+- ✅ Multi-stage NLP pipeline with offset mapping
+- ✅ Extension uses overlay-only rendering (never modifies user DOM)
+- ✅ Supabase integration for cloud persistence
+- ✅ Comprehensive test coverage (16 backend test files)
+- ✅ Extension follows MV3 best practices (service worker, side panel)
+**What must be fixed before growth:**
+1. 🔴 **Security**: CORS wildcard + no rate limiting = anyone can abuse the API
+2. 🔴 **Auth gap**: Extension users can't persist anything — breaks the SaaS value proposition
+3. 🟡 **Extension missing API functions**: `bayanAutocomplete/Dialect/Quran` will throw `ReferenceError`
+4. 🟡 **Backend monolith**: 2,844-line `app.py` is a maintenance bottleneck
+**Bottom line:** Bayan is 80% of the way to a production-grade SaaS product. The remaining 20% is security hardening, extension auth, and code architecture — all achievable in 2-3 focused weeks.

Dockerfile ADDED Viewed

	@@ -0,0 +1,95 @@

+# Base image and working directory: everything below is relative to /app.
+FROM python:3.12-slim
+WORKDIR /app
+# Install system dependencies
+# build-essential supplies a C/C++ toolchain, presumably for Python packages
+# that build native extensions during `pip install` — TODO confirm it is needed.
+# The apt lists are removed in the same layer to keep the image small.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements and install Python dependencies
+# Install CPU-only PyTorch first (saves ~1.5GB vs full torch with CUDA)
+# Only requirements.txt is copied at this point, so edits to application code
+# do not invalidate the dependency layer.
+# NOTE(review): if requirements.txt pins a torch version that differs from the
+# CPU wheel installed here, the second pip call may replace it — verify.
+COPY requirements.txt .
+RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu && \
+    pip install --no-cache-dir -r requirements.txt
+# Pre-download models during build (network is available here)
+# At runtime, the container has NO outbound DNS, so models must be cached
+# Set HF_HOME to a global path so non-root users (like in HF Spaces) can access cached models
+# chmod 777 makes the cache directory itself world-writable; it is applied
+# before any model files are downloaded into it.
+ENV HF_HOME=/opt/huggingface
+RUN mkdir -p /opt/huggingface && chmod 777 /opt/huggingface
+# Model prefetch steps (1-5). Each RUN loads a tokenizer/model once so its
+# files are fetched at build time; the loaded Python objects are discarded
+# when the process exits. These steps come before the application COPY
+# instructions, so source-code edits do not force the models to be fetched again.
+# 1. Summarization model (MBart, float16)
+RUN python -c "\
+from transformers import MBartForConditionalGeneration, AutoTokenizer, AutoConfig; \
+import torch; \
+repo = 'bayan10/summarization-model'; \
+print('Downloading summarization tokenizer...'); \
+AutoTokenizer.from_pretrained(repo); \
+print('Downloading summarization config...'); \
+AutoConfig.from_pretrained(repo); \
+print('Downloading summarization model (float16)...'); \
+MBartForConditionalGeneration.from_pretrained(repo, torch_dtype=torch.float16); \
+print('Summarization model cached!'); \
+"
+# 2. Spelling model (AraSpell — AraBERT encoder-decoder + checkpoint)
+# Fetches the fine-tuned checkpoint file (last_model.pt) plus the AraBERT
+# base weights used for the encoder-decoder and the masked-LM.
+RUN python -c "\
+from huggingface_hub import hf_hub_download; \
+from transformers import AutoTokenizer, EncoderDecoderModel, AutoModelForMaskedLM; \
+print('Downloading AraSpell checkpoint...'); \
+hf_hub_download(repo_id='bayan10/AraSpell-Model', filename='last_model.pt'); \
+print('Downloading AraBERT tokenizer...'); \
+AutoTokenizer.from_pretrained('aubmindlab/bert-base-arabertv02'); \
+print('Downloading AraBERT encoder-decoder...'); \
+EncoderDecoderModel.from_encoder_decoder_pretrained('aubmindlab/bert-base-arabertv02', 'aubmindlab/bert-base-arabertv02'); \
+print('Downloading AraBERT MLM (for ContextualCorrector)...'); \
+AutoModelForMaskedLM.from_pretrained('aubmindlab/bert-base-arabertv02'); \
+print('Spelling model + MLM cached!'); \
+"
+# 3. Grammar — camel-tools MLE disambiguator data
+# Set CAMELTOOLS_DATA to a global path so non-root users (like in HF Spaces) can access it
+# `camel_data -i light` runs in the same layer, after the directory is created.
+ENV CAMELTOOLS_DATA=/opt/camel_tools
+RUN mkdir -p /opt/camel_tools && chmod 777 /opt/camel_tools && camel_data -i light
+# 4. Punctuation model (PuncAra-v1 — EncoderDecoderModel)
+RUN python -c "\
+from transformers import EncoderDecoderModel, AutoTokenizer; \
+repo = 'bayan10/PuncAra-v1'; \
+print('Downloading PuncAra-v1 tokenizer...'); \
+AutoTokenizer.from_pretrained(repo); \
+print('Downloading PuncAra-v1 model...'); \
+EncoderDecoderModel.from_pretrained(repo); \
+print('PuncAra-v1 cached!'); \
+"
+# 5. Dialect-to-MSA model (mT5, float16)
+RUN python -c "\
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM; \
+import torch; \
+repo = 'bayan10/dialect-to-msa-model'; \
+print('Downloading dialect tokenizer...'); \
+AutoTokenizer.from_pretrained(repo); \
+print('Downloading dialect model (float16)...'); \
+AutoModelForSeq2SeqLM.from_pretrained(repo, torch_dtype=torch.float16); \
+print('Dialect model cached!'); \
+"
+# Copy application code
+COPY src/ ./src/
+COPY quran.py ./
+COPY quran_master.db ./
+COPY .env* ./
+# Set environment variables
+ENV PORT=7860
+ENV DEBUG=False
+ENV PYTHONUNBUFFERED=1
+# Expose port
+EXPOSE 7860
+# Start the app with gunicorn (single worker to minimize RAM)
+# Timeout 300s: full pipeline (spelling ~50s + grammar ~8s + punctuation ~30s + cold start)
+CMD ["gunicorn", "--chdir", "src", "app:app", "--bind", "0.0.0.0:7860", "--timeout", "300", "--workers", "1"]

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2025 Your Organization
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

LOGOS/icon128.png ADDED Viewed

Git LFS Details

SHA256: edf4e739bd7979282497a93e5eb79ba9ec15ec51ffb09288e710e27ef10af54f
Pointer size: 130 Bytes
Size of remote file: 14.3 kB

LOGOS/icon16.png ADDED Viewed

Git LFS Details

SHA256: 430d94175bb601012bcaa542e13012a7d49cccfb0f7e91e37b4e638962520650
Pointer size: 128 Bytes
Size of remote file: 845 Bytes

LOGOS/icon32.png ADDED Viewed

Git LFS Details

SHA256: 5dd959d57cd298801efd8e58f996f45b6913220538596b8918d4a8e4cbb98eac
Pointer size: 129 Bytes
Size of remote file: 2.2 kB

LOGOS/icon48.png ADDED Viewed

Git LFS Details

SHA256: ce4142d81117b1b950017f599e09ceddc0d89e517b74e796374b1ceb36555014
Pointer size: 129 Bytes
Size of remote file: 3.8 kB

PROJECT_DESCRIPTION.md ADDED Viewed

	@@ -0,0 +1,217 @@

+# Bayan (بيان) - Arabic Writing Assistant & Text Summarization System
+Bayan is a state-of-the-art Arabic text editing and summarization application. Similar to assistants like Grammarly, Bayan provides real-time correction of spelling, grammar, and punctuation, combined with autocomplete suggestions and an advanced summarization pipeline. It features a modern, responsive web interface that communicates with a Flask backend powered by deep learning models.
+---
+## 📁 Repository Layout & File Descriptions
+```
+Bayan/
+├── data/                       # Directory for raw and processed datasets (empty by default)
+├── models/                     # Deep learning models directory (organized by task)
+│   ├── Autocomplete/           # GPT-2 autocomplete model
+│   ├── Grammrar/               # Gemma-based grammar correction model
+│   ├── Punctuation/            # Seq2Seq punctuation correction model
+│   ├── Spelling/               # BERT-based spelling corrector checkpoint
+│   └── Summarization/          # mBART summarization model checkpoint
+├── src/                        # Core backend source code and frontend
+│   ├── app.py                  # Flask server containing API endpoints
+│   ├── ara_spell.py            # Custom spell-checking algorithms and post-processing
+│   ├── index.html              # TailwindCSS & Vanilla JS responsive web interface
+│   ├── model_loader.py         # Loader classes for all deep learning models
+│   └── README.md               # Source code instructions and API output contracts
+├── check_dependencies.py       # Helper script to check required Python libraries
+├── inspect_decoder.py          # Weight inspection helper for the spelling model
+├── inspect_model.py            # Basic PyTorch checkpoint architecture identifier
+├── inspect_model_details.py    # Detailed tensor shape explorer for spelling checkpoint
+├── inspect_model_weights.py    # Checkpoint structure explorer
+├── LICENSE                     # MIT License
+├── QUICKSTART.md               # Quickstart guide for setting up and running Bayan
+├── README.md                   # Main project overview and directory layout
+├── README_SETUP.md             # Detailed step-by-step setup and troubleshooting guide
+├── reproduce_issue.py          # Simple local script to test Spelling, Grammar, and Punctuation models
+├── requirements.txt            # Python dependencies (PyTorch, Transformers, Flask, etc.)
+├── run_app.py                  # Standard launcher script for the application
+├── summarization_test.py       # Local tests and configuration options for Summarization
+├── test_analyze_api.py         # Request test script for the /api/analyze endpoint
+├── test_analyze_methods.py     # Request test script for GET/POST validations of analyze endpoint
+├── test_model_load.py          # Verification script for local summarization model loading
+├── upload_model.py             # Script to upload models to the Hugging Face hub
+└── verify_api_live.py          # Test script to send sample text to a live API server
+```
+---
+## 🛠️ Core Features
+1. **Smart Spelling Correction (`SpellingModel`)**:
+   - Cleans the text (removes harakat and tatweel), collapses repeated characters, and resolves common keyboard substitution errors.
+   - Generates candidates using seq2seq model inference (beams), smart rules-based heuristics, and edit-distance suggestions (Norvig's spelling corrector adapted for Arabic).
+   - Reranks candidates using a combined formula of **fluency** (evaluated using a BERT Masked Language Model), **similarity** (Damerau-Levenshtein distance), and **vocabulary-aware acceptance** (checks In-Vocabulary/Out-of-Vocabulary words from the tokenizer dictionary).
+2. **Grammar Correction (`GrammarModel`)**:
+   - Loads a Gemma causal language model configured to run on CPU.
+   - Evaluates grammar through a standard chat template prompt.
+   - Extracts the first valid non-empty corrected sentence and rejects generic instruction text generated by the model.
+3. **Punctuation Insertion (`PunctuationModel`)**:
+   - Uses a Seq2Seq architecture to automatically place Arabic commas (`،`), semicolons (`؛`), question marks (`؟`), periods (`.`), and quotation marks (`« »`) into continuous text.
+4. **Text Summarization (`SummarizationModel`)**:
+   - Leverages an mBART conditional generation model.
+   - Supports variable length thresholds (short: ~30%, medium: ~50%, long: ~70% of the input text length).
+   - Features a **safe extractive fallback** mechanism: if the generated abstractive summary deviates too far from the original text (monitored by word overlap and similarity ratios), it falls back to a readable extractive summary composed of the opening sentences of the source text.
+5. **Autocomplete Suggestions (`AutocompleteModel`)**:
+   - Powered by a local GPT-2 model (CPU-only mode) configured to predict the next word given a text prefix.
+   - Integrates with the web interface to display ghost text prompts that users can accept by pressing the `Tab` key.
+---
+## 🖥️ Architecture & Web Interface
+The project uses a unified **Client-Server Architecture**:
+```mermaid
+graph TD
+    Client[Web Interface: HTML / CSS / JS] <-->|JSON over HTTP| Server[Flask API Server: app.py]
+    Server <--> ModelLoader[model_loader.py]
+    ModelLoader <--> Spelling[SpellingModel / ara_spell.py]
+    ModelLoader <--> Grammar[GrammarModel]
+    ModelLoader <--> Punctuation[PunctuationModel]
+    ModelLoader <--> Summarization[SummarizationModel]
+    ModelLoader <--> Autocomplete[AutocompleteModel]
+```
+### 1. Backend: Flask API (`src/app.py`)
+- Manages model state instances and startup loading triggers (loads the summarization model on startup and lazily loads autocomplete as needed).
+- Provides API endpoints validating text length requirements (between 10 and 5,000 characters).
+- Implements `/api/analyze` which coordinates a sequential processing pipeline:
+  $$\text{Input Text} \rightarrow \text{Spelling Correction} \rightarrow \text{Grammar Correction} \rightarrow \text{Punctuation Insertion} \rightarrow \text{Diff Calculation}$$
+### 2. Frontend: Modern Web Application (`src/index.html`)
+- Built using **TailwindCSS** for styling, **Google Fonts** (Tajawal, Noto Kufi Arabic) for premium typography, and glassmorphism cards.
+- Includes a live, rich editing canvas (`contenteditable`) with instant wavy underlines representing errors:
+  - <span style="border-bottom: 2px wavy #ef4444; background: rgba(239, 68, 68, 0.1); padding: 0 4px;">Red underlines</span> indicate **Spelling Errors**.
+  - <span style="border-bottom: 2px wavy #fbbf24; background: rgba(251, 191, 36, 0.1); padding: 0 4px;">Yellow underlines</span> indicate **Grammar / Punctuation Suggestions**.
+- Features an interactive **suggestion tooltip** allowing users to click on highlighted words to view explanations and apply replacements directly.
+- Displays a real-time **document score metric** (0–100 circular gauge) based on error density, along with word counters and feedback lists.
+- Hosts a **Summarization Panel** where users can control the length and generation configuration of the text summarizer.
+---
+## 🔌 API Endpoints Reference
+### 1. Health Check
+* **Endpoint**: `GET /api/health`
+* **Response**:
+  ```json
+  {
+    "status": "healthy",
+    "models": {
+      "summarization": true,
+      "spelling": false,
+      "autocomplete": false,
+      "grammar": false,
+      "punctuation": false
+    }
+  }
+  ```
+### 2. Summarize Text
+* **Endpoint**: `POST /api/summarize`
+* **Payload** (`length`: 1 = short, 2 = medium, 3 = long):
+  ```json
+  {
+    "text": "النص العربي الطويل المراد تلخيصه...",
+    "length": 2,
+    "full_text": true
+  }
+  ```
+* **Response**:
+  ```json
+  {
+    "status": "success",
+    "summary": "الملخص المولد من النموذج...",
+    "original_length": 1420,
+    "summary_length": 620
+  }
+  ```
+### 3. Spelling Correction
+* **Endpoint**: `POST /api/spelling`
+* **Payload**: `{"text": "الكتبة الصحيحه"}`
+* **Response**: `{"corrected": "الكتابة الصحيحة", "status": "success", ...}`
+### 4. Autocomplete
+* **Endpoint**: `POST /api/autocomplete`
+* **Payload**: `{"text": "ذهب الطالب إلى", "n": 3}`
+* **Response**: `{"suggestions": ["المدرسة", "الجامعة", "الفصل"], "status": "success"}`
+### 5. Unified Analyze Text
+* **Endpoint**: `POST /api/analyze`
+* **Payload**: `{"text": "الطلاب ذهبو الى المدرسة"}`
+* **Response**:
+  ```json
+  {
+    "original": "الطلاب ذهبو الى المدرسة",
+    "corrected": "ذهب الطلاب إلى المدرسة.",
+    "suggestions": [
+      {
+        "original": "ذهبو",
+        "correction": "ذهبوا",
+        "type": "spelling"
+      },
+      {
+        "original": "ذهبوا",
+        "correction": "ذهب",
+        "type": "grammar"
+      },
+      {
+        "original": "الطلاب ذهب",
+        "correction": "ذهب الطلاب",
+        "type": "grammar"
+      },
+      {
+        "original": "المدرسة",
+        "correction": "المدرسة.",
+        "type": "punctuation"
+      }
+    ],
+    "status": "success"
+  }
+  ```
+---
+## 🚀 How to Run the Project
+### 1. Install Dependencies
+Make sure you have Python 3.8+ installed, and then run:
+```bash
+pip install -r requirements.txt
+```
+*Note: If you are running on a CPU-only environment or want to configure PyTorch for CUDA (GPU), visit [PyTorch Local Setup](https://pytorch.org/get-started/locally/) to install the appropriate distribution.*
+### 2. Prepare Model Files
+Verify that you have placed the model files under the `models/` directory:
+- Summarization: `models/Summarization/Model/`
+- Spelling: `models/Spelling/Model/`
+- Autocomplete: `models/Autocomplete/Model/`
+- Grammar: `models/Grammrar/Model/`
+- Punctuation: `models/Punctuation/Model/`
+### 3. Run the Server
+Use gunicorn (production) or Flask dev server:
+```bash
+# Production (matches Procfile)
+cd src && gunicorn app:app --bind 0.0.0.0:7860 --timeout 120 --workers 1
+# Development
+cd src && python -c "from app import app; app.run(host='0.0.0.0', port=7860, debug=True)"
+```
+Open your web browser and navigate to:
+```
+http://localhost:7860
+```

Procfile ADDED Viewed

	@@ -0,0 +1 @@


1	+ web: cd src && gunicorn app:app --bind 0.0.0.0:$PORT --timeout 120 --workers 1

QUICKSTART.md ADDED Viewed

	@@ -0,0 +1,126 @@

+# Bayan - Quick Start Guide
+## 🚀 Quick Start
+### 1. Install Dependencies
+```bash
+pip install -r requirements.txt
+```
+**Note:** If you have issues, install PyTorch separately:
+- CPU: `pip install torch --index-url https://download.pytorch.org/whl/cpu`
+- GPU: Visit https://pytorch.org/get-started/locally/
+### 2. Run the Application
+```bash
+python run_app.py
+```
+### 3. Open in Browser
+Navigate to: **http://localhost:5000**
+## 📁 Project Structure
+```
+Bayan/
+├── src/
+│   ├── app.py              # Flask backend server
+│   ├── model_loader.py     # Model loading and inference
+│   └── index.html          # Web interface
+├── models/
+│   └── arabic_summarization_model/
+│       └── content/drive/MyDrive/arabic_summarization_model/
+│           ├── config.json
+│           ├── model.safetensors
+│           └── ... (other model files)
+├── run_app.py              # Application launcher
+├── requirements.txt         # Python dependencies
+└── README_SETUP.md         # Detailed setup guide
+```
+## 🔧 Features
+✅ **Robust Error Handling**
+- Path validation for model files
+- Graceful fallbacks if model loading fails
+- Input validation and sanitization
+- Clear error messages
+✅ **Security**
+- Input length limits (max 5000 characters)
+- CORS enabled for web interface
+- Safe model loading
+- Error logging
+✅ **User Experience**
+- Loading indicators
+- Real-time feedback
+- Arabic language support
+- Responsive design
+## 🧪 Testing
+### Test API Health
+```bash
+curl http://localhost:5000/api/health
+```
+### Test Summarization
+```bash
+curl -X POST http://localhost:5000/api/summarize \
+  -H "Content-Type: application/json" \
+  -d '{"text": "نص تجريبي للاختبار", "length": 2, "full_text": true}'
+```
+## 🐛 Troubleshooting
+### Model Not Found
+- Verify model path: `models/arabic_summarization_model/content/drive/MyDrive/arabic_summarization_model/`
+- Check that `config.json` exists
+- The app will search multiple possible locations automatically
+### Dependencies Missing
+```bash
+python check_dependencies.py
+pip install -r requirements.txt
+```
+### Port Already in Use
+```bash
+export PORT=5001   # Windows (cmd): set PORT=5001
+python run_app.py
+```
+## 📝 API Documentation
+### POST /api/summarize
+Summarize Arabic text.
+**Request** (`length`: 1=short, 2=medium, 3=long):
+```json
+{
+  "text": "النص العربي...",
+  "length": 2,
+  "full_text": true
+}
+```
+**Response:**
+```json
+{
+  "status": "success",
+  "summary": "الملخص...",
+  "original_length": 500,
+  "summary_length": 150
+}
+```
+## 🎯 Next Steps
+1. Install dependencies: `pip install -r requirements.txt`
+2. Run the app: `python run_app.py`
+3. Open browser: http://localhost:5000
+4. Write Arabic text and click "توليد الملخص"
+For detailed information, see `README_SETUP.md`.

README.md ADDED Viewed

	@@ -0,0 +1,41 @@

+---
+title: Bayan API
+emoji: ✍️
+colorFrom: green
+colorTo: blue
+sdk: docker
+app_port: 7860
+---
+# Arabic Grammarly (project)
+## Project overview
+This repository contains the initial skeleton for an Arabic grammar & writing assistant (like Grammarly) project. It includes placeholders for data, models, and source code, plus configuration and dependency files. Use this repo as the single source-of-truth for team contributions.
+## Key features you may implement here
+- Arabic spelling and grammar checking
+- Style / clarity suggestions for Modern Standard Arabic (MSA) and dialects
+- Sentence rewriting and paraphrasing suggestions
+- Plagiarism or similarity checks (optional)
+- Integration with web UI / browser extension / API
+## Repository layout
+- `data/` — place raw and processed datasets (LANS, corpora, etc.). See `data/README.md` for details.
+- `models/` — store training checkpoints and exports (do NOT commit large binary files). See `models/README.md`.
+- `src/` — source code (training scripts, inference API, preprocessing). See `src/README.md`.
+- `requirements.txt` — Python dependencies for the project.
+- `.env.example` — template for environment variables.
+- `.gitignore` — sensible defaults for this project.
+## Contributing notes
+- Keep large datasets and model weights out of Git (use cloud storage or Git LFS / DVC / Hugging Face Hub).
+- Add tests in `src/tests/` and keep the public API stable.
+- Use small, focused pull requests that include a short description and test(s) if applicable.
+## Contact
+If you have questions about where to add files or how to name things, ask in the team chat and follow the README inside each folder for more guidance.

README_HF.md ADDED Viewed

	@@ -0,0 +1,8 @@

+---
+title: Bayan API
+emoji: ✍️
+colorFrom: green
+colorTo: blue
+sdk: docker
+app_port: 7860
+---

README_SETUP.md ADDED Viewed

	@@ -0,0 +1,172 @@

+# Bayan - Arabic Text Summarization Setup Guide
+## Overview
+Bayan is an Arabic text summarization application with a web interface. This guide will help you set up and run the application.
+## Prerequisites
+- Python 3.8 or higher
+- pip (Python package manager)
+- At least 4GB RAM (8GB+ recommended for better performance)
+- Model files in the correct location (see below)
+## Installation Steps
+### 1. Install Dependencies
+```bash
+pip install -r requirements.txt
+```
+**Note:** If you encounter issues installing PyTorch, you may need to install it separately:
+- For CPU: `pip install torch --index-url https://download.pytorch.org/whl/cpu`
+- For CUDA: Visit https://pytorch.org/get-started/locally/ for the appropriate command
+### 2. Verify Model Location
+The model should be located at:
+```
+models/arabic_summarization_model/content/drive/MyDrive/arabic_summarization_model/
+```
+Required files:
+- `config.json`
+- `tokenizer.json`
+- `model.safetensors`
+- `sentencepiece.bpe.model`
+- Other tokenizer/model files
+### 3. Run the Application
+#### Option A: Using the run script (Recommended)
+```bash
+python run_app.py
+```
+#### Option B: Direct Flask run
+```bash
+cd src
+python app.py
+```
+#### Option C: Using Flask CLI
+```bash
+cd src
+export FLASK_APP=app.py
+flask run
+```
+### 4. Access the Application
+Open your browser and navigate to:
+```
+http://localhost:5000
+```
+## Configuration
+### Environment Variables
+- `PORT`: Server port (default: 5000)
+- `DEBUG`: Enable debug mode (default: False)
+  ```bash
+  export DEBUG=True
+  export PORT=8080
+  ```
+### Supabase Authentication (Phase 5)
+See `.env.example` and `PHASE_5_IMPLEMENTATION_PLAN.md`.
+1. Create a Supabase project and enable **Anonymous** + **Google** auth.
+2. Run `supabase/migrations/001_profiles.sql` in the SQL Editor.
+3. Set meta tags in `src/index.html`:
+   ```html
+   <meta name="supabase-url" content="https://YOUR_PROJECT.supabase.co">
+   <meta name="supabase-anon-key" content="YOUR_ANON_KEY">
+   ```
+4. Add redirect URL: `http://localhost:5000/**`
+If Supabase is not configured, the editor still works in offline auth mode.
+## Troubleshooting
+### Model Not Found Error
+If you see "Model not found" error:
+1. Verify the model path exists
+2. Check that all required files are present
+3. The application will search multiple possible paths automatically
+### Out of Memory Error
+If you encounter memory issues:
+1. Close other applications
+2. Use CPU mode (it will automatically use CPU if CUDA is not available)
+3. Reduce the `MAX_TEXT_LENGTH` in `src/app.py` if needed
+### Port Already in Use
+If port 5000 is already in use:
+```bash
+export PORT=5001
+python run_app.py
+```
+### Slow Performance
+- First run will be slower as the model loads
+- Subsequent requests will be faster
+- Using GPU (CUDA) significantly improves performance
+## API Endpoints
+### Health Check
+```
+GET /api/health
+```
+Returns server status and model loading state.
+### Summarize Text
+```
+POST /api/summarize
+Content-Type: application/json
+{
+  "text": "النص العربي المراد تلخيصه...",
+  "length": 2,
+  "full_text": true
+}
+```
+`length` accepts 1 (short), 2 (medium), or 3 (long).
+Response:
+```json
+{
+  "status": "success",
+  "summary": "الملخص المولد...",
+  "original_length": 500,
+  "summary_length": 150
+}
+```
+## Security Features
+- Input validation (text length limits)
+- CORS enabled for web interface
+- Error handling and logging
+- Path validation for model files
+- Safe model loading with fallbacks
+## Development
+### Running in Debug Mode
+```bash
+export DEBUG=True
+python run_app.py
+```
+### Testing the API
+```bash
+curl -X POST http://localhost:5000/api/summarize \
+  -H "Content-Type: application/json" \
+  -d '{"text": "نص تجريبي للاختبار", "length": 2, "full_text": true}'
+```
+## Support
+For issues or questions:
+1. Check the logs in the terminal
+2. Verify model files are correct
+3. Ensure all dependencies are installed
+4. Check Python version compatibility

READMEquran.md ADDED Viewed

	@@ -0,0 +1,64 @@

+# Bayan Search API
+## Supported Languages
+The `search_bayan()` function supports the following values for the `target_type` parameter:
+```python
+languages = [
+    "تدقيق الايات",
+    "english",
+    "french",
+    "turkish",
+    "persian",
+    "urdu",
+    "russian",
+    "spanish",
+    "german",
+    "indonesian",
+    "malay",
+    "bengali",
+    "bosnian",
+    "portuguese",
+    "uzbek"
+]
+```
+## Language Descriptions
+| Value        | Output                                |
+| ------------ | ------------------------------------- |
+| تدقيق الايات | Quran text in Uthmani script (Arabic) |
+| english      | English translation                   |
+| french       | French translation                    |
+| turkish      | Turkish translation                   |
+| persian      | Persian (Farsi) translation           |
+| urdu         | Urdu translation                      |
+| russian      | Russian translation                   |
+| spanish      | Spanish translation                   |
+| german       | German translation                    |
+| indonesian   | Indonesian translation                |
+| malay        | Malay translation                     |
+| bengali      | Bengali translation                   |
+| bosnian      | Bosnian translation                   |
+| portuguese   | Portuguese translation                |
+| uzbek        | Uzbek translation                     |
+## Example Usage
+```python
+result = search_bayan(
+    "ولله المشرق والمغرب فأينما تولوا فثم وجه الله",
+    target_type="english"
+)
+print(result["matched_segment"])
+print(result["full_verse"])
+```
+## Notes
+* If `target_type` is omitted, the default value is `"تدقيق الايات"`.
+* The search engine supports fuzzy matching and can handle minor spelling mistakes.
+* Quranic Uthmani text is returned when using `"تدقيق الايات"`.
+* Translations are returned when using any of the supported language names above.

add_divider.py ADDED Viewed

	@@ -0,0 +1,19 @@

+import re
+with open('src/index.html', 'r', encoding='utf-8') as f:
+    html = f.read()
+# Replace Navbar
+navbar_pattern = r'(<button onclick="showPage\(\'home\'\)" class="flex items-center) gap-3(" style="background:none;border:none;cursor:pointer;" aria-label="الرئيسية">)(.*?)(<span id="nav-brand" class="text-xl md:text-2xl font-bold text-gradient">بيان</span></button>)'
+navbar_replacement = r'\1 gap-2.5 md:gap-3\2\3<div class="h-6 w-[1.5px] bg-gray-300 dark:bg-gray-700 rounded-full"></div>\4'
+html = re.sub(navbar_pattern, navbar_replacement, html, flags=re.DOTALL)
+# Replace Footer
+footer_pattern = r'(<div class="flex items-center) gap-3( mb-4">)(.*?)(<span id="footer-brand" class="text-2xl font-bold text-gradient">بيان</span>)'
+footer_replacement = r'\1 gap-2.5 md:gap-3\2\3<div class="h-7 w-[1.5px] bg-gray-300 dark:bg-gray-700 rounded-full"></div>\4'
+html = re.sub(footer_pattern, footer_replacement, html, flags=re.DOTALL)
+with open('src/index.html', 'w', encoding='utf-8') as f:
+    f.write(html)
+print("Done replacing.")

add_extension_theme_toggle.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import re
+# 1. CSS Injection
+css_to_add = """
+/* Light Theme Variables */
+[data-theme="light"] {
+  --bayan-bg: #f9fafb;
+  --bayan-surface: #ffffff;
+  --bayan-surface-hover: #f3f4f6;
+  --bayan-surface-active: #e5e7eb;
+  --bayan-border: #e5e7eb;
+  --bayan-border-light: #d1d5db;
+  --bayan-text: #111827;
+  --bayan-text-secondary: #4b5563;
+  --bayan-text-muted: #9ca3af;
+  --bayan-success: #16a34a;
+  --bayan-warning: #d97706;
+}
+/* Theme Toggle Button Styles */
+.theme-toggle-animated {
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  width: 32px;
+  height: 32px;
+  border: none;
+  border-radius: 50%;
+  background: var(--bayan-surface-hover);
+  color: var(--bayan-text-secondary);
+  cursor: pointer;
+  transition: background 0.3s ease, transform 0.3s ease, color 0.3s ease;
+  position: relative;
+  overflow: hidden;
+  margin-right: 8px;
+}
+.theme-toggle-animated:hover {
+  background: var(--bayan-primary);
+  color: #fff;
+  transform: rotate(15deg);
+}
+.theme-toggle-animated svg {
+  transition: transform 0.4s ease, opacity 0.3s ease;
+  position: absolute;
+}
+[data-theme="dark"] .theme-icon-sun {
+  transform: rotate(90deg) scale(0);
+  opacity: 0;
+}
+[data-theme="dark"] .theme-icon-moon {
+  transform: rotate(0) scale(1);
+  opacity: 1;
+}
+[data-theme="light"] .theme-icon-moon {
+  transform: rotate(-90deg) scale(0);
+  opacity: 0;
+}
+[data-theme="light"] .theme-icon-sun {
+  transform: rotate(0) scale(1);
+  opacity: 1;
+}
+"""
+def append_to_file(filepath, content):
+    with open(filepath, 'a', encoding='utf-8') as f:
+        f.write('\n' + content + '\n')
+append_to_file('extension/popup.css', css_to_add)
+append_to_file('extension/sidepanel/sidepanel.css', css_to_add)
+# 2. HTML Injection
+btn_html = """
+    <button id="ext-theme-toggle" class="theme-toggle-animated" aria-label="تبديل السمة" type="button">
+      <svg class="theme-icon-sun" width="18" height="18" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 3v1m0 16v1m9-9h-1M4 12H3m15.364 6.364l-.707-.707M6.343 6.343l-.707-.707m12.728 0l-.707.707M6.343 17.657l-.707.707M16 12a4 4 0 11-8 0 4 4 0 018 0z"/></svg>
+      <svg class="theme-icon-moon" width="18" height="18" fill="none" stroke="currentColor" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M20.354 15.354A9 9 0 018.646 3.646 9.003 9.003 0 0012 21a9.003 9.003 0 008.354-5.646z"/></svg>
+    </button>
+"""
+def insert_html_button(filepath, pattern):
+    with open(filepath, 'r', encoding='utf-8') as f:
+        html = f.read()
+    # We want to put the button next to the status indicator.
+    # The pattern will match the <div class="bayan-header-status"...> (or sp-) and inject the button right before it
+    new_html = re.sub(pattern, btn_html + r'\1', html)
+    with open(filepath, 'w', encoding='utf-8') as f:
+        f.write(new_html)
+insert_html_button('extension/popup.html', r'(<div class="bayan-header-status")')
+insert_html_button('extension/sidepanel/sidepanel.html', r'(<div class="sp-header-status")')
+# 3. JS Logic Injection
+js_to_add = """
+// ── Theme Toggle Logic ──
+document.addEventListener('DOMContentLoaded', () => {
+  const toggleBtn = document.getElementById('ext-theme-toggle');
+  // Load theme from storage
+  chrome.storage.local.get(['theme'], (result) => {
+    const currentTheme = result.theme || 'dark'; // default to dark
+    document.documentElement.setAttribute('data-theme', currentTheme);
+  });
+  if (toggleBtn) {
+    toggleBtn.addEventListener('click', () => {
+      let theme = document.documentElement.getAttribute('data-theme') || 'dark';
+      let targetTheme = theme === 'dark' ? 'light' : 'dark';
+      document.documentElement.setAttribute('data-theme', targetTheme);
+      chrome.storage.local.set({ theme: targetTheme });
+    });
+  }
+});
+"""
+append_to_file('extension/popup.js', js_to_add)
+append_to_file('extension/sidepanel/sidepanel.js', js_to_add)
+print("Theme toggle added successfully.")

analyze_failures.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import json
+with open('tests/phase10/reports/phase10_results.json', 'r', encoding='utf-8') as f:
+    data = json.load(f)
+failures = [r for r in data['results'] if r['pipeline_verdict'] in ('FP', 'FN', 'ERROR')]
+md_content = "# Analysis of the 33 Benchmark Failures\n\n"
+md_content += "This document contains a detailed breakdown of the 33 examples that failed the benchmark, grouped by their dataset.\n\n"
+from collections import defaultdict
+grouped = defaultdict(list)
+for r in failures:
+    grouped[r.get('dataset', 'unknown')].append(r)
+for dataset, items in grouped.items():
+    md_content += f"## Dataset: {dataset.upper()} ({len(items)} failures)\n\n"
+    for idx, item in enumerate(items, 1):
+        md_content += f"### {idx}. ID: {item.get('id')} ({item.get('pipeline_verdict')})\n"
+        md_content += f"- **Input:** `{item.get('input')}`\n"
+        md_content += f"- **Expected:** `{item.get('expected')}`\n"
+        md_content += f"- **Actual Output:** `{item.get('pipeline_output')}`\n"
+        md_content += f"- **Failure Reason:** {item.get('pipeline_detail', 'N/A')}\n"
+        md_content += f"- **Root Cause:** {item.get('root_cause_stage', 'unknown')} ({item.get('root_cause_detail', 'N/A')})\n"
+        md_content += "\n"
+with open('C:\\Users\\youss\\.gemini\\antigravity-ide\\brain\\9f7cefbc-f722-4b96-bc24-80ce6ffbd124\\failures_analysis.md', 'w', encoding='utf-8') as out:
+    out.write(md_content)
+print("Analysis successfully written to artifact.")

apply_locks.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import os
+def apply_lock_to_file(filepath, var_name, engine_name, func_name):
+    with open(filepath, 'r', encoding='utf-8') as f:
+        lines = f.readlines()
+    out_lines = []
+    in_imports = False
+    added_threading = False
+    in_globals = False
+    added_lock_var = False
+    in_func = False
+    for line in lines:
+        if line.startswith('import ') and not added_threading:
+            out_lines.append(line)
+            out_lines.append("import threading\n")
+            added_threading = True
+            continue
+        if line.startswith(f'_{var_name} = None') and not added_lock_var:
+            out_lines.append(line)
+            out_lines.append(f"_load_lock = threading.Lock()\n")
+            added_lock_var = True
+            continue
+        if line.startswith(f'def {func_name}('):
+            in_func = True
+            out_lines.append(line)
+            continue
+        if in_func:
+            if line.startswith(f'    global '):
+                out_lines.append(line.replace('\n', f', _load_lock\n'))
+                continue
+            if line.startswith(f'    try:'):
+                # The start of the old try block. We wrap everything from here.
+                out_lines.append(f'    with _load_lock:\n')
+                out_lines.append(f'        if _{var_name} is not None:\n')
+                out_lines.append(f'            return _{var_name}\n\n')
+                out_lines.append(f'        try:\n')
+                continue
+            # If we are inside the function and past the global declaration,
+            # and it's indented with at least 4 spaces, we need to add 4 more spaces
+            # for the lines that were inside the old `try:` and `except:`
+            # EXCEPT for `if _xxx is not None: return _xxx` which comes before the try
+            if line.startswith('    if _') or line.startswith('        return _'):
+                # This is the old `if checker is not None:` logic before try. Leave it alone.
+                out_lines.append(line)
+                continue
+            if line.startswith('    '):
+                # Shift everything that was inside try/except right by 4 spaces
+                if line.strip() == '':
+                    out_lines.append('\n')
+                else:
+                    out_lines.append('    ' + line)
+                if line.startswith('    return _') or line.startswith('    raise RuntimeError'):
+                    # End of function
+                    in_func = False
+                continue
+        out_lines.append(line)
+    with open(filepath, 'w', encoding='utf-8') as f:
+        f.writelines(out_lines)
+apply_lock_to_file(r'src/nlp/spelling/araspell_service.py', 'spell_checker', 'AraSpell', 'get_spelling_model')
+apply_lock_to_file(r'src/nlp/punctuation/punctuation_service.py', 'punctuation_checker', 'PuncAra', 'get_punctuation_model')
+apply_lock_to_file(r'src/nlp/grammar/grammar_service.py', 'grammar_checker', 'Grammar', 'get_grammar_model')
+apply_lock_to_file(r'src/nlp/autocomplete/autocomplete_service.py', 'autocomplete_engine', 'Autocomplete', 'get_autocomplete_model')
+print("Locks applied perfectly with correct indentation!")

archive/legacy_scripts/AraSpell.py ADDED Viewed

	@@ -0,0 +1,2224 @@

+# AraSpell — Arabic Spell Checker Pipeline
+# Production-ready version
+import re
+import math
+import logging
+import torch
+import os
+from collections import Counter
+from transformers import AutoTokenizer, EncoderDecoderModel
+import Levenshtein
+import jellyfish
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
+# ═══════════════════════════════════════════════════════════════════════════════
+# LOAD ARABERT SEQ2SEQ MODEL
+# ═══════════════════════════════════════════════════════════════════════════════
+from huggingface_hub import hf_hub_download
+MODEL_REPO = 'bayan10/AraSpell-Model'
+MODEL_FILENAME = 'last_model.pt'
+try:
+    logger.info(f"Downloading/loading model from Hugging Face: {MODEL_REPO}")
+    MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
+except Exception as e:
+    raise RuntimeError(f"Failed to download model from Hugging Face: {e}")
+MODEL_NAME = 'aubmindlab/bert-base-arabertv02'
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = EncoderDecoderModel.from_encoder_decoder_pretrained(MODEL_NAME, MODEL_NAME)
+model.config.decoder_start_token_id = tokenizer.cls_token_id
+model.config.pad_token_id = tokenizer.pad_token_id
+model.config.eos_token_id = tokenizer.sep_token_id
+model.generation_config.max_length = 128
+model.generation_config.decoder_start_token_id = tokenizer.cls_token_id
+model.generation_config.pad_token_id = tokenizer.pad_token_id
+model.generation_config.eos_token_id = tokenizer.sep_token_id
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+checkpoint = torch.load(MODEL_PATH, map_location=device, weights_only=False)
+model.load_state_dict(checkpoint['model_state_dict'], strict=False)
+model = model.to(device)
+model.eval()
+logger.info(f"Model loaded on {device}, epoch: {checkpoint.get('epoch', 'N/A')}")
+from enum import Enum
+from typing import List, Tuple, Optional
+# ─────────────────────────────────────────────────────────────────────────────
+# ERROR TYPE ENUM
+# ─────────────────────────────────────────────────────────────────────────────
+class ErrorType(Enum):
+    """Labels that ErrorClassifier.classify assigns to a piece of text."""
+    # The same character appears several times in a row.
+    CHAR_REPETITION = "char_repetition"
+    # A word looks too long, as if several words were typed without spaces.
+    WORD_MERGE = "word_merge"
+    # The text contains look-alike letters from a non-Arabic keyboard layout.
+    CHAR_SUBSTITUTION = "char_substitution"
+    # Two or more of the above were detected together.
+    MIXED = "mixed"
+    # None of the above was detected.
+    CLEAN = "clean"
+# ═══════════════════════════════════════════════════════════════════════════════
+# POST PROCESSOR
+# ═══════════════════════════════════════════════════════════════════════════════
+class AraSpellPostProcessor:
+    """Arabic text post-processing techniques."""
+    ARABIC_HARAKAT = 'ًٌٍَُِّْ'
+    TATWEEL = 'ـ'
+    NORMALIZER_MAP = {
+        'ﻹ': 'لإ', 'ﻷ': 'لأ', 'ﻵ': 'لآ', 'ﻻ': 'لا', 'ﷲ': 'الله'
+    }
+    ARABIC_CONSONANTS = set('بتثجحخدذرزسشصضطظعغفقكلمن')
+    # --- Basic Normalization ---
+    @staticmethod
+    def remove_harakat(text: str) -> str:
+        """Remove Arabic diacritics"""
+        return re.sub(r'[ً-ْ]', '', text)
+    @staticmethod
+    def remove_tatweel(text: str) -> str:
+        """Remove Arabic kashida/tatweel"""
+        return text.replace(AraSpellPostProcessor.TATWEEL, '')
+    @staticmethod
+    def normalize_special_chars(text: str) -> str:
+        """Normalize special Arabic ligatures"""
+        for old, new in AraSpellPostProcessor.NORMALIZER_MAP.items():
+            text = text.replace(old, new)
+        return text
+    # --- Core Functions ---
+    @staticmethod
+    def unified_collapse_repeated(text: str) -> str:
+        """
+        Collapse repeated characters.
+        Arabic: 3+ consecutive → 1 | Latin: 2+ consecutive → 1
+        """
+        # Arabic characters: 3+ → 1
+        text = re.sub(r"([\u0600-\u06FF])\1{2,}", r"\1", text)
+        # Latin characters: 2+ → 1
+        text = re.sub(r"([a-zA-Z])\1+", r"\1", text)
+        return text
+    @staticmethod
+    def remove_duplicate_words(text: str) -> str:
+        """Remove consecutive duplicate words. e.g. كتاب كتاب → كتاب"""
+        words = text.split()
+        if len(words) < 2:
+            return text
+        result = [words[0]]
+        for i in range(1, len(words)):
+            if words[i] != words[i-1]:
+                result.append(words[i])
+        return ' '.join(result)
+    @staticmethod
+    def normalize_spaces(text: str) -> str:
+        """Normalize whitespace: multiple spaces, unicode spaces, punctuation spacing."""
+        # Multiple spaces → single
+        text = re.sub(r' +', ' ', text)
+        # Unicode spaces
+        text = text.replace('\u00A0', ' ')  # Non-breaking space
+        text = text.replace('\u200B', '')   # Zero-width space
+        text = text.replace('\u200C', '')   # Zero-width non-joiner
+        text = text.replace('\u200D', '')   # Zero-width joiner
+        # Trim
+        text = text.strip()
+        # Punctuation spacing
+        text = re.sub(r'\s*([،؛؟!.])\s*', r'\1 ', text)
+        text = text.strip()
+        return text
+    @staticmethod
+    def remove_word_repetition_with_wa(text: str) -> str:
+        """Remove word و word → word"""
+        words = text.split()
+        result = []
+        i = 0
+        while i < len(words):
+            if i + 2 < len(words) and words[i] == words[i+2] and words[i+1] == 'و':
+                result.append(words[i])
+                i += 3
+            else:
+                result.append(words[i])
+                i += 1
+        return ' '.join(result)
+    # --- Hamza & Ta Marbuta Handling ---
+    @staticmethod
+    def fix_hamza_conservative(text: str) -> str:
+        """Conservative Hamza normalization — only at word END, not middle."""
+        words = text.split()
+        result = []
+        for word in words:
+            if len(word) >= 3:
+                # Fix trailing أ → ا
+                if word.endswith('أ'):
+                    word = word[:-1] + 'ا'
+                # Fix trailing إ → ا
+                if word.endswith('إ'):
+                    word = word[:-1] + 'ا'
+            result.append(word)
+        return ' '.join(result)
+    @staticmethod
+    def fix_ha_ta_marbuta(text: str, vocab_manager=None) -> str:
+        """
+        Smart ه → ة fix at end of words.
+        Key insight: ه at word end can be:
+        - Ta Marbuta (should be ة): المدرسه → المدرسة
+        - Possessive pronoun (should stay ه): تحقيقه = his achievement
+        Strategy: Only convert if the ة version is IV (in tokenizer vocab).
+        This distinguishes المدرسة (IV) from تحقيقة (not a real word form).
+        Without vocab_manager, falls back to original pattern-based approach.
+        """
+        # Protected words: anything containing لله
+        PROTECTED_ENDINGS = ['لله']
+        words = text.split()
+        result = []
+        for word in words:
+            # Skip protected words (Allah-related)
+            if any(word.endswith(e) for e in PROTECTED_ENDINGS):
+                result.append(word)
+                continue
+            if len(word) >= 4 and word.endswith('ه'):
+                # Check if second-to-last char is a consonant
+                if word[-2] in AraSpellPostProcessor.ARABIC_CONSONANTS:
+                    candidate_with_ta = word[:-1] + 'ة'
+                    if vocab_manager:
+                        # SMART MODE: Use vocab to decide
+                        ta_iv = vocab_manager.is_iv(candidate_with_ta)
+                        ha_iv = vocab_manager.is_iv(word)
+                        if ta_iv:
+                            # ة version is IV → convert (المدرسه→المدرسة)
+                            result.append(candidate_with_ta)
+                            continue
+                        elif ha_iv:
+                            # Only ه version is IV → keep ه (possessive: تحقيقه)
+                            result.append(word)
+                            continue
+                        # else: NEITHER is IV → keep original ه
+                        # (safer than guessing — could be rare possessive)
+                    else:
+                        # FALLBACK: No vocab → use original pattern-based approach
+                        result.append(candidate_with_ta)
+                        continue
+            result.append(word)
+        return ' '.join(result)
+    # --- Hallucination Removal ---
+    @staticmethod
+    def remove_hallucinations(text: str) -> str:
+        """Remove model hallucinations: duplicate words, trailing 'و' artifacts."""
+        words = text.split()
+        if not words:
+            return text
+        result = []
+        i = 0
+        def normalize_word(w: str) -> str:
+            """Normalize for comparison"""
+            w = w.replace('ال', '').replace('ة', 'ه')
+            w = re.sub(r'[أإآ]', 'ا', w)
+            return w
+        while i < len(words):
+            word = words[i]
+            # Remove trailing 'و' artifacts (الماضيةو → الماضية)
+            if len(word) > 4 and word.endswith('و'):
+                prev_char = word[-2]
+                if prev_char in 'ةهاأإآء':
+                    word = word[:-1]
+            # Check for duplicate patterns
+            if i + 1 < len(words):
+                next_word = words[i + 1]
+                if normalize_word(word) == normalize_word(next_word):
+                    # Keep the one with 'ال' if possible
+                    keep = next_word if next_word.startswith('ال') and not word.startswith('ال') else word
+                    result.append(keep)
+                    i += 2
+                    continue
+            result.append(word)
+            i += 1
+        return ' '.join(result)
+    @staticmethod
+    def remove_hallucinated_prefix(text: str, original: str) -> str:
+        """Remove particles (و/في) added by model if not in original"""
+        if not original:
+            return text
+        if text.startswith('و ') and not original.startswith('و'):
+            rest = text[2:].strip()
+            # Verify it matches original
+            if AraSpellPostProcessor.normalize_special_chars(rest) == AraSpellPostProcessor.normalize_special_chars(original):
+                return rest
+        return text
+    # --- Word Splitting & Merging ---
+    @staticmethod
+    def merge_separated_al(text: str) -> str:
+        """Merge 'ال' separated by space: ال + كتاب → الكتاب"""
+        return re.sub(r'\bال\s+(\w+)', r'ال\1', text)
+    @staticmethod
+    def join_fragments(text: str) -> str:
+        """Join short fragments with validation. e.g. الط + الب → الطالب"""
+        words = text.split()
+        if len(words) < 2:
+            return text
+        # Common standalone words that should NOT be merged
+        STANDALONE_WORDS = {
+            'من', 'في', 'على', 'عن', 'مع', 'إلى', 'الى', 'حتى', 'منذ', 'خلال',
+            'بعد', 'قبل', 'ب', 'ل', 'ك', 'و', 'أو', 'لا', 'ما', 'لم', 'لن',
+            'هو', 'هي', 'هم', 'أن', 'إن', 'كل', 'كان', 'قد', 'قال', 'ذلك',
+            'هذا', 'هذه', 'تلك', 'التي', 'الذي', 'التى', 'اللذي'
+        }
+        result = []
+        i = 0
+        while i < len(words):
+            word = words[i]
+            if i + 1 < len(words):
+                next_word = words[i + 1]
+                # SAFETY: Don't merge if both are standalone words
+                if word in STANDALONE_WORDS and next_word in STANDALONE_WORDS:
+                    result.append(word)
+                    i += 1
+                    continue
+                # Case 1: Single char fragment (safe to merge)
+                if len(next_word) == 1:
+                    result.append(word + next_word)
+                    i += 2
+                    continue
+                # Case 2: Overlap (last char of word == first char of next)
+                if len(word) >= 2 and len(next_word) >= 2 and word[-1] == next_word[0]:
+                    if not (word in STANDALONE_WORDS and next_word in STANDALONE_WORDS):
+                        result.append(word[:-1] + next_word)
+                        i += 2
+                        continue
+                # Case 3: Short fragments (2-4 chars + 1-2 chars)
+                if (2 <= len(word) <= 4 and
+                    1 <= len(next_word) <= 2 and
+                    3 <= len(word) + len(next_word) <= 7):
+                    if not (word in STANDALONE_WORDS and next_word in STANDALONE_WORDS):
+                        result.append(word + next_word)
+                        i += 2
+                        continue
+            result.append(word)
+            i += 1
+        return ' '.join(result)
+    # --- Main Pipelines ---
+    @staticmethod
+    def full_postprocess(text: str, original: str = "", vocab_manager=None) -> str:
+        """
+        Apply all post-processing steps (OPTIMIZED ORDER!)
+        vocab_manager: optional, enables smart ه/ة handling
+        """
+        # 1. Remove hallucinated prefixes
+        if original:
+            text = AraSpellPostProcessor.remove_hallucinated_prefix(text, original)
+        # 2. Basic normalization
+        text = AraSpellPostProcessor.normalize_special_chars(text)
+        # 3. Remove hallucinations
+        text = AraSpellPostProcessor.remove_hallucinations(text)
+        # 4. Collapse repetitions (UNIFIED!)
+        text = AraSpellPostProcessor.unified_collapse_repeated(text)
+        # 5. Fix Hamza (CONSERVATIVE!)
+        text = AraSpellPostProcessor.fix_hamza_conservative(text)
+        # 6. Fix Ta Marbuta (SMART MODE with vocab_manager!)
+        text = AraSpellPostProcessor.fix_ha_ta_marbuta(text, vocab_manager=vocab_manager)
+        # 7. Remove word repetition with 'و'
+        text = AraSpellPostProcessor.remove_word_repetition_with_wa(text)
+        # 8. Remove duplicate words
+        text = AraSpellPostProcessor.remove_duplicate_words(text)
+        # 9. Final space normalization
+        text = AraSpellPostProcessor.normalize_spaces(text)
+        return text
+# ─────────────────────────────────────────────────────────────────────────────
+# ERROR CLASSIFIER
+# ─────────────────────────────────────────────────────────────────────────────
+class ErrorClassifier:
+    """Classify type of spelling error"""
+    NON_ARABIC_KEYBOARD = set('پگچژکەڕڤڵڎےۀۃھیټډڼڑ')
+    @staticmethod
+    def has_char_substitution(text: str) -> bool:
+        return any(c in ErrorClassifier.NON_ARABIC_KEYBOARD for c in text)
+    @staticmethod
+    def has_char_repetition(text: str, threshold: int = 3) -> bool:
+        return bool(re.search(r"(.)\1{" + str(threshold - 1) + ",}", text))
+    @staticmethod
+    def has_word_merge(text: str, max_word_len: int = 8) -> bool:
+        words = text.split()
+        if any(len(w) > max_word_len for w in words):
+            return True
+        if len(words) == 1 and len(text) > 6:
+            return True
+        return False
+    @staticmethod
+    def classify(text: str) -> ErrorType:
+        """Classify the error type"""
+        has_rep = ErrorClassifier.has_char_repetition(text)
+        has_merge = ErrorClassifier.has_word_merge(text)
+        has_sub = ErrorClassifier.has_char_substitution(text)
+        error_count = sum([has_rep, has_merge, has_sub])
+        if error_count >= 2:
+            return ErrorType.MIXED
+        elif has_sub:
+            return ErrorType.CHAR_SUBSTITUTION
+        elif has_rep:
+            return ErrorType.CHAR_REPETITION
+        elif has_merge:
+            return ErrorType.WORD_MERGE
+        else:
+            return ErrorType.CLEAN
+# ═══════════════════════════════════════════════════════════════════════════════
+# RULES-BASED CORRECTOR
+# ═══════════════════════════════════════════════════════════════════════════════
+class RulesBasedCorrector:
+    """Rules-based correction with keyboard proximity mapping."""
+    # Persian/Urdu → Arabic mapping
+    SUBSTITUTION_MAP = {
+        'ک': 'ك', 'ی': 'ي', 'ے': 'ي',
+        'پ': 'ب', 'چ': 'ج', 'ژ': 'ز',
+        'گ': 'ك', 'ڤ': 'ف', 'ڵ': 'ل',
+        'ڕ': 'ر', 'ڎ': 'د', 'ڼ': 'ن',
+        'ټ': 'ت', 'ډ': 'د', 'ړ': 'ر',
+        'ۀ': 'ه', 'ۃ': 'ة', 'ھ': 'ه',
+        'ە': 'ه', 'ڑ': 'ر'
+    }
+    # EXPANDED: 16 prepositions instead of 2
+    PREPOSITIONS = {
+        'من', 'في', 'على', 'عن', 'مع', 'إلى', 'الى',
+        'حتى', 'منذ', 'خلال', 'بعد', 'قبل',
+        'ب', 'ل', 'ك',
+        'لل'
+    }
+    # Keyboard Proximity Mapping
+    # Arabic keyboard layout adjacency
+    KEYBOARD_NEIGHBORS = {
+        'ض': ['ص', 'ق'],
+        'ص': ['ض', 'ث', 'ق'],
+        'ث': ['ص', 'ق'],
+        'ق': ['ض', 'ص', 'ث', 'ف', 'غ'],
+        'ف': ['ق', 'غ', 'ع', 'ب'],
+        'غ': ['ق', 'ف', 'ع', 'ه'],
+        'ع': ['ف', 'غ', 'ه', 'خ'],
+        'ه': ['غ', 'ع', 'خ', 'ح'],
+        'خ': ['ع', 'ه', 'ح', 'ج'],
+        'ح': ['ه', 'خ', 'ج'],
+        'ج': ['خ', 'ح', 'د'],
+        'د': ['ج', 'ذ'],
+        'ذ': ['د'],
+        'ش': ['س', 'ي', 'ئ'],
+        'س': ['ش', 'ي', 'ب'],
+        'ي': ['ش', 'س', 'ب', 'ت'],
+        'ب': ['ي', 'س', 'ف', 'ل', 'ن'],
+        'ل': ['ب', 'ا', 'ن', 'م'],
+        'ا': ['ل', 'ت', 'م'],
+        'ت': ['ي', 'ا', 'ن'],
+        'ن': ['ب', 'ل', 'ت', 'م', 'ك'],
+        'م': ['ل', 'ا', 'ن', 'ك'],
+        'ك': ['ن', 'م', 'ط'],
+        'ط': ['ك', 'ظ'],
+        'ظ': ['ط'],
+        'ئ': ['ش', 'ء', 'ر'],
+        'ء': ['ئ', 'ؤ'],
+        'ؤ': ['ء', 'ر'],
+        'ر': ['ئ', 'ؤ', 'لا', 'ى', 'ز'],
+        'لا': ['ر', 'ى'],
+        'ى': ['ر', 'لا', 'ة', 'ز'],
+        'ة': ['ى', 'و', 'ز'],
+        'و': ['ة', 'ز'],
+        'ز': ['ر', 'ى', 'ة', 'و'],
+        # Alif variants
+        'أ': ['ا', 'إ', 'آ'],
+        'إ': ['ا', 'أ'],
+        'آ': ['ا', 'أ'],
+    }
+    @staticmethod
+    def is_keyboard_neighbor(char1: str, char2: str) -> bool:
+        """Check if two Arabic chars are adjacent on keyboard."""
+        neighbors = RulesBasedCorrector.KEYBOARD_NEIGHBORS.get(char1, [])
+        return char2 in neighbors
+    @staticmethod
+    def fix_char_substitution(text: str) -> str:
+        """Replace Persian/Urdu characters with Arabic"""
+        for old, new in RulesBasedCorrector.SUBSTITUTION_MAP.items():
+            text = text.replace(old, new)
+        return text
+    @staticmethod
+    def fix_char_repetition(text: str) -> str:
+        """Remove excessive character repetition (3+ consecutive → 1)."""
+        # Only collapse 3+ repetitions (not 2+)
+        text = re.sub(r'([^\d\s])\1{2,}', r'\1', text)
+        return text
+    @staticmethod
+    def advanced_heuristic_repair(text: str) -> str:
+        """
+        Apply aggressive heuristic repairs to generate a strong baseline candidate.
+        1. Unified Char Fixes (Persian/Urdu + Repetition)
+        2. Aggressive Word Splitting (Iterative & Anchored)
+        """
+        # 1. Base Fixes
+        text = RulesBasedCorrector.fix_char_substitution(text)
+        text = RulesBasedCorrector.fix_char_repetition(text)
+        # 2. Heuristic Split
+        words = text.split()
+        processed_words = []
+        for word in words:
+            processed_words.append(RulesBasedCorrector._recursive_split(word))
+        return ' '.join(processed_words)
+    @staticmethod
+    def _recursive_split(word: str) -> str:
+        """
+        Recursively split merged words (Anchored to Start)
+        Avoids splitting 'المنزل' -> 'ال من زل' (middle split)
+        """
+        if len(word) < 4:
+            return word
+        # 1. Separable Prepositions (Must be at START)
+        # "فيالبيت" -> "في البيت"
+        separables = sorted(['من', 'في', 'على', 'عن', 'مع', 'إلى', 'الى', 'حتى', 'منذ', 'خلال', 'بعد', 'قبل'], key=len, reverse=True)
+        for sep in separables:
+            # Check matches: exact match or prefix match
+            if word == sep:
+                return word
+            if word.startswith(sep):
+                remainder = word[len(sep):]
+                # Condition: Remainder must be substantial (usually starts with al- or len > 2)
+                if len(remainder) >= 3:
+                     # Recursive call on remainder
+                     return sep + " " + RulesBasedCorrector._recursive_split(remainder)
+        # 2. Common typo merges (e.g. "يا" + Name)
+        if word.startswith('يا') and len(word) > 4:
+             return 'يا ' + RulesBasedCorrector._recursive_split(word[2:])
+        # 3. Attached Particles (Only 'Wa' and 'Fa' are commonly mistakenly merged with non-al words in typos)
+        # "وال" -> "و ال" is usually correct in tokenization but "و" is attached in script.
+        # We only split if it looks like a HARD merge error.
+        return word
+# ═══════════════════════════════════════════════════════════════════════════════
+# OUTPUT VALIDATOR (Hallucination Prevention)
+# ═══════════════════════════════════════════════════════════════════════════════
+class OutputValidator:
+    """Validate model outputs to prevent hallucinations"""
+    @staticmethod
+    def calculate_edit_distance(s1: str, s2: str) -> int:
+        """Calculate Levenshtein distance"""
+        return Levenshtein.distance(s1, s2)
+    @staticmethod
+    def check_character_preservation(original: str, corrected: str) -> Tuple[bool, str]:
+        """Check if characters are mostly preserved (Jaccard similarity)"""
+        chars_original = set(original)
+        chars_corrected = set(corrected)
+        if not chars_original:
+            return True, "valid"
+        intersection = chars_original & chars_corrected
+        union = chars_original | chars_corrected
+        jaccard = len(intersection) / len(union) if union else 0
+        if jaccard < 0.35:
+            return False, "low_character_similarity"
+        return True, "valid"
+    @staticmethod
+    def check_word_count(original: str, corrected: str) -> Tuple[bool, str]:
+        """
+        Check if word count is reasonable
+        Relaxed: Allow splitting merged words (count can double)
+        """
+        len_orig = len(original.split())
+        len_corr = len(corrected.split())
+        # Allow expanding 1 word to up to 3 (e.g. "فيالمدرسة" -> "في المدرسة")
+        if len_orig == 1:
+            if len_corr <= 3:
+                return True, "valid"
+            # If original is very long, allow more splits (e.g. "هذاالولدذهبإلىالمدرسة")
+            if len(original) > 12 and len_corr <= 6:
+                return True, "valid"
+        # For sentences, stricter ratio
+        ratio = len_corr / len_orig if len_orig > 0 else 0
+        if ratio > 2.0 or ratio < 0.5:
+             return False, "word_count_mismatch"
+        return True, "valid"
+    def validate(self, original: str, corrected: str, error_type: str) -> Tuple[bool, str]:
+        """
+        Main validation logic
+        """
+        # 0. Sanity Check
+        if not corrected or not corrected.strip():
+            return False, "empty_output"
+        # Space Leniency: if ONLY difference is whitespace → accept
+        original_no_space = original.replace(' ', '').replace('\u200c', '')  # Also handle ZWNJ
+        corrected_no_space = corrected.replace(' ', '').replace('\u200c', '')
+        if original_no_space == corrected_no_space:
+            # Only whitespace changed - accept immediately
+            return True, "space_leniency_accept"
+        # 1. Length Ratio Check
+        len_orig = len(original)
+        len_corr = len(corrected)
+        # Allow expansion for word splitting
+        if len_corr > len_orig * 2.5:
+             return False, "too_long"
+        # Allow shrinking (but not typically more than 50% unless removing repetition)
+        if len_corr < len_orig * 0.5:
+             # Exception: if original had excessive repetition
+             if error_type == ErrorType.CHAR_REPETITION:
+                 pass
+             else:
+                 return False, "too_short"
+        # 2. Check Word Count
+        is_valid_count, reason = self.check_word_count(original, corrected)
+        if not is_valid_count:
+            return False, reason
+        # 3. Check Character Preservation
+        # Critical for avoiding hallucinations
+        is_valid_chars, reason = self.check_character_preservation(original, corrected)
+        if not is_valid_chars:
+             # Exception: If input was garbage/keyboard mash, preservation might be low.
+             # But for valid inputs, this prevents changing "كتاب" to "مكتبة" (if no roots match)
+             return False, reason
+        return True, "valid"
+# ═══════════════════════════════════════════════════════════════════════════════
+# VOCABULARY MANAGER
+# ═══════════════════════════════════════════════════════════════════════════════
+class VocabularyManager:
+    """
+    Centralized vocabulary management for OOV/IV detection.
+    Key for vocabulary-aware acceptance: OOV→IV = accept, IV→OOV = reject.
+    """
+    # Arabic character equivalence for normalization
+    HAMZA_VARIANTS = {'أ', 'إ', 'آ', 'ء', 'ؤ', 'ئ', 'ا'}
+    ALEF_NORMALIZED = 'ا'
+    TA_MARBUTA = 'ة'
+    HA = 'ه'
+    YA_VARIANTS = {'ي', 'ى'}
+    YA_NORMALIZED = 'ي'
+    def __init__(self, tokenizer):
+        self.tokenizer = tokenizer
+        # Build vocabulary set from tokenizer (exclude subwords and short tokens)
+        self.vocab = {
+            w for w in tokenizer.get_vocab().keys()
+            if w.isalpha() and not w.startswith('##') and len(w) > 1
+        }
+        # Frequency rank: lower index = more common (usually)
+        self.vocab_rank = {w: i for w, i in tokenizer.get_vocab().items()}
+        # Build normalized vocabulary for fuzzy matching
+        self.normalized_vocab = {self.normalize_for_comparison(w): w for w in self.vocab}
+        logger.info(f"VocabularyManager initialized: {len(self.vocab)} words")
+    @classmethod
+    def normalize_for_comparison(cls, word: str) -> str:
+        """
+        Normalize Arabic word for comparison (hamza, ta marbuta, etc.)
+        Used for equivalence checking, not for final output.
+        """
+        result = []
+        for i, char in enumerate(word):
+            # Normalize Hamza variants to Alef
+            if char in cls.HAMZA_VARIANTS:
+                result.append(cls.ALEF_NORMALIZED)
+            # Normalize Ta Marbuta to Ha at word end
+            elif char == cls.TA_MARBUTA and i == len(word) - 1:
+                result.append(cls.HA)
+            # Normalize Ya variants
+            elif char in cls.YA_VARIANTS:
+                result.append(cls.YA_NORMALIZED)
+            else:
+                result.append(char)
+        return ''.join(result)
+    def is_iv(self, word: str) -> bool:
+        """Check if word is In-Vocabulary (known word)."""
+        clean = re.sub(r'[^\w]', '', word)
+        if not clean:
+            return True  # Empty/punctuation only = treat as valid
+        # Direct check
+        if clean in self.vocab:
+            return True
+        # Normalized check (handles hamza/ta marbuta variations)
+        normalized = self.normalize_for_comparison(clean)
+        if normalized in self.normalized_vocab:
+            return True
+        return False
+    def is_oov(self, word: str) -> bool:
+        """Check if word is Out-Of-Vocabulary (unknown word)."""
+        return not self.is_iv(word)
+    def get_frequency_rank(self, word: str) -> int:
+        """Get frequency rank (lower = more common). Returns 999999 for OOV."""
+        clean = re.sub(r'[^\w]', '', word)
+        return self.vocab_rank.get(clean, 999999)
+    def all_words_iv(self, text: str) -> bool:
+        """Check if ALL words in text are In-Vocabulary."""
+        words = text.split()
+        return all(self.is_iv(w) for w in words)
+    def count_oov_words(self, text: str) -> int:
+        """Count number of OOV words in text."""
+        words = text.split()
+        return sum(1 for w in words if self.is_oov(w))
+    def get_oov_words(self, text: str) -> List[str]:
+        """Get list of OOV words in text."""
+        words = text.split()
+        return [w for w in words if self.is_oov(w)]
+    def words_are_equivalent(self, word1: str, word2: str) -> bool:
+        """
+        Check if two words are equivalent (considering Arabic character variations).
+        Useful for accepting corrections that only differ in hamza/ta marbuta.
+        """
+        norm1 = self.normalize_for_comparison(word1)
+        norm2 = self.normalize_for_comparison(word2)
+        return norm1 == norm2
+    @staticmethod
+    def damerau_levenshtein_distance(s1: str, s2: str) -> int:
+        """
+        Calculate Damerau-Levenshtein distance (transpositions count as 1 edit).
+        This is better for Arabic typos like اقصتاديا→اقتصاديا (swap صت→تص).
+        """
+        return jellyfish.damerau_levenshtein_distance(s1, s2)
+    def calculate_similarity(self, original: str, corrected: str) -> float:
+        """
+        Calculate similarity score using Damerau-Levenshtein distance.
+        Returns value between 0 and 1 (1 = identical).
+        """
+        dist = self.damerau_levenshtein_distance(original, corrected)
+        max_len = max(len(original), len(corrected), 1)
+        return 1.0 - (dist / max_len)
+# ═══════════════════════════════════════════════════════════════════════════════
+# WORD ALIGNER
+# ═══════════════════════════════════════════════════════════════════════════════
+class WordAligner:
+    """
+    Aligns input and output words to create hybrid corrections.
+    Helps when model fixes one word but breaks another (Raw Wins/Both Wrong cause).
+    """
+    def __init__(self, vocab_manager):
+        """Initialize with VocabularyManager for IV checks."""
+        self.vocab = vocab_manager
+    def align_words(self, input_text: str, output_text: str) -> str:
+        """
+        Create hybrid by selecting best word from each position.
+        Uses simple space-based alignment (works for most Arabic cases).
+        """
+        input_words = input_text.split()
+        output_words = output_text.split()
+        # If lengths differ significantly, alignment is risky -> fallback to output
+        if abs(len(input_words) - len(output_words)) > 2:
+            input_oov = self.vocab.count_oov_words(input_text)
+            output_oov = self.vocab.count_oov_words(output_text)
+            return output_text if output_oov < input_oov else input_text
+        result = []
+        # Simple position-based alignment (min length)
+        min_len = min(len(input_words), len(output_words))
+        for i in range(min_len):
+            in_word = input_words[i]
+            out_word = output_words[i]
+            best_word = self._select_best_word(in_word, out_word)
+            result.append(best_word)
+        # Append remaining words from the longer sequence
+        if len(output_words) > min_len:
+            result.extend(output_words[min_len:])
+        elif len(input_words) > min_len:
+            # If input is longer, verify if trailing words are IV
+            # If trailing input words are OOV, maybe model was right to remove them?
+            # Safest is to keep them if they are IV, else drop.
+            for w in input_words[min_len:]:
+                 if self.vocab.is_iv(w):
+                     result.append(w)
+        return ' '.join(result)
+    def _select_best_word(self, input_word: str, output_word: str) -> str:
+        """
+        Select best word between input and output version.
+        Logic:
+        1. Input OOV + Output IV → Take Output (Model fixed it)
+        2. Input IV + Output OOV → Keep Input (Model broke it)
+        3. Input IV + Output IV → Keep Input (Conservative) unless Output is much better?
+           - For now, strict conservative: if input is valid, keep it.
+        4. Both OOV → Take Output (Model likely closer)
+        """
+        if input_word == output_word:
+            return input_word
+        in_iv = self.vocab.is_iv(input_word)
+        out_iv = self.vocab.is_iv(output_word)
+        # Case 1: Correction worked (OOV -> IV)
+        if not in_iv and out_iv:
+            return output_word
+        # Case 2: Correction broke it (IV -> OOV)
+        if in_iv and not out_iv:
+            return input_word
+        # Case 3: Both IV (Semantic change or split/merge)
+        # Conservative: Keep input to avoid semantic drift (Contextual errors are rare compared to typos)
+        if in_iv and out_iv:
+            return input_word
+        # Case 4: Both OOV
+        # Subword-level correction
+        # If words are similar length, try character-level blending to find IV
+        if len(input_word) == len(output_word) and len(input_word) >= 3:
+            # Try replacing one char at a time from output into input
+            for i in range(len(input_word)):
+                if input_word[i] != output_word[i]:
+                    # Try input with this one char from output
+                    hybrid = input_word[:i] + output_word[i] + input_word[i+1:]
+                    if self.vocab.is_iv(hybrid):
+                        return hybrid
+                    # Try output with this one char from input
+                    hybrid2 = output_word[:i] + input_word[i] + output_word[i+1:]
+                    if self.vocab.is_iv(hybrid2):
+                        return hybrid2
+        # Default: Take output, usually closer to target even if still OOV
+        return output_word
+# ═══════════════════════════════════════════════════════════════════════════════
+# SPLIT/MERGE SPECIALIST
+# ═══════════════════════════════════════════════════════════════════════════════
+class SplitMergeSpecialist:
+    """
+    Handles word splitting and merging with vocabulary validation.
+    Key patterns:
+    1. SPLIT: OOV word that can be split into two IV words
+       - فيالغالب → في الغالب
+       - يقعبجماعة → يقع بجماعة
+    2. MERGE: Adjacent OOV fragments that can merge to IV
+       - السوري ة → السورية (ta-marbuta attachment)
+       - ال كتاب → الكتاب
+    """
+    # Common Arabic prefixes that can be detached
+    SEPARABLE_PREFIXES = [
+        # Prepositions (longer first for greedy matching)
+        'من', 'في', 'على', 'عن', 'مع', 'إلى', 'الى', 'حتى', 'منذ', 'خلال',
+        'بعد', 'قبل', 'بين', 'حول', 'تحت', 'فوق', 'أمام', 'وراء', 'دون',
+        # Particles
+        'أن', 'لن', 'لم', 'قد', 'سوف', 'كي', 'إذا', 'لو', 'مثل', 'غير',
+        # Call particle
+        'يا',
+    ]
+    # Protected short words that shouldn't be split
+    PROTECTED_WORDS = {
+        'في', 'من', 'على', 'عن', 'مع', 'إلى', 'الى', 'ان', 'أن', 'لا', 'ما', 'هو', 'هي',
+        'لم', 'لن', 'قد', 'كل', 'كان', 'ذلك', 'هذا', 'هذه', 'التي', 'الذي', 'بين',
+    }
+    def __init__(self, vocab_manager):
+        """Initialize with VocabularyManager for IV checks."""
+        self.vocab = vocab_manager
+        self.separable_prefixes = sorted(
+            self.SEPARABLE_PREFIXES, key=len, reverse=True
+        )
+    # Attached prefix patterns that should NOT be split (normal Arabic word formations)
+    ATTACHED_PREFIXES = [
+        'وال', 'بال', 'فال', 'كال', 'لل',   # Conjunction/Preposition + Article
+        'وب', 'وف', 'ول', 'وك', 'وم', 'ون',  # Conjunction + Preposition
+        'فب', 'فل', 'فك', 'فم',              # Conjunction + Preposition
+    ]
+    def split_word(self, word: str) -> str:
+        """
+        Try to split an OOV word into IV components.
+        Strict Strategy:
+        - Only split when BOTH parts are IV
+        - Protect attached prefix patterns (وال، بال، etc.)
+        - Minimum part lengths to prevent micro-splits
+        """
+        # Short words: don't split (increased from 4 to 5 for safety)
+        if len(word) < 5:
+            return word
+        # Already IV: no need to split
+        if self.vocab.is_iv(word):
+            return word
+        # Protected words: don't split
+        if word in self.PROTECTED_WORDS:
+            return word
+        # Protected prefix patterns (وال، بال، فال، etc.)
+        # These are normal Arabic word formations, NOT merge errors
+        for prefix in self.ATTACHED_PREFIXES:
+            if word.startswith(prefix):
+                remainder = word[len(prefix):]
+                # If the remainder (without the prefix) is IV, this is a valid prefixed word
+                if self.vocab.is_iv(remainder):
+                    return word  # Don't split — it's prefix+valid_word
+                # Also check with article: e.g. والخصوصي → وال+خصوصي, check خصوصي
+                if prefix.endswith('ال') and self.vocab.is_iv(remainder):
+                    return word
+        # 1. Try separable prefixes first (higher priority)
+        for prefix in self.separable_prefixes:
+            if word.startswith(prefix) and len(word) > len(prefix) + 2:  # Remainder must be > 2 chars
+                remainder = word[len(prefix):]
+                # Only accept if remainder is IV
+                if self.vocab.is_iv(remainder):
+                    return f"{prefix} {remainder}"
+        # 2. Try all positions - STRICT: BOTH parts must be IV AND both >= 3 chars
+        for i in range(3, len(word) - 2):  # Both parts at least 3 chars
+            left = word[:i]
+            right = word[i:]
+            if self.vocab.is_iv(left) and self.vocab.is_iv(right):
+                return f"{left} {right}"
+        # No valid split found
+        return word
+    # Common Arabic pronoun/possessive suffixes (2-3 chars)
+    # These are often incorrectly split from their host word
+    PRONOUN_SUFFIXES = {'كم', 'هم', 'ها', 'هن', 'كن', 'نا', 'هما', 'كما', 'تم', 'تن'}
+    def merge_fragments(self, text: str) -> str:
+        """
+        Try to merge adjacent OOV fragments into IV words.
+        Key patterns:
+        1. Ta-marbuta detachment: السوري ة → السورية
+        2. Al- detachment: ال كتاب → الكتاب
+        3. General OOV+OOV merging: Only if both are OOV and result is IV
+        4. Short OOV fragment: 1-2 char OOV + next → IV
+        5. Pronoun suffix reattachment: علي كم → عليكم
+        """
+        words = text.split()
+        if len(words) < 2:
+            return text
+        result = []
+        i = 0
+        while i < len(words):
+            word = words[i]
+            # Try to merge with next word
+            if i + 1 < len(words):
+                next_word = words[i + 1]
+                merged = word + next_word
+                # Pattern 1: Detached suffix (ة، ه، ي، ك...)
+                # Allow merging even if 'word' is IV because detached suffix is definitely wrong
+                if len(next_word) == 1 and next_word in 'ةهاي':
+                    if self.vocab.is_iv(merged):
+                        result.append(merged)
+                        i += 2
+                        continue
+                # Pattern 2: Detached 'Al-' prefix
+                # ال كتاب → الكتاب (Safe to merge)
+                if word == 'ال' and len(next_word) >= 2:
+                    if self.vocab.is_iv(merged):
+                        result.append(merged)
+                        i += 2
+                        continue
+                # Pattern 3: General OOV + OOV → IV
+                # STRICT: Both must be OOV to avoid merging valid words
+                if self.vocab.is_oov(word) and self.vocab.is_oov(next_word):
+                    if self.vocab.is_iv(merged):
+                        result.append(merged)
+                        i += 2
+                        continue
+                # Pattern 4: Short OOV fragment (1-2 chars) merge
+                if len(word) <= 2 and self.vocab.is_oov(word):
+                    if self.vocab.is_iv(merged):
+                        result.append(merged)
+                        i += 2
+                        continue
+                # Pattern 5: Pronoun suffix reattachment
+                # Fixes over-splitting: علي كم → عليكم
+                if next_word in self.PRONOUN_SUFFIXES:
+                    if self.vocab.is_iv(merged) and not self.vocab.is_iv(word):
+                        result.append(merged)
+                        i += 2
+                        continue
+                # Pattern 6: Short fragment merge
+                # Merges two short words when combined they form a valid longer word
+                # Fixes: علي كم → عليكم, ويت أمل → ويتأمل, المد فتر → المدفتر
+                # Condition: both words ≤ 3 chars, merged ≥ 5 chars and IV
+                if len(word) <= 3 and len(next_word) <= 3:
+                    if len(merged) >= 5 and self.vocab.is_iv(merged):
+                        result.append(merged)
+                        i += 2
+                        continue
+            result.append(word)
+            i += 1
+        return ' '.join(result)
+    def process_text(self, text: str) -> str:
+        """
+        Apply full split/merge processing to text.
+        Order: First merge, then split.
+        """
+        # Step 1: Merge fragments
+        text = self.merge_fragments(text)
+        # Step 2: Split OOV words
+        words = text.split()
+        processed = []
+        for word in words:
+            if self.vocab.is_oov(word) and len(word) >= 4:
+                split_result = self.split_word(word)
+                processed.append(split_result)
+            else:
+                processed.append(word)
+        return ' '.join(processed)
+# ═══════════════════════════════════════════════════════════════════════════════
+# EDIT DISTANCE CORRECTOR
+# ═══════════════════════════════════════════════════════════════════════════════
+class EditDistanceCorrector:
+    """
+    Generates candidates based on Levenshtein distance.
+    Uses BERT Vocabulary to filter for valid words.
+    """
+    def __init__(self, tokenizer):
+        self.tokenizer = tokenizer
+        # Build strict vocabulary (ignore subwords starting with ## and punctuation)
+        self.vocab = {
+            w for w in tokenizer.get_vocab().keys()
+            if w.isalpha() and not w.startswith('##') and len(w) > 1
+        }
+        # Frequency rank heuristic: lower index = higher frequency (usually)
+        self.vocab_rank = {w: i for w, i in tokenizer.get_vocab().items()}
+    def edits1(self, word):
+        """All edits that are one edit away from `word`."""
+        letters    = 'أابتثجحخدذرزسشصضطظعغفقكلمنهويءآىةئؤ' # Arabic chars
+        splits     = [(word[:i], word[i:])    for i in range(len(word) + 1)]
+        deletes    = [L + R[1:]               for L, R in splits if R]
+        transposes = [L + R[1] + R[0] + R[2:] for L, R in splits if len(R)>1]
+        replaces   = [L + c + R[1:]           for L, R in splits if R for c in letters]
+        inserts    = [L + c + R               for L, R in splits for c in letters]
+        return set(deletes + transposes + replaces + inserts)
+    def edits2(self, word):
+        """All edits that are two edits away from `word`."""
+        return (e2 for e1 in self.edits1(word) for e2 in self.edits1(e1))
+    def known(self, words):
+        """The subset of `words` that appear in the dictionary of known words."""
+        return set(w for w in words if w in self.vocab)
+    def generate_candidate(self, text: str) -> str:
+        """
+        Generate a candidate sentence by fixing OOV words using Edit Distance.
+        """
+        words = text.split()
+        corrected_words = []
+        for word in words:
+            # Clean word for checking
+            clean_word = re.sub(r'[^\w]', '', word)
+            # If word is known, keep it
+            if clean_word in self.vocab:
+                corrected_words.append(word)
+                continue
+            # If OOV, try to find neighbor
+            # 1. Edits 1
+            candidates = self.known(self.edits1(clean_word))
+            # 2. Edits 2 (if no Edits 1)
+            if not candidates:
+                # Optimize: Only check edits2 if word length is reasonable
+                if len(clean_word) < 7:
+                    candidates = self.known(self.edits2(clean_word))
+            if candidates:
+                # Pick best candidate: Lowest vocab rank (most frequent)
+                best_candidate = min(candidates, key=lambda w: self.vocab_rank.get(w, 999999))
+                corrected_words.append(best_candidate)
+            else:
+                # No correction found, keep original
+                corrected_words.append(word)
+        return ' '.join(corrected_words)
+# ═══════════════════════════════════════════════════════════════════════════════
+# CONTEXTUAL CORRECTOR (MLM-based with Batch Scoring)
+# ═══════════════════════════════════════════════════════════════════════════════
+class ContextualCorrector:
+    """MLM-based contextual correction for confusion pairs"""
+    # Common confusion pairs in Arabic
+    CONFUSION_PAIRS = [
+        ('ض', 'ظ'), ('ذ', 'ز'), ('ث', 'س'), ('ص', 'س'),
+        ('ط', 'ت'), ('ق', 'ك'), ('ه', 'ة'), ('ا', 'ى'),
+        ('ت', 'د'), ('د', 'ض'), ('ك', 'ق'), ('غ', 'ق'),
+        ('ج', 'ش'), ('س', 'ز'), ('ف', 'ب'), ('و', 'و'), # (و, و) placeholder, maybe (و, ؤ)?
+        ('ؤ', 'و'), ('ئ', 'ي'), ('ء', 'أ'), ('إ', 'أ'),
+    ]
+    def __init__(self, model_name: str = 'aubmindlab/bert-base-arabertv02', cache_size: int = 10000):
+        """Initialize with BERT MLM model and LRU cache"""
+        from transformers import AutoTokenizer, AutoModelForMaskedLM
+        from functools import lru_cache
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForMaskedLM.from_pretrained(model_name)
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.model = self.model.to(self.device)
+        self.model.eval()
+        # Build confusion map
+        self.confusion_map = self._build_confusion_map()
+        # Stats
+        self.cache_hits = 0
+        self.cache_misses = 0
+        # Create LRU cache for scoring
+        self._score_cache = {}
+        self.cache_size = cache_size
+        # Load vocabulary for filtering
+        self.vocab = self.tokenizer.get_vocab()
+    def _build_confusion_map(self):
+        """Build bidirectional confusion map"""
+        confusion_map = {}
+        for char1, char2 in self.CONFUSION_PAIRS:
+            if char1 not in confusion_map:
+                confusion_map[char1] = []
+            if char2 not in confusion_map:
+                confusion_map[char2] = []
+            confusion_map[char1].append(char2)
+            confusion_map[char2].append(char1)
+        return confusion_map
+    def get_confusable_chars(self, char: str) -> List[str]:
+        """Get confusable characters for a given char"""
+        return self.confusion_map.get(char, [])
+    def generate_candidates(self, word: str) -> List[str]:
+        """Generate candidate corrections for a word"""
+        candidates = [word]
+        # 1. Substitute confusable chars
+        for i, char in enumerate(word):
+            confusables = self.get_confusable_chars(char)
+            for conf_char in confusables:
+                candidate = word[:i] + conf_char + word[i+1:]
+                if candidate not in candidates:
+                    candidates.append(candidate)
+        # 2. Remove repeated characters (deletion)
+        # Fixes: مدررسة -> مدرسة, جميلل -> جميل
+        for i in range(len(word) - 1):
+            if word[i] == word[i+1]:
+                # Remove one instance of the repeated char
+                candidate = word[:i] + word[i+1:]
+                if candidate not in candidates:
+                    candidates.append(candidate)
+        # 3. Edit Distance 1 Candidates (Insertions, Substitutions, Transpositions)
+        # Using a restricted set of characters to avoid explosion
+        COMMON_CHARS = 'ابتثجحخدذرزسشصضطظعغفقكلمنهويأإآءئؤةى'
+        # Filter candidates by vocabulary to prevent hallucinations and scoring errors
+        # Only keep candidates that are valid single tokens in the vocabulary.
+        # Insertions (missing char)
+        for i in range(len(word) + 1):
+            for char in COMMON_CHARS:
+                candidate = word[:i] + char + word[i:]
+                if candidate in self.vocab and candidate not in candidates:
+                    candidates.append(candidate)
+        # Substitutions (wrong char)
+        if len(word) < 7:
+            for i in range(len(word)):
+                for char in COMMON_CHARS:
+                    if char != word[i]:
+                        candidate = word[:i] + char + word[i+1:]
+                        if candidate in self.vocab and candidate not in candidates:
+                            candidates.append(candidate)
+        # Deletions (extra char) - General
+        for i in range(len(word)):
+            candidate = word[:i] + word[i+1:]
+            if len(candidate) > 1:
+                # For deletions, candidate might be a valid word even if not in vocab?
+                # But to be safe and consistent with scoring, let's enforce vocab.
+                # (Note: 'جميل' IS in vocab, so it works).
+                if candidate in self.vocab and candidate not in candidates:
+                    candidates.append(candidate)
+        return candidates
+    def score_with_mlm(self, text: str, position: int, word: str) -> float:
+        """Score a word in context using BERT MLM"""
+        # Check cache
+        cache_key = f"{text}|{position}|{word}"
+        if cache_key in self._score_cache:
+            self.cache_hits += 1
+            return self._score_cache[cache_key]
+        self.cache_misses += 1
+        # Create masked text
+        words = text.split()
+        if position >= len(words):
+            return 0.0
+        masked_words = words.copy()
+        masked_words[position] = '[MASK]'
+        masked_text = ' '.join(masked_words)
+        # Tokenize
+        inputs = self.tokenizer(masked_text, return_tensors='pt', padding=True, truncation=True)
+        inputs = {k: v.to(self.device) for k, v in inputs.items()}
+        # Get predictions
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+            predictions = outputs.logits
+        # Find mask position
+        mask_token_index = (inputs['input_ids'] == self.tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
+        if len(mask_token_index) == 0:
+            return 0.0
+        # Get probabilities for the word
+        mask_token_logits = predictions[0, mask_token_index[0], :]
+        probs = torch.softmax(mask_token_logits, dim=0)
+        # Get word token id
+        word_tokens = self.tokenizer.encode(word, add_special_tokens=False)
+        if not word_tokens:
+            return 0.0
+        word_token_id = word_tokens[0]
+        score = probs[word_token_id].item()
+        # Update cache (with size limit)
+        if len(self._score_cache) >= self.cache_size:
+            # Remove oldest entry (simple FIFO)
+            self._score_cache.pop(next(iter(self._score_cache)))
+        self._score_cache[cache_key] = score
+        return score
+    def score_candidates_batch(self, text: str, position: int, candidates: List[str]) -> dict:
+        """
+        Batch score multiple candidates (NEW - more efficient!)
+        Returns: {candidate: score}
+        """
+        scores = {}
+        for candidate in candidates:
+            scores[candidate] = self.score_with_mlm(text, position, candidate)
+        return scores
+    def predict_masked_token(self, text: str, position: int, top_k: int = 5) -> List[Tuple[str, float]]:
+        """Predict words for a masked position. Returns list of (word, score)."""
+        words = text.split()
+        if position >= len(words):
+            return []
+        masked_words = words.copy()
+        masked_words[position] = '[MASK]'
+        masked_text = ' '.join(masked_words)
+        inputs = self.tokenizer(masked_text, return_tensors='pt', padding=True, truncation=True).to(self.device)
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+            predictions = outputs.logits
+        mask_token_index = (inputs['input_ids'] == self.tokenizer.mask_token_id).nonzero(as_tuple=True)[1]
+        if len(mask_token_index) == 0:
+            return []
+        mask_token_logits = predictions[0, mask_token_index[0], :]
+        probs = torch.softmax(mask_token_logits, dim=0)
+        top_k_weights, top_k_indices = torch.topk(probs, top_k, sorted=True)
+        results = []
+        for i in range(top_k):
+            token_id = top_k_indices[i].item()
+            score = top_k_weights[i].item()
+            token = self.tokenizer.decode([token_id]).strip()
+            if not token.startswith("##") and token not in self.tokenizer.all_special_tokens:
+                results.append((token, score))
+        return results
+    def refine_sentence_with_mask(self, text: str, threshold: float = 0.001, vocab_manager=None, raw_model_output=None) -> str:
+        """Refine sentence by masking weak words and predicting replacements.
+        IV-Safe + Strict similarity + BERT Kill Switch.
+        """
+        words = text.split()
+        refined_words = words.copy()
+        # Build set of raw model words for kill switch
+        raw_words = raw_model_output.split() if raw_model_output else []
+        for i, word in enumerate(words):
+            # IV-Safe check - NEVER replace IV words
+            if vocab_manager and vocab_manager.is_iv(word):
+                continue
+            # BERT Kill Switch: skip words matching raw model output
+            if i < len(raw_words) and word == raw_words[i]:
+                continue
+            # Skip very short words (prepositions etc)
+            if len(word) <= 2:
+                continue
+            # 1. Check confidence
+            current_score = self.score_with_mlm(text, i, word)
+            if current_score > threshold:
+                continue
+            # 2. Mask and Predict
+            predictions = self.predict_masked_token(text, i, top_k=10)
+            # 3. Filter and Select (strict)
+            for pred_word, pred_score in predictions:
+                if pred_word == word:
+                    continue
+                if abs(len(pred_word) - len(word)) > 1:
+                     continue
+                # Similarity Check (0.90 minimum)
+                dist = Levenshtein.distance(word, pred_word)
+                max_len = max(len(word), len(pred_word))
+                similarity = 1.0 - (dist / max_len)
+                if similarity < 0.90:
+                    continue
+                # Must be IV
+                if vocab_manager and vocab_manager.is_oov(pred_word):
+                    continue
+                # Minimum absolute confidence gate (12%)
+                if pred_score < 0.12:
+                    continue
+                # Score Improvement
+                is_original_common = current_score > 0.001
+                if is_original_common:
+                     if pred_score > current_score * 1000:
+                         refined_words[i] = pred_word
+                         break
+                else:
+                    if pred_score > current_score * 50 and pred_score > 0.2:
+                        refined_words[i] = pred_word
+                        break
+        return ' '.join(refined_words)
+    def calculate_sentence_score(self, text: str) -> float:
+        """Calculate fluency score using BERT MLM average word probability."""
+        words = text.split()
+        if not words:
+            return 0.0
+        total_score = 0.0
+        scored_words = 0
+        for i, word in enumerate(words):
+            score = self.score_with_mlm(text, i, word)
+            total_score += score
+            scored_words += 1
+        if scored_words == 0:
+            return 0.0
+        return total_score / scored_words
+# ═══════════════════════════════════════════════════════════════════════════════
+# MAIN SPELL CHECKER CLASS
+# ═══════════════════════════════════════════════════════════════════════════════
+class ArabicSpellChecker:
+    """Main Arabic Spell Checker class"""
+    def __init__(self, model, tokenizer, device, use_contextual: bool = True):
+        """Initialize spell checker with model and components"""
+        self.model = model
+        self.tokenizer = tokenizer
+        self.device = device
+        # Initialize components
+        self.postprocessor = AraSpellPostProcessor()
+        self.classifier = ErrorClassifier()
+        self.rules = RulesBasedCorrector()
+        self.validator = OutputValidator()
+        self.vocab_manager = VocabularyManager(tokenizer)
+        self.edit_corrector = EditDistanceCorrector(tokenizer)  # Edit Distance candidates
+        self.split_merge = SplitMergeSpecialist(self.vocab_manager)
+        # WordAligner for word-level hybrid corrections
+        self.word_aligner = WordAligner(self.vocab_manager)
+        # Initialize contextual corrector (optional)
+        self.use_contextual = use_contextual
+        if use_contextual:
+            try:
+                self.contextual = ContextualCorrector()
+                logger.info("Contextual correction enabled")
+            except Exception as e:
+                logger.warning(f"Contextual correction disabled: {e}")
+                self.contextual = None
+                self.use_contextual = False
+        else:
+            self.contextual = None
+    def _fix_repeated_end_chars(self, text: str) -> str:
+        """
+        🆕 Fix repeated characters at word endings
+        Examples:
+            اليومم → اليوم
+            جميلل → جميل
+            صباحح → صباح
+        """
+        # Remove repeated chars at word end (keep only one)
+        text = re.sub(r'([ا-ي])\1+\b', r'\1', text)
+        return text
+    def _fix_merged_with_errors(self, text: str) -> str:
+        """ Fix merged words that contain errors
+        Examples:
+            الممدرسة → المدرسة
+            الكتابب → الكتاب
+            الططالب → الطالب
+        """
+        # Pattern 1: ال + repeated char + word
+        text = re.sub(r'ال([ا-ي])\1+([ا-ي]{2,})', r'ال\2', text)
+        # Pattern 2: word + repeated char at end
+        text = re.sub(r'\b([ا-ي]{3,})([ا-ي])\2+\b', r'\1\2', text)
+        return text
+    def _split_merged_words_linguistic(self, text: str) -> str:
+        """ Split merged words using linguistic patterns
+        Examples:
+            كلصباح → كل صباح
+            فيالطريق → في الطريق
+            السلامعليكم → السلام عليكم
+        """
+        # Pattern 1: Prepositions + (article)? + word
+        # Added: ك (like in كالكتاب) but careful not to split overlapping words
+        text = re.sub(
+            r'\b(في|من|إلى|الى|حتى|منذ|خلال|بعد|قبل)(ال)?([ا-ي]{3,})',
+            r'\1 \2\3',
+            text
+        )
+        # Pattern 2: كل + word
+        text = re.sub(r'\b(كل)([ا-ي]{3,})', r'\1 \2', text)
+        # Pattern 3: Article repetition
+        text = re.sub(r'([ا-ي]{3,})(ال)([ا-ي]{3,})', r'\1 \2\3', text)
+        # Pattern 4: Single-letter prepositions
+        text = re.sub(r'\b([بلك])(ال)?([ا-ي]{3,})', r'\1 \2\3', text)
+        # Pattern 5: Word + عليكم/عليك
+        text = re.sub(r'([ا-ي]{4,})(عليكم|عليك|عليه|عليها)', r'\1 \2', text)
+        # Pattern 6: على/عن in middle of (merged) words
+        text = re.sub(r'([ا-ي]{3,})(على|عن)([ا-ي]{3,})', r'\1 \2 \3', text)
+        return text
+    def _split_long_words_heuristic(self, text: str, max_length: int = 15) -> str:
+        """ Split suspiciously long words using heuristics
+        """
+        words = text.split()
+        result = []
+        for word in words:
+            if len(word) <= max_length:
+                result.append(word)
+                continue
+            # Check for embedded article
+            if 'ال' in word[2:]:
+                parts = word.split('ال', 1)
+                if len(parts[0]) >= 2 and len(parts[1]) >= 3:
+                    result.extend([parts[0], 'ال' + parts[1]])
+                    continue
+            # Check for common prefixes at start of long word
+            if len(word) >= 8:
+                split_found = False
+                for split_pos in [2, 3]:
+                    prefix = word[:split_pos]
+                    suffix = word[split_pos:]
+                    if prefix in ['في', 'من', 'على', 'عن', 'مع', 'كل', 'ب', 'ل', 'ك']:
+                        result.extend([prefix, suffix])
+                        split_found = True
+                        break
+                if not split_found:
+                    result.append(word)
+            else:
+                result.append(word)
+        return ' '.join(result)
+    def _normalize_tanween_patterns(self, text: str) -> str:
+        """ Normalize tanween patterns
+        Examples:
+            جدأ → جداً
+            كثيرأ → كثيراً
+        """
+        # أ at word end → اً
+        text = re.sub(r'([ا-ي]{2,})أ\b', r'\1اً', text)
+        # Remove standalone أ
+        text = re.sub(r'\s+أ\s+', ' ', text)
+        # Fix accidental splits (e.g. ب + space + word)
+        text = re.sub(r'\b([بلك])\s+([ا-ي])', r'\1\2', text)
+        return text
+    def preprocess(self, text: str) -> str:
+        """Preprocessing pipeline (مع التحسينات المدمجة)"""
+        # Basic normalization
+        text = self.postprocessor.remove_harakat(text)
+        text = self.postprocessor.remove_tatweel(text)
+        text = self.postprocessor.normalize_special_chars(text)
+        # Integrated improvements
+        # Fix repeated chars and merged words with errors FIRST
+        text = self._fix_repeated_end_chars(text)
+        text = self._fix_merged_with_errors(text)
+        # Then split merged words
+        text = self._split_merged_words_linguistic(text)
+        text = self._split_long_words_heuristic(text)
+        text = self._normalize_tanween_patterns(text)
+        # Merge separated 'ال'
+        text = self.postprocessor.merge_separated_al(text)
+        # Collapse repetitions
+        text = self.postprocessor.unified_collapse_repeated(text)
+        # Rules-based fixes
+        text = self.rules.fix_char_substitution(text)
+        text = self.rules.fix_char_repetition(text)
+        # Normalize spaces
+        text = self.postprocessor.normalize_spaces(text)
+        return text
+    def postprocess(self, text: str, original: str = "") -> str:
+        """Postprocessing pipeline — passes vocab_manager for smart ه/ة handling"""
+        return self.postprocessor.full_postprocess(text, original, vocab_manager=self.vocab_manager)
+    def model_inference(self, text: str, num_return_sequences: int = 5) -> List[str]:
+        """Run seq2seq model inference and return top candidates.
+        Also extracts beam scores (token-level probabilities) for diagnostics.
+        """
+        # Tokenize
+        inputs = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=128)
+        inputs = {k: v.to(self.device) for k, v in inputs.items()}
+        # Generate with beam search
+        # Keeping 5 beams as model was trained/optimized for this
+        # Keeping 5 beams as model was trained/optimized for this
+        with torch.no_grad():
+            outputs = self.model.generate(
+                **inputs,
+                num_beams=5,
+                num_return_sequences=num_return_sequences,
+                early_stopping=True,
+                return_dict_in_generate=True,
+                output_scores=True
+            )
+        # Decode
+        candidates = self.tokenizer.batch_decode(outputs.sequences, skip_special_tokens=True)
+        # Store beam scores for potential use
+        self._last_beam_scores = {}
+        if hasattr(outputs, 'sequences_scores') and outputs.sequences_scores is not None:
+            scores = outputs.sequences_scores.tolist()
+            for cand, score in zip(candidates, scores):
+                self._last_beam_scores[cand] = score
+        return candidates
+    def correct(self, text: str) -> str:
+        """
+        Main correction pipeline (RERANKING APPROACH).
+        Stages, in execution order (the numbers match the inline comments below):
+        1.  Preprocess the input (also serves as the baseline candidate).
+        2.  Classify the error type of the preprocessed text.
+        3.  Generate candidates: baseline, rules heuristic, edit-distance, model beams,
+            word-aligned hybrids and a per-position majority vote over the beams.
+        4.  Rerank every candidate (validity, fluency, similarity, vocabulary,
+            keyboard proximity, beam-0 boost) and pick the highest score, then apply
+            the minimum-score and contextual-fluency fallbacks to the baseline.
+        5.  Postprocess the winner and revert words postprocessing turned IV -> OOV.
+        6.  Optional contextual (masked) refinement.
+        7.  Merge fragments.
+        8.  Stability check under re-preprocessing.
+        9.  Word-level comparison against the model's top beam.
+        10. Safety net: whole-sentence comparison against the model's top beam.
+        Args:
+            text: Raw input text.
+        Returns:
+            The corrected text. Empty or whitespace-only input is returned unchanged.
+        """
+        if not text or not text.strip():
+            return text
+        original = text
+        # 1. Preprocess
+        # This provides a strong baseline candidate
+        preprocessed_text = self.preprocess(text)
+        # 2. Classify error type
+        error_type = self.classifier.classify(preprocessed_text)
+        # 3. Generate Candidates
+        candidates = []
+        # A. Baseline (Preprocessed)
+        candidates.append(preprocessed_text)
+        # B. Smart Rules Candidate (Aggressive Heuristic)
+        # Note: built from the raw input `text`, not from the preprocessed text.
+        rules_candidate = self.rules.advanced_heuristic_repair(text)
+        candidates.append(rules_candidate)
+        # B2. Edit Distance Candidate (also built from the raw input)
+        edit_candidate = self.edit_corrector.generate_candidate(text)
+        if edit_candidate != text and edit_candidate != rules_candidate:
+            candidates.append(edit_candidate)
+        # C. Model Beams
+        raw_model_output = None  # Top beam; stays None if inference fails or returns nothing
+        try:
+            model_candidates = self.model_inference(preprocessed_text, num_return_sequences=5)
+            raw_model_output = model_candidates[0] if model_candidates else None
+            candidates.extend(model_candidates)
+            # D. Word-Aligned Hybrid Candidate
+            # Built by the word aligner from the preprocessed text and the top beam
+            if model_candidates:
+                hybrid_candidate = self.word_aligner.align_words(preprocessed_text, model_candidates[0])
+                if hybrid_candidate not in candidates:
+                    candidates.append(hybrid_candidate)
+                # E. Word-aligned hybrids for the next beams as well (not just beam 0)
+                for beam in model_candidates[1:3]:  # beams 1 and 2 (beam 0 is handled above)
+                    hybrid_beam = self.word_aligner.align_words(preprocessed_text, beam)
+                    if hybrid_beam not in candidates:
+                        candidates.append(hybrid_beam)
+            # D2. Token-level Voting Candidate
+            # Majority-vote each whitespace-separated word position across all beams
+            if model_candidates and len(model_candidates) >= 3:
+                try:
+                    beam_word_lists = [c.split() for c in model_candidates]
+                    max_words = max(len(wl) for wl in beam_word_lists)
+                    voted_words = []
+                    for pos in range(max_words):
+                        # Beams shorter than `pos` simply do not vote at this position
+                        words_at_pos = []
+                        for wl in beam_word_lists:
+                            if pos < len(wl):
+                                words_at_pos.append(wl[pos])
+                        if words_at_pos:
+                            most_common = Counter(words_at_pos).most_common(1)[0][0]
+                            voted_words.append(most_common)
+                    voted_candidate = ' '.join(voted_words)
+                    if voted_candidate not in candidates:
+                        candidates.append(voted_candidate)
+                except Exception:
+                    # Voting is best-effort; a failure here only loses this one candidate
+                    pass
+        except Exception as e:
+            # Inference failure is non-fatal: the rule-based candidates are still ranked
+            logger.warning(f"Model inference failed: {e}")
+        # Remove duplicates while preserving order
+        unique_candidates = []
+        seen = set()
+        for c in candidates:
+            if c not in seen:
+                unique_candidates.append(c)
+                seen.add(c)
+        candidates = unique_candidates
+        # 4. Rerank Candidates
+        # The baseline wins by default; any candidate score above -1.0 replaces it
+        best_candidate = preprocessed_text
+        best_score = -1.0
+        # Per-candidate score breakdown, reused by the fallback checks after the loop
+        candidate_scores = []
+        for cand in candidates:
+            # A. Validation Score (Hard Penalty)
+            # Check validity against strict original
+            is_valid, reason = self.validator.validate(original, cand, error_type.value)
+            # Additional check: If candidate is suspiciously shorter than original (and not just harakat removal)
+            if len(cand) < len(original) * 0.5:
+                is_valid = False
+                reason = "too_short"
+            # ═══════════════════════════════════════════════════════════════════════════
+            # VOCABULARY-AWARE ACCEPTANCE
+            # ═══════════════════════════════════════════════════════════════════════════
+            # Logic: OOV→IV = ACCEPT (boost), IV→OOV = REJECT (penalize)
+            # This prevents over-conservative validation from rejecting correct corrections
+            # NOTE(review): input_oov_count does not depend on `cand`; it is recomputed
+            # on every iteration and could be hoisted above the loop.
+            input_oov_count = self.vocab_manager.count_oov_words(original)
+            cand_oov_count = self.vocab_manager.count_oov_words(cand)
+            vocab_boost = 1.0
+            # Case 1: OOV→IV (Correction fixed unknown words) → Accept more readily
+            if input_oov_count > 0 and cand_oov_count < input_oov_count:
+                # Significant boost for reducing OOV words
+                oov_reduction = input_oov_count - cand_oov_count
+                vocab_boost = 1.0 + (oov_reduction * 0.3)  # +30% per OOV fixed
+                # If ALL words are now IV, accept even with higher edit distance
+                if cand_oov_count == 0 and self.vocab_manager.all_words_iv(cand):
+                    # Override validation rejection if OOV→IV
+                    if not is_valid and reason not in ["empty_output"]:
+                        is_valid = True
+                        reason = "vocab_aware_accept"
+            # Case 2: IV→OOV (Correction introduced unknown words) → Penalize
+            elif cand_oov_count > input_oov_count:
+                # Penalize for introducing new OOV words
+                vocab_boost = 0.5  # 50% penalty
+            # Case 3: All IV to begin with → Standard validation
+            elif input_oov_count == 0 and cand_oov_count == 0:
+                # Both are valid vocab, prefer minimal edits (boost stays neutral)
+                vocab_boost = 1.0
+            # ═══════════════════════════════════════════════════════════════════════════
+            # Penalty factor
+            # Valid: 1.0
+            # Invalid: 0.001 (Heavy penalty, essentially disqualified unless all are invalid)
+            validity_factor = 1.0 if is_valid else 0.001
+            # B. Fluency Score (from the contextual scorer, when enabled)
+            fluency_score = 0.0
+            if self.use_contextual and self.contextual:
+                try:
+                    fluency_score = self.contextual.calculate_sentence_score(cand)
+                except Exception as e:
+                    logger.warning(f"Scoring failed: {e}")
+                    fluency_score = 0.5 # Default fallback
+            else:
+                # No contextual scorer: fluency is neutral for every candidate
+                fluency_score = 1.0
+            # C. Similarity Score (Damerau-Levenshtein Distance), measured against the
+            # preprocessed text and normalized by the longer of the two strings
+            dist = VocabularyManager.damerau_levenshtein_distance(preprocessed_text, cand)
+            max_len = max(len(preprocessed_text), len(cand), 1)
+            similarity = 1.0 - (dist / max_len)
+            # Boost exact matches
+            if cand == preprocessed_text:
+                similarity = 1.0
+            # Keyboard Proximity Bonus
+            # If changes between input and candidate are keyboard-adjacent,
+            # it's more likely a typo fix (give bonus).
+            # Only applied when word counts match and the differing words have equal length.
+            keyboard_bonus = 1.0
+            input_words = preprocessed_text.split()
+            cand_words = cand.split()
+            if len(input_words) == len(cand_words):
+                for iw, cw in zip(input_words, cand_words):
+                    if iw != cw and len(iw) == len(cw):
+                        # Check char-by-char differences
+                        for ic, cc in zip(iw, cw):
+                            if ic != cc and RulesBasedCorrector.is_keyboard_neighbor(ic, cc):
+                                keyboard_bonus *= 1.05  # 5% bonus per keyboard-adjacent fix
+            # HIGH CONFIDENCE GATING
+            # If model is extremely confident (high fluency) and words are valid, relax validation
+            # This allows correcting severe corruptions that fail strict edit distance
+            if fluency_score > 0.85 and cand_oov_count == 0:
+                 if not is_valid and reason in ["too_short", "low_character_similarity", "word_count_mismatch"]:
+                      # Check if it makes sense length-wise (don't allow completely empty or massive hallucinations)
+                      if len(cand) >= len(original) * 0.4:
+                          is_valid = True
+                          reason = "high_confidence_override"
+                          vocab_boost *= 1.2  # Bonus for high confidence
+                          validity_factor = 1.0  # Reset validity factor
+            # Final Score = (Fluency^0.3) * (Similarity^3.0) * Validity * VocabBoost * KeyboardBonus * BeamBoost
+            fluency_exp = 0.3
+            similarity_exp = 3.0
+            # Beam 0 Boost — model's top beam gets 15% priority
+            beam_boost = 1.0
+            if raw_model_output and cand == raw_model_output:
+                beam_boost = 1.15
+            final_score = (fluency_score ** fluency_exp) * (similarity ** similarity_exp) * validity_factor * vocab_boost * keyboard_bonus * beam_boost
+            candidate_scores.append({
+                'text': cand,
+                'is_valid': is_valid,
+                'reason': reason,
+                'fluency': fluency_score,
+                'similarity': similarity,
+                'vocab_boost': vocab_boost,
+                'input_oov': input_oov_count,
+                'cand_oov': cand_oov_count,
+                'final_score': final_score
+            })
+            # Strict '>' keeps the earliest candidate on ties
+            if final_score > best_score:
+                best_score = final_score
+                best_candidate = cand
+        # ═══════════════════════════════════════════════════════════════════════════
+        # --- Output Quality Scoring (Minimum Score Threshold) ---
+        # If the winner barely beats the baseline AND adds OOV words → keep the baseline
+        # ═══════════════════════════════════════════════════════════════════════════
+        if best_candidate != preprocessed_text:
+            # Check: did the best candidate actually get a decent score?
+            # The preprocessed input (candidate 0) is always in the pool.
+            # If the best candidate barely beats preprocessed_text, it might not be trustworthy.
+            preprocessed_score = 0.0
+            for cs in candidate_scores:
+                if cs['text'] == preprocessed_text:
+                    preprocessed_score = cs['final_score']
+                    break
+            # If best score is less than 1.05x the preprocessed score AND
+            # the best candidate introduced OOV words → fall back to preprocessed
+            if preprocessed_score > 0 and best_score < preprocessed_score * 1.05:
+                best_oov = self.vocab_manager.count_oov_words(best_candidate)
+                prep_oov = self.vocab_manager.count_oov_words(preprocessed_text)
+                if best_oov > prep_oov:
+                    best_candidate = preprocessed_text
+                    best_score = preprocessed_score
+        # ═══════════════════════════════════════════════════════════════════════════
+        # --- Contextual Validation Layer ---
+        # Compare fluency of input vs best candidate
+        # If correction made text LESS fluent → reject the correction
+        # ═══════════════════════════════════════════════════════════════════════════
+        if best_candidate != preprocessed_text and self.use_contextual and self.contextual:
+            try:
+                input_fluency = self.contextual.calculate_sentence_score(preprocessed_text)
+                # Reuse the fluency already computed for the winner during reranking
+                best_fluency = 0.0
+                for cs in candidate_scores:
+                    if cs['text'] == best_candidate:
+                        best_fluency = cs['fluency']
+                        break
+                # If input is significantly more fluent than best candidate
+                # AND the input has no more OOV words than the candidate → prefer input
+                if input_fluency > 0 and best_fluency > 0:
+                    if input_fluency > best_fluency * 1.5:  # Input 50% more fluent
+                        input_oov = self.vocab_manager.count_oov_words(preprocessed_text)
+                        best_oov = self.vocab_manager.count_oov_words(best_candidate)
+                        if input_oov <= best_oov:
+                            # Input is more fluent AND has fewer/equal OOV → keep input
+                            best_candidate = preprocessed_text
+            except Exception:
+                pass  # Contextual validation is optional
+        # 5. Postprocess Winner
+        result = self.postprocess(best_candidate, original)
+        # 5.5 IV-Safe Postprocessing Check
+        # If postprocessing changed an IV word to OOV, revert that word.
+        # Only attempted when postprocessing kept the word count unchanged.
+        if result != best_candidate:
+            result_words = result.split()
+            best_words = best_candidate.split()
+            if len(result_words) == len(best_words):
+                fixed_words = []
+                # NOTE(review): input_words_pp and idx_fw are not used below.
+                input_words_pp = preprocessed_text.split()
+                for idx_fw, (rw, bw) in enumerate(zip(result_words, best_words)):
+                    if rw != bw:
+                        # Postprocessor changed this word
+                        bw_iv = self.vocab_manager.is_iv(bw)
+                        rw_iv = self.vocab_manager.is_iv(rw)
+                        if bw_iv and not rw_iv:
+                            # IV → OOV: revert to pre-postprocess version
+                            fixed_words.append(bw)
+                        elif bw_iv and rw_iv:
+                            # Postprocess Distance Guard
+                            # DISABLED: Caused word-level regression. When both are IV,
+                            # the postprocessor's choice (rw) is usually better because
+                            # it applies Arabic-specific rules (hamza, ta marbuta).
+                            # (This branch therefore behaves exactly like the else below.)
+                            fixed_words.append(rw)
+                        else:
+                            fixed_words.append(rw)
+                    else:
+                        fixed_words.append(rw)
+                result = ' '.join(fixed_words)
+        # 6. Contextual fine-tuning (BERT Masked Refinement)
+        # IV-Safe mode - pass vocab_manager to protect IV words
+        # BERT Kill Switch - also pass raw_model_output to protect model-confident words
+        # Skipped for results of 3 characters or fewer.
+        if self.use_contextual and self.contextual:
+             if len(result) > 3:
+                 result = self.contextual.refine_sentence_with_mask(
+                     result, vocab_manager=self.vocab_manager,
+                     raw_model_output=raw_model_output
+                 )
+        # 7. Safe Split/Merge Post-processing
+        # Only apply merge_fragments (safe: only merges when result is IV)
+        result = self.split_merge.merge_fragments(result)
+        # ═══════════════════════════════════════════════════════════════════════════
+        # VALIDATION & QUALITY CHECKS
+        # ═══════════════════════════════════════════════════════════════════════════
+        # 8. Output Stability Test (Solution 30)
+        # If running the output through preprocessing again changes it a lot, the
+        # correction is treated as unstable and the raw top beam may replace it.
+        # Only runs when the result differs from the baseline and a model output exists.
+        if result != preprocessed_text and raw_model_output:
+            try:
+                # Quick stability check: run the result through preprocessing only
+                # (full model inference would be too slow)
+                re_preprocessed = self.preprocess(result)
+                # If re-preprocessing changes the result significantly, it was unstable
+                stability_dist = VocabularyManager.damerau_levenshtein_distance(result, re_preprocessed)
+                result_len = max(len(result), 1)
+                if stability_dist > 0:
+                    # Result is not stable under re-preprocessing
+                    stability_ratio = stability_dist / result_len
+                    if stability_ratio > 0.15:  # More than 15% changed → very unstable
+                        # Fall back to raw model output if it's more stable
+                        raw_re = self.preprocess(raw_model_output)
+                        raw_stability = VocabularyManager.damerau_levenshtein_distance(
+                            raw_model_output, raw_re
+                        ) / max(len(raw_model_output), 1)
+                        if raw_stability < stability_ratio:
+                            # Raw is more stable → use it, unless it has more OOV words
+                            raw_oov = self.vocab_manager.count_oov_words(raw_model_output)
+                            our_oov = self.vocab_manager.count_oov_words(result)
+                            if raw_oov <= our_oov:
+                                result = raw_model_output
+            except Exception:
+                pass  # Stability check is optional, don't break pipeline
+        # 9. Bidirectional Word-Level Validation (Solution 24)
+        # Compare our result word-by-word with raw model output
+        # If we corrupted a word that the model got right, revert that word.
+        # Only attempted when both strings have the same number of words.
+        if raw_model_output and result != raw_model_output:
+            result_words = result.split()
+            raw_words = raw_model_output.split()
+            if len(result_words) == len(raw_words):
+                corrected_words = []
+                changed = False
+                for rw, raw_w in zip(result_words, raw_words):
+                    if rw != raw_w:
+                        rw_iv = self.vocab_manager.is_iv(rw)
+                        raw_iv = self.vocab_manager.is_iv(raw_w)
+                        # Case 1: Our word is OOV but raw word is IV → take raw
+                        if not rw_iv and raw_iv:
+                            corrected_words.append(raw_w)
+                            changed = True
+                        # Case 2: Both IV but our word is further from input
+                        elif rw_iv and raw_iv:
+                            # Find corresponding input word.
+                            # Exactly one word is appended per iteration, so the current
+                            # length of corrected_words is the current word position.
+                            input_words = preprocessed_text.split()
+                            idx = len(corrected_words)
+                            if idx < len(input_words):
+                                input_w = input_words[idx]
+                                rw_dist = Levenshtein.distance(input_w, rw)
+                                raw_dist = Levenshtein.distance(input_w, raw_w)
+                                # If raw is closer to input AND both are IV → prefer raw
+                                # (our pipeline likely introduced unnecessary change)
+                                if raw_dist < rw_dist:
+                                    corrected_words.append(raw_w)
+                                    changed = True
+                                else:
+                                    corrected_words.append(rw)
+                            else:
+                                corrected_words.append(rw)
+                        else:
+                            corrected_words.append(rw)
+                    else:
+                        corrected_words.append(rw)
+                if changed:
+                    new_result = ' '.join(corrected_words)
+                    # Only accept if the new result doesn't increase OOV
+                    new_oov = self.vocab_manager.count_oov_words(new_result)
+                    old_oov = self.vocab_manager.count_oov_words(result)
+                    if new_oov <= old_oov:
+                        result = new_result
+        # 10. SAFETY NET: Compare with raw model output (Conservative)
+        # Only switch to raw if raw is CLEARLY better
+        if raw_model_output and raw_model_output != result:
+            raw_oov = self.vocab_manager.count_oov_words(raw_model_output)
+            our_oov = self.vocab_manager.count_oov_words(result)
+            # Case A: Raw all-IV, ours has OOV
+            if raw_oov == 0 and our_oov > 0:
+                is_valid, reason = self.validator.validate(original, raw_model_output, "mixed")
+                if is_valid or reason == "space_leniency_accept":
+                    result = raw_model_output
+            # Case B: Both all-IV but raw is more similar to input
+            # Catches BERT/postprocess damage where result and raw differ by at most 3 edits
+            elif raw_oov == 0 and our_oov == 0:
+                raw_dist = VocabularyManager.damerau_levenshtein_distance(original, raw_model_output)
+                our_dist = VocabularyManager.damerau_levenshtein_distance(original, result)
+                result_vs_raw_dist = VocabularyManager.damerau_levenshtein_distance(result, raw_model_output)
+                # Threshold at 3 chars — covers single char edits and small substitutions
+                # (widening to 5 caused regression by reverting valid hybrid corrections)
+                if raw_dist < our_dist and result_vs_raw_dist <= 3:
+                    raw_valid, _ = self.validator.validate(original, raw_model_output, "mixed")
+                    if raw_valid:
+                        result = raw_model_output
+            # Case C: Word count differs — raw might have correct splitting
+            # Catches: 'فيلق → في فيلق' (pipeline added word)
+            # or 'بلاكبيرن روفرز → بلاكبيرن روفر' (pipeline lost word ending)
+            # NOTE(review): this branch looks unreachable as written. With raw_oov == 0,
+            # Case A already covers our_oov > 0 and Case B covers our_oov == 0, so it can
+            # only run if count_oov_words returns a negative value — confirm the intent.
+            elif raw_oov == 0:
+                raw_wc = len(raw_model_output.split())
+                our_wc = len(result.split())
+                if raw_wc != our_wc:
+                    raw_dist = VocabularyManager.damerau_levenshtein_distance(original, raw_model_output)
+                    our_dist = VocabularyManager.damerau_levenshtein_distance(original, result)
+                    if raw_dist < our_dist:
+                        raw_valid, _ = self.validator.validate(original, raw_model_output, "mixed")
+                        if raw_valid:
+                            result = raw_model_output
+        return result
+# ═══════════════════════════════════════════════════════════════════════════════
+# PUBLIC API
+# ═══════════════════════════════════════════════════════════════════════════════
+# Exported for use by benchmark.py and external consumers
+spell_checker = None  # Will be initialized on first import with __main__ or by benchmark
+def initialize(use_contextual=True):
+    """Initialize the spell checker. Call once before using."""
+    global spell_checker
+    spell_checker = ArabicSpellChecker(model, tokenizer, device, use_contextual=use_contextual)
+    logger.info("Spell checker initialized")
+    return spell_checker
+if __name__ == "__main__":
+    sc = initialize(use_contextual=True)
+    # Quick demo
+    test_cases = [
+        "السلام عليكممم",
+        "فيالمدرسه",
+        "الطقص جميل اليومم",
+    ]
+    print("\n" + "=" * 60)
+    print("AraSpell Demo")
+    print("=" * 60)
+    for text in test_cases:
+        corrected = sc.correct(text)
+        print(f"\n  Input:     {text}")
+        print(f"  Corrected: {corrected}")
+    print("\n" + "=" * 60)
+    print("For full benchmark, run: python benchmark.py")
+    print("=" * 60)

archive/legacy_scripts/Grammer_Rules.py ADDED Viewed

	@@ -0,0 +1,179 @@

+import re
+from camel_tools.tokenizers.word import simple_word_tokenize
+from camel_tools.disambig.mle import MLEDisambiguator
+class ArabicGrammarGuard:
+    def __init__(self):
+        self.mle = MLEDisambiguator.pretrained()
+        self.number_words = ["واحد", "اثنان", "اثنين", "ثلاث", "أربع", "خمس", "ست", "سبع", "ثمان", "تسع", "عشر",
+                             "عشرون", "عشرين", "ثلاثون", "ثلاثين", "أربعون", "أربعين", "خمسون", "خمسين",
+                             "ستون", "ستين", "سبعون", "سبعين", "ثمانون", "ثمانين", "تسعون", "تسعين", "مائة", "ألف"]
+        self.asmaa_khamsa_roots = ['اب', 'اخ', 'حم', 'فو', 'ذو']
+    def preserve_numbers(self, original_text, generated_text):
+        orig_digits = re.findall(r'\d+', original_text)
+        gen_digits = re.findall(r'\d+', generated_text)
+        if orig_digits and gen_digits and orig_digits != gen_digits:
+            return original_text
+        orig_words = [w for w in original_text.split() if any(num in w for num in self.number_words)]
+        gen_words = [w for w in generated_text.split() if any(num in w for num in self.number_words)]
+        if len(orig_words) > 0 and len(gen_words) > 0:
+            if not any(orig[:3] in gen for orig in orig_words for gen in gen_words):
+                 return original_text
+        return generated_text
+    def fix_number_and_gender_agreement(self, text):
+        tokens = simple_word_tokenize(text)
+        disambig_tokens = self.mle.disambiguate(tokens)
+        corrected_tokens = list(tokens)
+        for i in range(len(disambig_tokens) - 1):
+            w1_info = disambig_tokens[i].analyses[0] if disambig_tokens[i].analyses else None
+            w2_info = disambig_tokens[i+1].analyses[0] if disambig_tokens[i+1].analyses else None
+            if not w1_info or not w2_info: continue
+            w1_pos = w1_info.analysis.get('pos', 'unknown')
+            w2_pos = w2_info.analysis.get('pos', 'unknown')
+            w1_word = corrected_tokens[i]
+            w2_word = corrected_tokens[i+1]
+            if w1_pos == 'verb' and w2_pos == 'noun':
+                if (w1_word.endswith('ون') or w1_word.endswith('وا')) and (w2_word.endswith('ون') or w2_word.endswith('ين')):
+                    if w1_word.endswith('ون'): corrected_tokens[i] = w1_word[:-2]
+                    elif w1_word.endswith('وا'): corrected_tokens[i] = w1_word[:-2]
+            elif w1_pos == 'noun' and w2_pos == 'verb':
+                if w1_word.endswith('ون') and not (w2_word.endswith('ون') or w2_word.endswith('وا') or w2_word.endswith('ين')):
+                    if w2_info.analysis.get('num') == 's':
+                        corrected_tokens[i+1] = w2_word + 'ون'
+            # ⚠️ التعديل الجذري هنا: المطابقة للصفات (adj) فقط، ومنع الكلمات التي تبدأ بـ "ب" أو تنتهي بألف التنوين
+            elif w1_pos == 'noun' and w2_pos == 'adj':
+                if w1_word.endswith('ون') and not w2_word.endswith('ون'):
+                    if w2_info.analysis.get('num') == 's' and w2_info.analysis.get('gen') == 'm':
+                        if len(w2_word) > 2 and not w2_word.endswith('ا') and not w2_word.startswith('ب'):
+                            corrected_tokens[i+1] = w2_word + 'ون'
+        return " ".join(corrected_tokens)
+    def smart_asmaa_khamsa_fix(self, text):
+        tokens = simple_word_tokenize(text)
+        disambig_tokens = self.mle.disambiguate(tokens)
+        corrected_tokens = []
+        verb_seen = False
+        for i, token_info in enumerate(disambig_tokens):
+            word = tokens[i]
+            pos_tag = token_info.analyses[0].analysis.get('pos', 'unknown') if token_info.analyses else 'unknown'
+            if pos_tag == 'verb':
+                verb_seen = True
+                corrected_tokens.append(word)
+                continue
+            is_asmaa = any(word.startswith(root) or word.startswith('أ' + root[1:]) for root in self.asmaa_khamsa_roots if len(root)>1)
+            if is_asmaa and len(word) >= 3:
+                if verb_seen:
+                    word = word.replace('ا', 'و').replace('ي', 'و')
+                    verb_seen = False
+            corrected_tokens.append(word)
+        return " ".join(corrected_tokens)
+    def fix_verbs_nasb_and_jazm(self, text):
+        tokens = simple_word_tokenize(text)
+        disambig_tokens = self.mle.disambiguate(tokens)
+        nasb_particles = ['أن', 'لن', 'كي', 'لكي', 'حتى', 'إذن']
+        jazm_particles = ['لم', 'لما', 'لا']
+        corrected_tokens = []
+        for i, token_info in enumerate(disambig_tokens):
+            word = tokens[i]
+            pos_tag = token_info.analyses[0].analysis.get('pos', 'unknown') if token_info.analyses else 'unknown'
+            is_nasb_context = False
+            is_jazm_context = False
+            if i > 0:
+                prev_word = tokens[i-1]
+                if prev_word in nasb_particles or word.startswith('ل'):
+                    is_nasb_context = True
+                if prev_word in jazm_particles or word.startswith('ل') or word.startswith('ول'):
+                    is_jazm_context = True
+            if pos_tag == 'verb' and (is_nasb_context or is_jazm_context):
+                if word.endswith('ون'):
+                    word = word[:-2] + 'وا'
+                elif word.endswith('ان'):
+                    word = word[:-2] + 'ا'
+                elif word.endswith('ين'):
+                    word = word[:-2] + 'ي'
+                elif is_jazm_context:
+                    if word.endswith('و') and len(word) > 3:
+                        word = word[:-1] + 'ُ'
+                    elif (word.endswith('i') or word.endswith('ي')) and len(word) > 3:
+                        if word.endswith('ي'): word = word[:-1] + 'ِ'
+                    elif (word.endswith('ى') or word.endswith('ا')) and len(word) > 3:
+                        word = word[:-1] + 'َ'
+            corrected_tokens.append(word)
+        return " ".join(corrected_tokens)
+    def fix_gender_agreement(self, text):
+        text = re.sub(r'\bهذان\s+(ال[أ-ي]+تان)\b', r'هاتان \1', text)
+        text = re.sub(r'\bهاتان\s+(ال[أ-ي]+[^ت]ان)\b', r'هذان \1', text)
+        text = re.sub(r'\bهذهن\b', 'هاتان', text)
+        text = re.sub(r'\bأحد عشر\s+([أ-ي]+ة)\b', r'إحدى عشرة \1', text)
+        text = re.sub(r'\bأحد عشرة\s+([أ-ي]+ة)\b', r'إحدى عشرة \1', text)
+        text = re.sub(r'\bإحدى عشرة\s+([أ-ي]+ا|رجل[اأ]|طالب[اأ]|مهندس[اأ])\b', r'أحد عشر \1', text)
+        text = re.sub(r'\bإحدى عشر\s+([أ-ي]+ا|رجل[اأ]|طالب[اأ]|مهندس[اأ])\b', r'أحد عشر \1', text)
+        return text
+    def fix_prepositions_advanced(self, text):
+        # ⚠️ السماح بحروف العطف (و، ف) قبل حرف الجر
+        # (في المهندسون) -> (في المهندسين)
+        text = re.sub(r'\b([وف]?(?:في|من|إلى|على|عن|حتى))\s+([أ-ي]{2,})(ون|ان)\b', r'\1 \2ين', text)
+        # (وبالمبرمجون) -> (وبالمبرمجين)
+        text = re.sub(r'\b([وف]?[بلكف])ال([أ-ي]{2,})(ون|ان)\b', r'\1ال\2ين', text)
+        # (ولمهندسون) -> (ولمهندسين)
+        text = re.sub(r'\b([وف]?ل)([أ-ي]{2,})(ون|ان)\b', r'\1\2ين', text)
+        return text
+    def regex_rules_fallback(self, text):
+        # إن وأخواتها (كما هي)
+        text = re.sub(r'\b(إن|أن|كأن|لكن|لعل|ليت)\s+(أبوك|أخوك|ذو|فوك)\b',
+                      lambda m: f"{m.group(1)} {m.group(2).replace('و', 'ا')}", text)
+        # ⚠️ حروف الجر المنفصلة بمسافة (في أخوك -> في أخيك)
+        text = re.sub(r'\b([وف]?(?:في|من|إلى|على|عن))\s+(أبوك|أباك|أخوك|أخاك|ذو|ذا)\b',
+                      lambda m: f"{m.group(1)} {m.group(2).replace('و', 'ي').replace('ا', 'ي')}", text)
+        # ⚠️ حروف الجر المتصلة بدون مسافة (بأخوك، لأبوك -> بأخيك، لأبيك)
+        text = re.sub(r'\b([وف]?[بل])(أبوك|أباك|أخوك|أخاك|ذو|ذا)\b',
+                      lambda m: f"{m.group(1)}{m.group(2).replace('و', 'ي').replace('ا', 'ي')}", text)
+        return text
+    def process(self, original_text, generated_text):
+        text = self.preserve_numbers(original_text, generated_text)
+        text = self.fix_number_and_gender_agreement(text)
+        text = self.smart_asmaa_khamsa_fix(text)
+        text = self.fix_verbs_nasb_and_jazm(text)
+        text = self.fix_gender_agreement(text)
+        text = self.fix_prepositions_advanced(text)
+        text = self.regex_rules_fallback(text)
+        text = re.sub(r'\s+', ' ', text).strip()
+        return text

archive/legacy_scripts/PuncAra.py ADDED Viewed

	@@ -0,0 +1,180 @@

+# -*- coding: utf-8 -*-
+"""Untitled18.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1ebBGzEo4wbwwvReea_n0PRHdfYescKcs
+"""
+import os
+import torch
+from transformers import EncoderDecoderModel, AutoTokenizer
+import re
+# Constants: Hugging Face Hub repository used as the default by initialize_model().
+HF_REPO_ID = "bayan10/PuncAra-v1"
+# Module-level state, filled in by initialize_model() and read by predict_chunk().
+device = None
+test_model = None
+test_tokenizer = None
+def initialize_model(repo_id=HF_REPO_ID):
+    """
+    تهيئة وإعداد كرت الشاشة وتحميل النموذج والـ Tokenizer من Hugging Face Hub.
+    يتم استدعاء هذه الدالة مرة واحدة فقط في بداية تشغيل المشروع.
+    """
+    global device, test_model, test_tokenizer
+    print(f"Loading test model directly from Hugging Face Hub: {repo_id}")
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Loading test model to: {device}")
+    if device.type == "cuda" and not torch.cuda.is_available():
+        print("Warning: CUDA device requested, but torch.cuda.is_available() is False. Model will be loaded to CPU.")
+        device = torch.device("cpu")
+    test_model = EncoderDecoderModel.from_pretrained(repo_id).to(device)
+    test_tokenizer = AutoTokenizer.from_pretrained(repo_id)
+    # إعداد الـ Special tokens للـ Decoder والـ Encoder
+    test_model.config.decoder_start_token_id = test_tokenizer.cls_token_id
+    test_model.config.bos_token_id = test_tokenizer.cls_token_id
+    test_model.config.eos_token_id = test_tokenizer.sep_token_id
+    test_model.config.pad_token_id = test_tokenizer.pad_token_id
+    print("Model and Tokenizer loaded successfully!")
+def predict_chunk(text_chunk):
+    """توليد التوقعات لعلامات الترقيم لقطعة نصية صغيرة لا تتعدى الـ 128 Token."""
+    global device, test_model, test_tokenizer
+    if test_model is None or test_tokenizer is None:
+        raise RuntimeError("الموديل لم يتم تهيئته بعد. يرجى استدعاء initialize_model() أولاً.")
+    # تطبيق الـ Preprocessing لتنظيف التشكيل قبل دخول النص للموديل
+    text_chunk = arabic_preprocessing(text_chunk)
+    inputs = test_tokenizer(text_chunk, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
+    outputs = test_model.generate(
+          inputs.input_ids,
+          attention_mask=inputs.attention_mask,
+          decoder_start_token_id=test_tokenizer.cls_token_id,
+          bos_token_id=test_tokenizer.cls_token_id,
+          eos_token_id=test_tokenizer.sep_token_id,
+          pad_token_id=test_tokenizer.pad_token_id,
+          max_length=128,
+          num_beams=3,
+          repetition_penalty=1.2,
+          length_penalty=1.0,
+          early_stopping=True,
+          do_sample=False
+      )
+    return test_tokenizer.decode(outputs[0], skip_special_tokens=True)
+def arabic_preprocessing(text):
+    """حذف الحركات التشكيلية لتوحيد المدخلات وتسهيل عمل الموديل."""
+    arabic_diacritics = re.compile(r'[\u064B-\u0652]')
+    return re.sub(arabic_diacritics, '', text).strip()
+def arabic_postprocessing(text):
+    """
+    التنظيف والتحسين المطبعي وعلاج مشاكل دمج النصوص وعلامات الترقيم الزائدة.
+    """
+    if not text:
+        return text
+    # 1. حماية الأرقام والكسور والتوقيت من التحويل الخاطئ
+    text = re.sub(r'(?<=\d),(?=\d)', '٪TEMP_COMMA٪', text)
+    text = re.sub(r'(?<=\d):(?=\d)', '٪TEMP_COLON٪', text)
+    # 2. التوحيد والتعريب المطبعي للعلامات
+    text = text.replace(',', '،').replace(';', '؛').replace('?', '؟')
+    # 3. ضبط المسافات الداخلية للأقواس وعلامات الاقتباس العربي
+    text = re.sub(r'\(\s+', '(', text)
+    text = re.sub(r'\s+\)', ')', text)
+    text = re.sub(r'\[\s+', '[', text)
+    text = re.sub(r'\s+\]', ']', text)
+    text = re.sub(r'«\s+', '«', text)
+    text = re.sub(r'\s+»', '»', text)
+    # 4. منع تكرار العلامات الانفعالية عدا النقاط الثلاثية للحذف
+    text = re.sub(r'([،؛:!؟])\1+', r'\1', text)
+    text = re.sub(r'\.{4,}', '...', text)
+    # 5. معالجة التناقضات المباشرة الناتجة عن تجميع الـ Chunks
+    text = re.sub(r'[،؛:]+([.!؟])', r'\1', text)
+    text = re.sub(r'،؛|؛،', '؛', text)
+    text = re.sub(r'([!؟])\.', r'\1', text)
+    # 6. مسح علامات الترقيم العشوائية إذا ظهرت أول النص
+    text = re.sub(r'^[،؛:!؟. \t]+', '', text)
+    # 7. ضمان مسافة فارغة واحدة بعد علامة الترقيم إذا تبعها كلام
+    text = re.sub(r'([،؛:!؟.])(?=\S)', r'\1 ', text)
+    # 8. إعادة الأرقام والكسور والتوقيت المحمية إلى أصلها
+    text = text.replace('٪TEMP_COMMA٪', ',').replace('٪TEMP_COLON٪', ':')
+    # 9. إلصاق علامات الترقيم بالكلمة السابقة لها مباشرة
+    text = re.sub(r'\s+([،؛:!؟.])', r'\1', text)
+    # 10. إزالة المسافات المتكررة الأفقية فقط (بدون لمس السطور الجديدة)
+    text = re.sub(r'[ \t]+', ' ', text).strip()
+    return text
+def fix_punctuation(text):
+    """معالجة الفقرة الواحدة الطويلة عبر تقسيمها لقطع غير متداخلة لمنع التكرار."""
+    words = text.split()
+    total_words = len(words)
+    # جعل حجم الخطوة مساوياً لحجم النافذة يمنع تكرار الكلمات تماماً
+    window_size = 50
+    stride = 50
+    if total_words <= window_size:
+        result = predict_chunk(text)
+    else:
+        segments_output = []
+        for i in range(0, total_words, stride):
+            chunk_words = words[i : i + window_size]
+            chunk_text = " ".join(chunk_words)
+            if not chunk_text.strip():
+                continue
+            processed_segment = predict_chunk(chunk_text).strip()
+            # مسح علامات الترقيم الناتجة عن القص الإجباري بين القطع
+            is_last_segment = (i + window_size) >= total_words
+            if not is_last_segment:
+                punctuation_marks = ".?!،؛:؟!"
+                if processed_segment and processed_segment[-1] in punctuation_marks:
+                    # نمسح العلامة تماماً لأن السياق مستمر في القطعة اللي بعدها
+                    processed_segment = processed_segment[:-1]
+            segments_output.append(processed_segment)
+        result = " ".join(segments_output)
+    # تنظيف المسافات الزائدة والتكرار إن وجد
+    result = re.sub(r'\s+', ' ', result).strip()
+    return result
+def process_full_document(text):
+    if not text:
+        return text
+    # تقسيم بناءً على السطور الجديدة وتنظيف الأسطر الفارغة
+    paragraphs = [p.strip() for p in text.split('\n') if p.strip()]
+    processed_paragraphs = []
+    for paragraph in paragraphs:
+        # معالجة الفقرة المستقلة
+        punctuated_paragraph = fix_punctuation(paragraph)
+        cleaned_paragraph = arabic_postprocessing(punctuated_paragraph)
+        processed_paragraphs.append(cleaned_paragraph)
+    # الدمج بسطرين متباعدين لضمان الفصل البصري التام بين الفقرات
+    return "\n\n".join(processed_paragraphs)

archive/legacy_scripts/gradio Spelling.py ADDED Viewed

	@@ -0,0 +1,210 @@

+import gradio as gr
+import re
+from AraSpell import initialize
+# Initialise the spell checker and load its model (runs once, at import time).
+sc = initialize(use_contextual=True)
+import Levenshtein
+def align_words(in_words, out_words):
+    """Align two word lists and return the edit operations between them.
+
+    A dynamic-programming table is filled where replacing a word costs the
+    Levenshtein distance between the two words and deleting or inserting a
+    word costs its length. The cheapest path is traced back into
+    ``equal``/``replace``/``delete``/``insert`` steps, and neighbouring
+    non-equal steps are merged.
+
+    Returns a list of ``[op, input_words, output_words]`` entries in text order.
+    """
+    n = len(in_words)
+    m = len(out_words)
+    dp = [[0] * (m + 1) for _ in range(n + 1)]
+    # First column / first row: delete every input word / insert every output word.
+    for i in range(1, n + 1):
+        dp[i][0] = dp[i-1][0] + len(in_words[i-1])
+    for j in range(1, m + 1):
+        dp[0][j] = dp[0][j-1] + len(out_words[j-1])
+    for i in range(1, n + 1):
+        for j in range(1, m + 1):
+            cost_replace = Levenshtein.distance(in_words[i-1], out_words[j-1])
+            dp[i][j] = min(
+                dp[i-1][j-1] + cost_replace,
+                dp[i-1][j] + len(in_words[i-1]),
+                dp[i][j-1] + len(out_words[j-1])
+            )
+    # Trace back from the bottom-right corner; on ties the diagonal step is
+    # tried first, then delete, then insert.
+    i, j = n, m
+    ops = []
+    while i > 0 or j > 0:
+        if i > 0 and j > 0:
+            cost_replace = Levenshtein.distance(in_words[i-1], out_words[j-1])
+            if dp[i][j] == dp[i-1][j-1] + cost_replace:
+                if cost_replace == 0:
+                    ops.append(('equal', [in_words[i-1]], [out_words[j-1]]))
+                else:
+                    ops.append(('replace', [in_words[i-1]], [out_words[j-1]]))
+                i -= 1
+                j -= 1
+                continue
+        if i > 0 and dp[i][j] == dp[i-1][j] + len(in_words[i-1]):
+            ops.append(('delete', [in_words[i-1]], []))
+            i -= 1
+        else:
+            ops.append(('insert', [], [out_words[j-1]]))
+            j -= 1
+    # The trace ran backwards, so restore text order.
+    ops.reverse()
+    merged_ops = []
+    for op, in_w, out_w in ops:
+        if not merged_ops:
+            merged_ops.append([op, in_w, out_w])
+            continue
+        prev_op = merged_ops[-1][0]
+        # Fold a non-equal step into the previous non-equal step as one
+        # 'replace', except when both steps are already 'replace'.
+        if op != 'equal' and prev_op != 'equal' and not (op == 'replace' and prev_op == 'replace'):
+            merged_ops[-1][0] = 'replace'
+            merged_ops[-1][1].extend(in_w)
+            merged_ops[-1][2].extend(out_w)
+        else:
+            merged_ops.append([op, in_w, out_w])
+    return merged_ops
+def generate_highlights(input_text):
+    if not input_text or not input_text.strip():
+        return [], {}
+    corrected_text = sc.correct(input_text)
+    in_words = input_text.split()
+    out_words = corrected_text.split()
+    ops = align_words(in_words, out_words)
+    highlight_list = []
+    suggestions_map = {}
+    idx = 0
+    for tag, in_w, out_w in ops:
+        if tag == 'equal':
+            for w in in_w:
+                highlight_list.append((w, None))
+                highlight_list.append((" ", None))
+                idx += 2
+        elif tag == 'replace' or tag == 'insert' or tag == 'delete':
+            in_phrase = " ".join(in_w) if in_w else "[ناقص]"
+            out_phrase = " ".join(out_w) if out_w else "(حذف الكلمة)"
+            highlight_list.append((in_phrase, " "))
+            sugs = [out_phrase]
+            if len(in_w) == 1 and len(out_w) == 1:
+                clean_w = re.sub(r'[^\w]', '', in_w[0])
+                try:
+                    edit_cands = sc.edit_corrector.known(sc.edit_corrector.edits1(clean_w))
+                    if edit_cands:
+                        edit_cands = sorted(list(edit_cands), key=lambda x: sc.vocab_manager.get_frequency_rank(x))
+                        for c in edit_cands:
+                            if c not in sugs and len(sugs) < 3:
+                                sugs.append(c)
+                except Exception:
+                    pass
+            suggestions_map[idx] = sugs
+            highlight_list.append((" ", None))
+            idx += 2
+    if highlight_list and highlight_list[-1] == (" ", None):
+        highlight_list.pop()
+    return highlight_list, suggestions_map
+# ==========================================
+# Interactive user interface (Gradio Blocks)
+# ==========================================
+with gr.Blocks(theme=gr.themes.Soft(), css="""
+    .highlight-error { background-color: #ffcccc !important; border-radius: 4px; padding: 2px; }
+    .rtl-text { direction: rtl !important; text-align: right !important; }
+""") as iface:
+    gr.Markdown("# 📝 AraSpell - المصحح الإملائي التفاعلي")
+    gr.Markdown("أدخل النص أدناه واضغط على **فحص النص**. سيقوم النظام بتلوين الأخطاء باللون الأحمر. **انقر على الكلمة الملونة** لتظهر لك خيارات التصحيح أسفلها!")
+    # State holders that keep data between callbacks.
+    suggestions_state = gr.State({})
+    current_edit_index = gr.State(None)
+    highlight_list_state = gr.State([])
+    with gr.Row():
+        with gr.Column(scale=1):
+            input_box = gr.Textbox(lines=8, label="النص الأصلي", placeholder="أدخل النص العربي هنا...")
+            check_btn = gr.Button("🔍 فحص النص", variant="primary")
+        with gr.Column(scale=1):
+            output_highlights = gr.HighlightedText(
+                label="النتيجة (اضغط على الكلمات الملونة للتصحيح)",
+                combine_adjacent=False,
+                show_legend=False,
+                color_map={" ": "red"},
+                elem_classes="rtl-text"
+            )
+            # Suggestion panel (hidden until a highlighted span is clicked).
+            with gr.Group(visible=False) as suggestion_panel:
+                gr.Markdown("### 💡 اختر التصحيح المناسب:")
+                suggestion_radio = gr.Radio(choices=[], label="")
+                apply_btn = gr.Button("✅ تطبيق التصحيح", variant="secondary")
+    # 1. "Check text" button pressed.
+    def process_text(text):
+        h_list, s_map = generate_highlights(text)
+        # Returns: highlighted spans, suggestions map, the span list again for
+        # the State holder, and an update that hides the suggestion panel.
+        return h_list, s_map, h_list, gr.update(visible=False)
+    check_btn.click(
+        fn=process_text,
+        inputs=[input_box],
+        outputs=[output_highlights, suggestions_state, highlight_list_state, suggestion_panel]
+    )
+    # 2. A span inside the highlighted text was clicked.
+    def on_highlight_click(evt: gr.SelectData, s_map):
+        index = evt.index
+        # The map's keys may come back as strings, so both forms are tried.
+        if index in s_map:
+            choices = s_map[index]
+        elif str(index) in s_map:
+            choices = s_map[str(index)]
+        else:
+            # No suggestions for this span: hide the panel.
+            return gr.update(visible=False), gr.update(), None
+        # Show the panel with this span's choices, preselecting the first one.
+        return gr.update(visible=True), gr.update(choices=choices, value=choices[0]), index
+    output_highlights.select(
+        fn=on_highlight_click,
+        inputs=[suggestions_state],
+        outputs=[suggestion_panel, suggestion_radio, current_edit_index]
+    )
+    # 3. A suggestion was chosen and "Apply" pressed.
+    def apply_correction(choice, edit_idx, h_list):
+        if edit_idx is not None and choice:
+            # Patch the span list in place; the model is not run again.
+            if choice == "(حذف الكلمة)":
+                h_list[edit_idx] = ("", None)
+            else:
+                h_list[edit_idx] = (choice, None)
+            # Rebuild the plain text from the spans.
+            new_text = "".join([t[0] for t in h_list])
+            # Returns: new input text, new highlights, suggestions left
+            # untouched, updated span-list State, and a hidden panel.
+            return new_text, h_list, gr.update(), h_list, gr.update(visible=False)
+        return gr.update(), gr.update(), gr.update(), gr.update(), gr.update()
+    apply_btn.click(
+        fn=apply_correction,
+        inputs=[suggestion_radio, current_edit_index, highlight_list_state],
+        outputs=[input_box, output_highlights, suggestions_state, highlight_list_state, suggestion_panel]
+    )
+# Start the web app only when this file is run as a script.
+if __name__ == "__main__":
+    iface.launch()

archive/legacy_scripts/punctuation_rulesV2.py ADDED Viewed

	@@ -0,0 +1,257 @@

+# PuncAra — Arabic Punctuation Restoration Rules
+# Extracted from PuncAra.py — preprocessing + postprocessing + chunking logic.
+# All classes are imported by punctuation_service.py.
+import re
+import logging
+logger = logging.getLogger(__name__)
+def arabic_preprocessing(text: str) -> str:
+    """Remove Arabic diacritics to normalize input for the model."""
+    arabic_diacritics = re.compile(r'[\u064B-\u0652]')
+    return re.sub(arabic_diacritics, '', text).strip()
+def arabic_postprocessing(text: str) -> str:
+    """
+    Typographic cleanup and punctuation normalization after model inference.
+    Handles: bracket spacing, duplicate marks, chunk-join artifacts, etc.
+    """
+    if not text:
+        return text
+    # 1. Protect numbers/fractions/time from incorrect conversion
+    text = re.sub(r'(?<=\d),(?=\d)', '٪TEMP_COMMA٪', text)
+    text = re.sub(r'(?<=\d):(?=\d)', '٪TEMP_COLON٪', text)
+    # 2. Arabize typographic marks
+    text = text.replace(',', '،').replace(';', '؛').replace('?', '؟')
+    # 3. Fix internal spacing for brackets and Arabic quotes
+    text = re.sub(r'\(\s+', '(', text)
+    text = re.sub(r'\s+\)', ')', text)
+    text = re.sub(r'\[\s+', '[', text)
+    text = re.sub(r'\s+\]', ']', text)
+    text = re.sub(r'«\s+', '«', text)
+    text = re.sub(r'\s+»', '»', text)
+    # 4. Remove repeated emotional marks (except ellipsis)
+    text = re.sub(r'([،؛:!؟])\1+', r'\1', text)
+    text = re.sub(r'\.{4,}', '...', text)
+    # 5. Fix chunk-join contradictions
+    text = re.sub(r'[،؛:]+([.!؟])', r'\1', text)
+    text = re.sub(r'،؛|؛،', '؛', text)
+    text = re.sub(r'([!؟])\.', r'\1', text)
+    # 6. Remove stray leading punctuation
+    text = re.sub(r'^[،؛:!؟. \t]+', '', text)
+    # 7. Ensure single space after punctuation before text
+    text = re.sub(r'([،؛:!؟.])(?=\S)', r'\1 ', text)
+    # 8. Restore protected numbers
+    text = text.replace('٪TEMP_COMMA٪', ',').replace('٪TEMP_COLON٪', ':')
+    # 9. Attach punctuation to preceding word
+    text = re.sub(r'\s+([،؛:!؟.])', r'\1', text)
+    # 10. Collapse horizontal spaces only
+    text = re.sub(r'[ \t]+', ' ', text).strip()
+    return text
+# ══════════════════════════════════════════════════════════════════════════════
+# PUNCTUATION SAFETY LAYER — Pipeline Hardening v3.3
+# ══════════════════════════════════════════════════════════════════════════════
+# Characters counted as punctuation by validate_punctuation_diff (Latin and Arabic marks).
+ARABIC_PUNCT_CHARS = set('.,،؛؟!:;?!')
+MAX_PUNCT_DELTA = 3         # Absolute cap on punctuation marks added by one diff
+MAX_PUNCT_DELTA_SHORT = 1   # Stricter cap for short texts (≤2 words)
+MAX_PUNCT_RATIO = 0.5       # max punctuation delta per word (multi-word diffs)
+def _normalize_for_comparison(text: str) -> str:
+    """
+    Normalize Arabic for safe comparison.
+    Prevents false rejection from hamza/alef/ya variants.
+    """
+    # Remove diacritics
+    text = re.sub(r'[\u064B-\u0652]', '', text)
+    # Fold hamza/alef variants: أ إ آ → ا
+    text = re.sub(r'[أإآ]', 'ا', text)
+    # Fold ya: ى → ي
+    text = text.replace('ى', 'ي')
+    # Fold ta marbuta: ة → ه (comparison only)
+    text = text.replace('ة', 'ه')
+    return text
+def validate_punctuation_diff(diff: dict, full_text: str = '') -> bool:
+    """
+    Return True ONLY if the diff is a safe punctuation-only change.
+    ALLOWED:
+        - Inserting 1 punctuation mark (short text) or 1–3 (long text)
+        - Replacing one punctuation mark with another
+        - Adding terminal punctuation to any sentence (1+ words) that lacks it
+    REJECTED:
+        - Adding/deleting/duplicating Arabic words
+        - Rewriting phrases
+        - Excessive punctuation repetition (3+ consecutive identical)
+        - Punctuation spam: delta/word_count > 0.5 (multi-word diffs)
+        - Short text (≤2 words): delta > 1
+        - Any diff: delta > MAX_PUNCT_DELTA
+        - Adding terminal punctuation when text already ends with punct
+    """
+    original = diff.get('original', '')
+    correction = diff.get('correction', '')
+    # ── Rule 0 (FIX-01, updated FIX-30): Reject terminal punctuation injection ──
+    # PuncAra-v1 unconditionally adds . or ؟ to every sentence.
+    # This rule catches the pattern: "word" → "word." / "word؟" / "word،"
+    # where the ONLY change is appending 1-2 terminal punctuation marks.
+    #
+    # FIX-30: Allow terminal punct for any text with at least 1 word that
+    # doesn't already end with punctuation. Only block for:
+    #   - Text that already has terminal punctuation
+    #   - Text ending in an ellipsis (...)
+    TERMINAL_PUNCT = set('.,،؛؟!:;?!')
+    orig_stripped = original.rstrip()
+    corr_stripped = correction.rstrip()
+    if orig_stripped and corr_stripped:
+        # Check if correction is just original + terminal punct
+        orig_alpha_r0 = re.sub(r'[.,،؛؟!:;?\s]', '', original)
+        corr_alpha_r0 = re.sub(r'[.,،؛؟!:;?\s]', '', correction)
+        if (_normalize_for_comparison(orig_alpha_r0) ==
+                _normalize_for_comparison(corr_alpha_r0)):
+            # Same word content — check if only terminal punct was added
+            orig_punct_end = sum(1 for c in original if c in TERMINAL_PUNCT)
+            corr_punct_end = sum(1 for c in correction if c in TERMINAL_PUNCT)
+            if corr_punct_end > orig_punct_end:
+                # Only adding punctuation — check if it's at the END (terminal)
+                orig_no_punct = re.sub(r'[.,،؛؟!:;?!]+$', '', original)
+                corr_no_punct = re.sub(r'[.,،؛؟!:;?!]+$', '', correction)
+                if _normalize_for_comparison(orig_no_punct.replace(' ', '')) == \
+                   _normalize_for_comparison(corr_no_punct.replace(' ', '')):
+                    # This is a pure terminal-punctuation addition.
+                    # Decide whether to allow based on full text context.
+                    # FIX-30: When full_text isn't provided (e.g. word-level diff
+                    # calls), fall back to counting words in `original` instead of
+                    # treating the count as 0 — that previously rejected every
+                    # single-word diff regardless of the threshold below.
+                    _word_count_source = full_text if full_text else original
+                    _full_word_count = len(re.findall(
+                        r'[\u0600-\u06FFa-zA-Z]+', _word_count_source
+                    ))
+                    _full_already_has_terminal = bool(
+                        re.search(r'[.،؛؟!?!][\s]*$', full_text)
+                    ) if full_text else False
+                    # Also check for ellipsis (... at end)
+                    _full_has_ellipsis = full_text.rstrip().endswith('...') if full_text else False
+                    # FIX-30: Threshold lowered from 5 → 1. The docstring and the
+                    # Phase 13 comment above both documented "3+ words" as the
+                    # intended rule, while the code enforced 5 — and even single-
+                    # word fragments ("اليوم" → "اليوم؟") are a legitimate terminal
+                    # punctuation addition once we have at least one real word.
+                    #
+                    # FIX-31: Removed the FIX-29 exclamation/question-cue guard.
+                    # It required an explicit interrogative word (هل/ماذا/متى/...)
+                    # before allowing "؟" or "!" to be added, which rejected valid
+                    # single-word terminal punctuation additions with no such cue
+                    # (e.g. "اليوم" → "اليوم؟"). Terminal punctuation is now
+                    # allowed regardless of cue words, as long as the remaining
+                    # safety rules below (word count, duplicate terminal marks,
+                    # ellipsis) still hold.
+                    if _full_word_count >= 1 and not _full_already_has_terminal and not _full_has_ellipsis:
+                        logger.info(
+                            f"[PUNC-SAFETY] Allowed terminal punct for sentence "
+                            f"({_full_word_count} words): "
+                            f"'{original}' → '{correction}'"
+                        )
+                        # Fall through to remaining rules (don't return yet)
+                    else:
+                        # Already has terminal punct or ends in ellipsis → REJECT
+                        logger.info(
+                            f"[PUNC-SAFETY] TerminalPunctuationGuard triggered: removing trailing punctuation "
+                            f"'{original}' → '{correction}'"
+                        )
+                        return False
+    # ── Rule 0b (Batch 4): Reject punct insertion when original has no punctuation ──
+    # If the original text has zero Arabic punctuation and the correction
+    # only adds commas/semicolons (not at the very end), it's overcorrection.
+    # This catches "already correct" texts that PuncAra sprinkles with commas.
+    orig_punct_count_r0b = sum(1 for c in original if c in ARABIC_PUNCT_CHARS)
+    if orig_punct_count_r0b == 0:
+        corr_punct_count_r0b = sum(1 for c in correction if c in ARABIC_PUNCT_CHARS)
+        if corr_punct_count_r0b > 0:
+            # Only allow if adding a single period/question at the very end
+            stripped_corr = correction.rstrip()
+            if stripped_corr and stripped_corr[-1] in '.؟?!':
+                # This is terminal punct (already handled by Rule 0)
+                pass
+            else:
+                # Mid-sentence punct insertion on a clean sentence → reject
+                logger.info(
+                    f"[PUNC-SAFETY] Rejected mid-sentence punct insertion on clean text: "
+                    f"'{original}' → '{correction}'"
+                )
+                return False
+    # ── Rule 0c (Batch 4 + FIX-26): Reject punctuation rearrangement/substitution ──
+    # When original already has punctuation and the correction merely MOVES,
+    # SUBSTITUTES, or STACKS marks (e.g., ، → : or ، → ؛ or ؟ → ؟!), reject.
+    # The PuncAra model should NOT replace or pile onto existing punctuation —
+    # a sentence that already ends with punctuation must never get a second
+    # mark added next to it.
+    orig_punct_count_r0c = sum(1 for c in original if c in ARABIC_PUNCT_CHARS)
+    corr_punct_count_r0c = sum(1 for c in correction if c in ARABIC_PUNCT_CHARS)
+    if orig_punct_count_r0c > 0 and corr_punct_count_r0c > 0:
+        # Both have punctuation — check if alpha content is the same
+        orig_alpha_r0c = re.sub(r'[.,،؛؟!:;?\s]', '', original)
+        corr_alpha_r0c = re.sub(r'[.,،؛؟!:;?\s]', '', correction)
+        if _normalize_for_comparison(orig_alpha_r0c) == _normalize_for_comparison(corr_alpha_r0c):
+            # Same word content, but punct changed — reject any punct modification,
+            # whether it's a substitution or an addition on top of existing punct.
+            logger.info(
+                f"[PUNC-SAFETY] Rejected punct substitution/stacking: "
+                f"'{original}' → '{correction}'"
+            )
+            return False
+    # ── Rule 1: Alphabetic content must be identical after normalization ──
+    orig_alpha = re.sub(r'[.,،؛؟!:;?\s]', '', original)
+    corr_alpha = re.sub(r'[.,،؛؟!:;?\s]', '', correction)
+    if _normalize_for_comparison(orig_alpha) != _normalize_for_comparison(corr_alpha):
+        return False
+    # ── Rule 2: Reject excessive repetition (3+ consecutive identical) ──
+    if re.search(r'([.,،؛؟!:;?])\1{2,}', correction):
+        return False
+    # ── Shared computation for Rules 3–5 ──
+    orig_punct_count = sum(1 for c in original if c in ARABIC_PUNCT_CHARS)
+    corr_punct_count = sum(1 for c in correction if c in ARABIC_PUNCT_CHARS)
+    punct_delta = max(0, corr_punct_count - orig_punct_count)
+    word_count = len(re.findall(r'[\u0600-\u06FFa-zA-Z]+', correction)) or 1
+    # ── Rule 3: Short-text hybrid cap (≤2 words → max 1 mark added) ──
+    if word_count <= 2 and punct_delta > MAX_PUNCT_DELTA_SHORT:
+        return False
+    # ── Rule 4: Ratio-based spam protection (multi-word diffs) ──
+    if word_count > 2 and punct_delta / word_count > MAX_PUNCT_RATIO:
+        return False
+    # ── Rule 5: Absolute delta cap ──
+    if punct_delta > MAX_PUNCT_DELTA:
+        return False
+    return True

archive/old_tests/deep_dive_expanded.json ADDED Viewed

	@@ -0,0 +1,1323 @@

+{
+  "timestamp": "2026-06-20T19:17:40.208323+00:00",
+  "api_base": "https://bayan10-bayan-api.hf.space",
+  "health": {
+    "environment": "huggingface_spaces",
+    "mode": "hf_spaces_local",
+    "models": {
+      "autocomplete": true,
+      "grammar": true,
+      "punctuation": true,
+      "spelling": true,
+      "summarization": true
+    },
+    "note": "Free tier: summarization local, other models return input unchanged",
+    "status": "healthy",
+    "supabase": {
+      "configured": true
+    }
+  },
+  "cat1": [
+    {
+      "id": "C1-01",
+      "category": 1,
+      "input": "كانت الفتيات يلعبون في الحديقه",
+      "a_spelling": "كانت الفتيات يلعبون في الحديقة",
+      "a_grammar_on_original": "كانت الفتيات يلعبن في الحديقة",
+      "a_grammar_on_spell_corrected": "كانت الفتيات يلعبن في الحديقة",
+      "a_punctuation": "كانت الفتيات يلعبون في الحديقه.",
+      "grammar_diff_orig_vs_spell": [],
+      "b_corrected": "كانت الفتيات يلعبن في الحديقة.",
+      "b_suggestions": [
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "يلعبن",
+          "end": 19,
+          "id": "48d68e84-19e8-4af0-b1ea-b7ae4c7427f4",
+          "locked": true,
+          "original": "يلعبون",
+          "priority": 3,
+          "start": 13,
+          "type": "grammar"
+        },
+        {
+          "alternatives": [],
+          "confidence": 0.8,
+          "correction": "الحديقة.",
+          "end": 30,
+          "id": "0012d2dc-08f6-44ad-a9d1-e07230045474",
+          "locked": true,
+          "original": "الحديقه",
+          "priority": 2,
+          "start": 23,
+          "type": "punctuation"
+        }
+      ]
+    },
+    {
+      "id": "C1-02",
+      "category": 1,
+      "input": "ان الطالبات ذهبو الى الجامعه",
+      "a_spelling": "ان الط ابت ذهبوا إلى الجامعه",
+      "a_grammar_on_original": "إن الطالبات ذهبن إلى الجامعة",
+      "a_grammar_on_spell_corrected": "إن الطلاب ذهبوا إلى الجامعة",
+      "a_punctuation": "ان الطالبات ذهبو الى الجامعه!",
+      "grammar_diff_orig_vs_spell": [
+        {
+          "word_idx": 1,
+          "gram_on_orig": "الطالبات",
+          "gram_on_spell": "الطلاب"
+        },
+        {
+          "word_idx": 2,
+          "gram_on_orig": "ذهبن",
+          "gram_on_spell": "ذهبوا"
+        }
+      ],
+      "b_corrected": "إن الطالبات ذهبن ذهبوا الجامعة.",
+      "b_suggestions": [
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "ذهبن",
+          "end": 16,
+          "id": "8a21b71d-8a87-4e29-a828-4ce2b343ae2a",
+          "locked": true,
+          "original": "ذهبو",
+          "priority": 3,
+          "start": 12,
+          "type": "grammar"
+        },
+        {
+          "alternatives": [],
+          "confidence": 0.8,
+          "correction": "الجامعة.",
+          "end": 28,
+          "id": "0c9e127e-8e64-4cc4-a928-03651da1dd15",
+          "locked": true,
+          "original": "الجامعه",
+          "priority": 2,
+          "start": 21,
+          "type": "punctuation"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "إن",
+          "end": 2,
+          "id": "156cd1ce-37cd-4ade-888c-9e8d12a83b05",
+          "locked": true,
+          "original": "ان",
+          "priority": 1,
+          "start": 0,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [
+            "ذهبوا",
+            "ال",
+            "الم",
+            "الى"
+          ],
+          "confidence": 0.9,
+          "correction": "ذهبوا",
+          "end": 20,
+          "id": "836d7346-3ea8-4851-bc69-53df3e1ff6b4",
+          "locked": true,
+          "original": "الى",
+          "priority": 1,
+          "start": 17,
+          "type": "spelling"
+        }
+      ]
+    },
+    {
+      "id": "C1-03",
+      "category": 1,
+      "input": "هذة المدينه جميله جدا ومناخها معتدل",
+      "a_spelling": "هذة المدينه جميله جدا ومناخها معتدل",
+      "a_grammar_on_original": "هذه المدينة جميلة جدا ومناخها معتدل",
+      "a_grammar_on_spell_corrected": "هذه المدينة جميلة جدا ومناخها معتدل",
+      "a_punctuation": "هذة المدينه جميله جدا ومناخها معتدل.",
+      "grammar_diff_orig_vs_spell": [],
+      "b_corrected": "هذه المدينة جميلة جدا ومناخها معتدل.",
+      "b_suggestions": [
+        {
+          "alternatives": [],
+          "confidence": 0.8,
+          "correction": "معتدل.",
+          "end": 35,
+          "id": "b960d84a-bcf6-40c3-976e-29632ad7f302",
+          "locked": true,
+          "original": "معتدل",
+          "priority": 2,
+          "start": 30,
+          "type": "punctuation"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "هذه المدينة جميلة",
+          "end": 17,
+          "id": "a89acaad-669a-4867-9ebd-6bd5cbfad2ea",
+          "locked": true,
+          "original": "هذة المدينه جميله",
+          "priority": 1,
+          "start": 0,
+          "type": "spelling"
+        }
+      ]
+    },
+    {
+      "id": "C1-04",
+      "category": 1,
+      "input": "الطلاب اجتهدو في دراستهم وحققو نتائج ممتازه",
+      "a_spelling": "الطلاب الاجتهادوا في دراستهم وحققوا نتائج ممتازه",
+      "a_grammar_on_original": "الطلاب اجتهدو في دراستهم وحققوا نتائج ممتازة",
+      "a_grammar_on_spell_corrected": "الطلاب اجتهدوا في دراستهم وحققوا نتائج ممتازة",
+      "a_punctuation": "الطلاب اجتهدو في دراستهم وحققو نتائج ممتازه.",
+      "grammar_diff_orig_vs_spell": [
+        {
+          "word_idx": 1,
+          "gram_on_orig": "اجتهدو",
+          "gram_on_spell": "اجتهدوا"
+        }
+      ],
+      "b_corrected": "الطلاب اجتهدو في دراستهم وحققوا نتائج ممتازة.",
+      "b_suggestions": [
+        {
+          "alternatives": [],
+          "confidence": 0.8,
+          "correction": "ممتازة.",
+          "end": 43,
+          "id": "19a6216b-b7a1-4c0f-acd9-5c698617443b",
+          "locked": true,
+          "original": "ممتازه",
+          "priority": 2,
+          "start": 37,
+          "type": "punctuation"
+        },
+        {
+          "alternatives": [
+            "وحققوا",
+            "وحقوق",
+            "وحقق",
+            "وحققو"
+          ],
+          "confidence": 0.9,
+          "correction": "وحققوا",
+          "end": 30,
+          "id": "cdcf61bd-8983-44c8-9c3d-8792dc8027c3",
+          "locked": true,
+          "original": "وحققو",
+          "priority": 1,
+          "start": 25,
+          "type": "spelling"
+        }
+      ]
+    },
+    {
+      "id": "C1-05",
+      "category": 1,
+      "input": "ذهب الولد الى المكتبه وقرا كتاب مفيد",
+      "a_spelling": "ذهب الولد إلى المكتبة وقرا كتاب مفيد",
+      "a_grammar_on_original": "ذهب الولد إلى المكتبة وقرا كتابا مفيدا",
+      "a_grammar_on_spell_corrected": "ذهب الولد إلى المكتبة وقرا كتابا مفيدا",
+      "a_punctuation": "ذهب الولد الى المكتبه وقرا، كتاب مفيد",
+      "grammar_diff_orig_vs_spell": [],
+      "b_corrected": "ذهب الولد إلى المكتبة وقرا كتابا مفيدا",
+      "b_suggestions": [
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "كتابا مفيدا",
+          "end": 36,
+          "id": "ede929d9-8112-4d65-a460-7a44cee535f5",
+          "locked": true,
+          "original": "كتاب مفيد",
+          "priority": 3,
+          "start": 27,
+          "type": "grammar"
+        },
+        {
+          "alternatives": [
+            "إلى",
+            "ال",
+            "الم",
+            "الى"
+          ],
+          "confidence": 0.9,
+          "correction": "إلى",
+          "end": 13,
+          "id": "affb4882-0466-4184-93e7-fb3463132a83",
+          "locked": true,
+          "original": "الى",
+          "priority": 1,
+          "start": 10,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [
+            "المكتبة",
+            "المكتب",
+            "المشتبه",
+            "المكتبه"
+          ],
+          "confidence": 0.9,
+          "correction": "المكتبة",
+          "end": 21,
+          "id": "3f73b099-d428-4c05-98f8-138fb1d83c54",
+          "locked": true,
+          "original": "المكتبه",
+          "priority": 1,
+          "start": 14,
+          "type": "spelling"
+        }
+      ]
+    }
+  ],
+  "cat7": [
+    {
+      "id": "C7-01",
+      "category": 7,
+      "input": "ذهب الولد الى المدرسه وقابل المعلمه واخذ الكتاب",
+      "desc": "3-stage chain: spelling الى→إلى, grammar المدرسه→المدرسة, punc adds marks",
+      "a_spelling": "ذهب الولد إلى المدرسه وقابل المعلمه وأخذ الكتاب",
+      "a_grammar": "ذهب الولد إلى المدرسة وقابل المعلمة وأخذ الكتاب",
+      "a_punc": "ذهب الولد الى المدرسه وقابل، المعلمه واخذ الكتاب،",
+      "b_corrected": "ذهب الولد إلى المدرسة وقابل المعلمة وأخذ الكتاب.",
+      "b_suggestions": [
+        {
+          "alternatives": [],
+          "confidence": 0.8,
+          "correction": "الكتاب.",
+          "end": 47,
+          "id": "3e740303-1dcd-42ec-bd6c-7f0af8069e44",
+          "locked": true,
+          "original": "الكتاب",
+          "priority": 2,
+          "start": 41,
+          "type": "punctuation"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "المدرسة",
+          "end": 21,
+          "id": "84f953ae-2d0a-4e99-a07d-7d35638ba843",
+          "locked": true,
+          "original": "المدرسه",
+          "priority": 1,
+          "start": 14,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "المعلمة",
+          "end": 35,
+          "id": "2e793ab2-29fc-454b-8490-ca7cfdfe4404",
+          "locked": true,
+          "original": "المعلمه",
+          "priority": 1,
+          "start": 28,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [
+            "إلى",
+            "ال",
+            "الم",
+            "الى"
+          ],
+          "confidence": 0.9,
+          "correction": "إلى",
+          "end": 13,
+          "id": "d1b951c7-f94e-4941-986c-8ce5fa51bab0",
+          "locked": true,
+          "original": "الى",
+          "priority": 1,
+          "start": 10,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [
+            "وأخذ",
+            "والذ",
+            "واخت",
+            "واخذ"
+          ],
+          "confidence": 0.9,
+          "correction": "وأخذ",
+          "end": 40,
+          "id": "c024f6ae-4740-4d87-9a0d-95818699e4f3",
+          "locked": true,
+          "original": "واخذ",
+          "priority": 1,
+          "start": 36,
+          "type": "spelling"
+        }
+      ],
+      "b_suggestion_count": 5,
+      "overlapping_suggestions": []
+    },
+    {
+      "id": "C7-02",
+      "category": 7,
+      "input": "كانت البنات يلعبون في الحديقه الجميله وفجأه سقطت احداهن",
+      "desc": "Multiple overlapping corrections across all stages",
+      "a_spelling": "كانت البنات يلعبون في الحديقه الجميله وفجأه سقطت احداهن",
+      "a_grammar": "كانت البنات يلعبن في الحديقة الجميلة وفجأة سقطت أختهن",
+      "a_punc": "كانت البنات يلعبون في الحديقه الجميله وفجأه، سقطت احداهن",
+      "b_corrected": "كانت البنات يلعبن في الحديقة الجميلة وفجأة سقطت أختهن.",
+      "b_suggestions": [
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "يلعبن",
+          "end": 18,
+          "id": "2d1e033f-60d9-4921-bb65-d13b890f44d4",
+          "locked": true,
+          "original": "يلعبون",
+          "priority": 3,
+          "start": 12,
+          "type": "grammar"
+        },
+        {
+          "alternatives": [],
+          "confidence": 0.8,
+          "correction": "أختهن.",
+          "end": 55,
+          "id": "409bca6e-33d5-4339-8d75-d8dc1e3a8cea",
+          "locked": true,
+          "original": "احداهن",
+          "priority": 2,
+          "start": 49,
+          "type": "punctuation"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "الحديقة الجميلة وفجأة",
+          "end": 43,
+          "id": "ac5b5008-0563-4168-99aa-46c8de59d1d1",
+          "locked": true,
+          "original": "الحديقه الجميله وفجأه",
+          "priority": 1,
+          "start": 22,
+          "type": "spelling"
+        }
+      ],
+      "b_suggestion_count": 3,
+      "overlapping_suggestions": []
+    },
+    {
+      "id": "C7-03",
+      "category": 7,
+      "input": "ان الذكاء الاصطناعي يلعب دورا هاما في تطوير التكنولوجيا الحديثه ولذالك يجب الاهتمام بة",
+      "desc": "Long sentence with corrections from all 3 stages",
+      "a_spelling": "أن الذكاء الاصطناعي يلعب دورا هاما في تطوير التكنولوجيا الحديثة ولذا ذلك يجب الاهتمام بة",
+      "a_grammar": "إن الذكاء الاصطناعي يؤدي دورا مهمّا في تطوير التكنولوجيا الحديثة ولذلك يجب الاهتمام به",
+      "a_punc": "ان الذكاء الاصطناعي يلعب دورا هاما في تطوير التكنولوجيا الحديثه ولذالك؛ يجب الاهتمام بة",
+      "b_corrected": "أن الذكاء الاصطناعي يلعب دورا هاما في تطوير التكنولوجيا الحديثة ولذا ذلك يجب الاهتمام به",
+      "b_suggestions": [
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "به",
+          "end": 86,
+          "id": "0a44f7d9-1554-428c-809a-706631ffebdd",
+          "locked": true,
+          "original": "بة",
+          "priority": 1,
+          "start": 84,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [
+            "أن",
+            "ال",
+            "من",
+            "ان"
+          ],
+          "confidence": 0.9,
+          "correction": "أن",
+          "end": 2,
+          "id": "512b3c30-eadb-40fc-a481-1d4cd0909459",
+          "locked": true,
+          "original": "ان",
+          "priority": 1,
+          "start": 0,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [
+            "الحديثة",
+            "الحديث",
+            "الحديثي",
+            "الحديثه"
+          ],
+          "confidence": 0.9,
+          "correction": "الحديثة",
+          "end": 63,
+          "id": "9eb1f571-7852-4813-975d-f7cd79102ec8",
+          "locked": true,
+          "original": "الحديثه",
+          "priority": 1,
+          "start": 56,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [
+            "ولذا ذلك",
+            "ولذالك"
+          ],
+          "confidence": 0.85,
+          "correction": "ولذا ذلك",
+          "end": 70,
+          "id": "266db5db-ed2a-49be-accb-76db9e07697f",
+          "locked": true,
+          "original": "ولذالك",
+          "priority": 1,
+          "start": 64,
+          "type": "spelling"
+        }
+      ],
+      "b_suggestion_count": 4,
+      "overlapping_suggestions": []
+    },
+    {
+      "id": "C7-04",
+      "category": 7,
+      "input": "هذة المدينه جميله جدا ومناخها معتدل طوال العام وسكانها طيبون جدا",
+      "desc": "Multiple ه→ة fixes: does grammar lock prevent punc from adding marks near those words?",
+      "a_spelling": "هذة المدينه جميله جدا ومناخها معتدل طوال العام وسكان طيبون جدا",
+      "a_grammar": "هذه المدينة جميلة جدا ومناخها معتدل طوال العام وسكانها طيبون جدا",
+      "a_punc": "هذة المدينه جميله جدا ومناخها معتدل طوال العام وسكانها طيبون جدا.",
+      "b_corrected": "هذه المدينة جميلة جدا ومناخها معتدل طوال العام وسكانها طيبون جدا.",
+      "b_suggestions": [
+        {
+          "alternatives": [],
+          "confidence": 0.8,
+          "correction": "جدا.",
+          "end": 64,
+          "id": "0dd9d98a-f146-492b-87c7-dba4913bdfd4",
+          "locked": true,
+          "original": "جدا",
+          "priority": 2,
+          "start": 61,
+          "type": "punctuation"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "هذه المدينة جميلة",
+          "end": 17,
+          "id": "e12aa2f9-63d7-4f10-a128-13b0b1bbee9a",
+          "locked": true,
+          "original": "هذة المدينه جميله",
+          "priority": 1,
+          "start": 0,
+          "type": "spelling"
+        }
+      ],
+      "b_suggestion_count": 2,
+      "overlapping_suggestions": []
+    },
+    {
+      "id": "C7-05",
+      "category": 7,
+      "input": "الطلاب اللذين اجتهدو في دراستهم حققو نتائج ممتازه في الأمتحانات الصعبه",
+      "desc": "Heavy corrections needed across stages",
+      "a_spelling": "الطلاب اللذين اجتهد في دراستهم حقوق نتائج ممتازه في الأمتحانات الصعبه",
+      "a_grammar": "الطلاب الذين اجتهدو في دراستهم حققوا نتائج ممتازة في الامتحانات الصعبة",
+      "a_punc": "الطلاب اللذين اجتهدو في دراستهم حققو نتائج ممتازه في الأمتحانات الصعبه.",
+      "b_corrected": "الطلاب اللذين اجتهد في دراستهم حققوا نتائج ممتازة في الأمتحانات الصعبة.",
+      "b_suggestions": [
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "حققوا",
+          "end": 36,
+          "id": "b2ae6a56-0879-4572-837f-875895de9020",
+          "locked": true,
+          "original": "حققو",
+          "priority": 3,
+          "start": 32,
+          "type": "grammar"
+        },
+        {
+          "alternatives": [],
+          "confidence": 0.8,
+          "correction": "الصعبة.",
+          "end": 70,
+          "id": "b075d0de-0e6d-4d88-a897-8e79e1845116",
+          "locked": true,
+          "original": "الصعبه",
+          "priority": 2,
+          "start": 64,
+          "type": "punctuation"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "ممتازة",
+          "end": 49,
+          "id": "ee092cbc-bc6f-41f0-9c98-7cc2edeee671",
+          "locked": true,
+          "original": "ممتازه",
+          "priority": 1,
+          "start": 43,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [
+            "اجتهد",
+            "اجتهدو"
+          ],
+          "confidence": 0.9,
+          "correction": "اجتهد",
+          "end": 20,
+          "id": "97a2307e-5a5a-4668-a4fe-052bbf86c4d9",
+          "locked": true,
+          "original": "اجتهدو",
+          "priority": 1,
+          "start": 14,
+          "type": "spelling"
+        }
+      ],
+      "b_suggestion_count": 4,
+      "overlapping_suggestions": []
+    }
+  ],
+  "cat8x": [
+    {
+      "id": "C8X-مدرسة-al",
+      "category": 8,
+      "input": "المدرسة",
+      "root": "مدرسة",
+      "prefix_combo": "al",
+      "track_a_spelling": "المدرسة",
+      "changed": false
+    },
+    {
+      "id": "C8X-مدرسة-wal",
+      "category": 8,
+      "input": "والمدرسة",
+      "root": "مدرسة",
+      "prefix_combo": "wal",
+      "track_a_spelling": "والمدرسة في المدرسة",
+      "changed": true
+    },
+    {
+      "id": "C8X-مدرسة-bal",
+      "category": 8,
+      "input": "بالمدرسة",
+      "root": "مدرسة",
+      "prefix_combo": "bal",
+      "track_a_spelling": "بالمدرسة في المدرسة",
+      "changed": true
+    },
+    {
+      "id": "C8X-مدرسة-lal",
+      "category": 8,
+      "input": "للمدرسة",
+      "root": "مدرسة",
+      "prefix_combo": "lal",
+      "track_a_spelling": "للمدرسة",
+      "changed": false
+    },
+    {
+      "id": "C8X-شمس-al",
+      "category": 8,
+      "input": "الشمس",
+      "root": "شمس",
+      "prefix_combo": "al",
+      "track_a_spelling": "الشمس",
+      "changed": false
+    },
+    {
+      "id": "C8X-شمس-wal",
+      "category": 8,
+      "input": "والشمس",
+      "root": "شمس",
+      "prefix_combo": "wal",
+      "track_a_spelling": "والشمس والشمس",
+      "changed": true
+    },
+    {
+      "id": "C8X-شمس-bal",
+      "category": 8,
+      "input": "بالشمس",
+      "root": "شمس",
+      "prefix_combo": "bal",
+      "track_a_spelling": "الشمس",
+      "changed": true
+    },
+    {
+      "id": "C8X-شمس-lal",
+      "category": 8,
+      "input": "للشمس",
+      "root": "شمس",
+      "prefix_combo": "lal",
+      "track_a_spelling": "للشمس",
+      "changed": false
+    },
+    {
+      "id": "C8X-أمة-al",
+      "category": 8,
+      "input": "الأمة",
+      "root": "أمة",
+      "prefix_combo": "al",
+      "track_a_spelling": "الأمة",
+      "changed": false
+    },
+    {
+      "id": "C8X-أمة-wal",
+      "category": 8,
+      "input": "والأمة",
+      "root": "أمة",
+      "prefix_combo": "wal",
+      "track_a_spelling": "والأمة الأمة",
+      "changed": true
+    },
+    {
+      "id": "C8X-أمة-bal",
+      "category": 8,
+      "input": "بالأمة",
+      "root": "أمة",
+      "prefix_combo": "bal",
+      "track_a_spelling": "الأمة",
+      "changed": true
+    },
+    {
+      "id": "C8X-أمة-lal",
+      "category": 8,
+      "input": "للأمة",
+      "root": "أمة",
+      "prefix_combo": "lal",
+      "track_a_spelling": "للأمة",
+      "changed": false
+    },
+    {
+      "id": "C8X-نافذة-al",
+      "category": 8,
+      "input": "النافذة",
+      "root": "نافذة",
+      "prefix_combo": "al",
+      "track_a_spelling": "النافذة",
+      "changed": false
+    },
+    {
+      "id": "C8X-نافذة-wal",
+      "category": 8,
+      "input": "والنافذة",
+      "root": "نافذة",
+      "prefix_combo": "wal",
+      "track_a_spelling": "النافذة",
+      "changed": true
+    },
+    {
+      "id": "C8X-نافذة-bal",
+      "category": 8,
+      "input": "بالنافذة",
+      "root": "نافذة",
+      "prefix_combo": "bal",
+      "track_a_spelling": "النافذة",
+      "changed": true
+    },
+    {
+      "id": "C8X-نافذة-lal",
+      "category": 8,
+      "input": "للنافذة",
+      "root": "نافذة",
+      "prefix_combo": "lal",
+      "track_a_spelling": "النافذة",
+      "changed": true
+    },
+    {
+      "id": "C8X-علم-al",
+      "category": 8,
+      "input": "العلم",
+      "root": "علم",
+      "prefix_combo": "al",
+      "track_a_spelling": "العلم",
+      "changed": false
+    },
+    {
+      "id": "C8X-علم-wal",
+      "category": 8,
+      "input": "والعلم",
+      "root": "علم",
+      "prefix_combo": "wal",
+      "track_a_spelling": "والعلم هو العلم",
+      "changed": true
+    },
+    {
+      "id": "C8X-علم-bal",
+      "category": 8,
+      "input": "بالعلم",
+      "root": "علم",
+      "prefix_combo": "bal",
+      "track_a_spelling": "العلم بالعلم",
+      "changed": true
+    },
+    {
+      "id": "C8X-علم-lal",
+      "category": 8,
+      "input": "للعلم",
+      "root": "علم",
+      "prefix_combo": "lal",
+      "track_a_spelling": "للعلم",
+      "changed": false
+    },
+    {
+      "id": "C8X-اقتصاد-al",
+      "category": 8,
+      "input": "الاقتصاد",
+      "root": "اقتصاد",
+      "prefix_combo": "al",
+      "track_a_spelling": "الاقتصاد",
+      "changed": false
+    },
+    {
+      "id": "C8X-اقتصاد-wal",
+      "category": 8,
+      "input": "والاقتصاد",
+      "root": "اقتصاد",
+      "prefix_combo": "wal",
+      "track_a_spelling": "والاقتصاد",
+      "changed": false
+    },
+    {
+      "id": "C8X-اقتصاد-bal",
+      "category": 8,
+      "input": "بالاقتصاد",
+      "root": "اقتصاد",
+      "prefix_combo": "bal",
+      "track_a_spelling": "بالاقتصاد في الاقتصاد",
+      "changed": true
+    },
+    {
+      "id": "C8X-اقتصاد-lal",
+      "category": 8,
+      "input": "للاقتصاد",
+      "root": "اقتصاد",
+      "prefix_combo": "lal",
+      "track_a_spelling": "للاقتصاد الاقتصادي",
+      "changed": true
+    }
+  ],
+  "cat9x": [
+    {
+      "id": "C9X-01",
+      "category": 9,
+      "input": "إنّ",
+      "context": "isolation",
+      "concern": "stays إنّ",
+      "track_a_spelling": "إن إن",
+      "changed": true
+    },
+    {
+      "id": "C9X-02",
+      "category": 9,
+      "input": "أنّ",
+      "context": "isolation",
+      "concern": "stays أنّ",
+      "track_a_spelling": "أن أن",
+      "changed": true
+    },
+    {
+      "id": "C9X-03",
+      "category": 9,
+      "input": "إنّ العلم نور",
+      "context": "sentence",
+      "concern": "إنّ stays",
+      "track_a_spelling": "إن العلم نور",
+      "changed": true
+    },
+    {
+      "id": "C9X-04",
+      "category": 9,
+      "input": "علمت أنّ الامتحان صعب",
+      "context": "sentence",
+      "concern": "أنّ stays",
+      "track_a_spelling": "علمت أن الامتحان صعب",
+      "changed": true
+    },
+    {
+      "id": "C9X-05",
+      "category": 9,
+      "input": "علي",
+      "context": "isolation",
+      "concern": "could be name علي or على",
+      "track_a_spelling": "علي",
+      "changed": false
+    },
+    {
+      "id": "C9X-06",
+      "category": 9,
+      "input": "ذهب علي إلى المدرسة",
+      "context": "sentence",
+      "concern": "علي is a name here",
+      "track_a_spelling": "ذهب علي إلى المدرسة",
+      "changed": false
+    },
+    {
+      "id": "C9X-07",
+      "category": 9,
+      "input": "جلس علي الكرسي",
+      "context": "sentence",
+      "concern": "AMBIGUOUS: علي=name or على=on",
+      "track_a_spelling": "جلس علي الكرسي",
+      "changed": false
+    }
+  ],
+  "cat10x": [
+    {
+      "id": "C10X-01a",
+      "category": 10,
+      "input": "الحديقه جميلة جدا",
+      "concern": "error_at_start",
+      "track_a_spelling": "الحديقه جميلة جدا",
+      "a_changed": false,
+      "track_b_corrected": "الحديقة جميلة جدا.",
+      "track_b_suggestions": 2
+    },
+    {
+      "id": "C10X-01b",
+      "category": 10,
+      "input": "الجو حار في الحديقه",
+      "concern": "error_at_end",
+      "track_a_spelling": "الجو حار في الحديقة",
+      "a_changed": true,
+      "track_b_corrected": "الجو حار في الحديقة.",
+      "track_b_suggestions": 1
+    },
+    {
+      "id": "C10X-02a",
+      "category": 10,
+      "input": "الى المدرسة ذهب الولد",
+      "concern": "error_at_start",
+      "track_a_spelling": "إلى المدرسة ذهب الولد",
+      "a_changed": true,
+      "track_b_corrected": "إلى المدرسة ذهب الولد.",
+      "track_b_suggestions": 2
+    },
+    {
+      "id": "C10X-02b",
+      "category": 10,
+      "input": "ذهب الولد الى المدرسة",
+      "concern": "error_at_end",
+      "track_a_spelling": "ذهب الولد إلى المدرسة",
+      "a_changed": true,
+      "track_b_corrected": "ذهب الولد إلى المدرسة.",
+      "track_b_suggestions": 2
+    },
+    {
+      "id": "C10X-DRIFT",
+      "category": 10,
+      "input_len": 713,
+      "word_count": 119,
+      "total_suggestions": 16,
+      "front_half_suggestions": 11,
+      "back_half_suggestions": 5,
+      "coordinate_mismatches": [],
+      "suggestions_detail": [
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "إحدىهن وبدأت",
+          "end": 62,
+          "id": "e892df95-0d05-40bd-969a-ccda1305cf2c",
+          "locked": true,
+          "original": "احداهن وبدءت",
+          "priority": 3,
+          "start": 50,
+          "type": "grammar"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "اجتهدوا",
+          "end": 243,
+          "id": "eef5aad7-31f7-4c1e-8095-88dbdda98944",
+          "locked": true,
+          "original": "اجتهدو",
+          "priority": 3,
+          "start": 237,
+          "type": "grammar"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "حققوا",
+          "end": 259,
+          "id": "abfaa89c-119e-4899-9456-6ee78c929298",
+          "locked": true,
+          "original": "حققو",
+          "priority": 3,
+          "start": 255,
+          "type": "grammar"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "",
+          "end": 712,
+          "id": "afcc69ca-f5d8-4907-b85a-e348d0d06a12",
+          "locked": true,
+          "original": "بين الأشجار",
+          "priority": 3,
+          "start": 701,
+          "type": "grammar"
+        },
+        {
+          "alternatives": [],
+          "confidence": 0.8,
+          "correction": "محمد،",
+          "end": 282,
+          "id": "4eef8996-7a31-4d0e-83ca-e05604b975e0",
+          "locked": true,
+          "original": "محمد",
+          "priority": 2,
+          "start": 278,
+          "type": "punctuation"
+        },
+        {
+          "alternatives": [],
+          "confidence": 0.8,
+          "correction": "جمهورية،",
+          "end": 424,
+          "id": "7e82e486-59af-4002-be9b-5b202dfe8492",
+          "locked": true,
+          "original": "جمهورية",
+          "priority": 2,
+          "start": 417,
+          "type": "punctuation"
+        },
+        {
+          "alternatives": [],
+          "confidence": 0.8,
+          "correction": "بين،",
+          "end": 497,
+          "id": "6b95fcb5-e190-4dec-8d69-22520c1bb6fe",
+          "locked": true,
+          "original": "بين",
+          "priority": 2,
+          "start": 494,
+          "type": "punctuation"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "الحديقة الجميلة وفجأة",
+          "end": 44,
+          "id": "73493796-1711-4996-9ee4-7013191bc9d8",
+          "locked": true,
+          "original": "الحديقه الجميله وفجأه",
+          "priority": 1,
+          "start": 23,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "بشدة",
+          "end": 72,
+          "id": "13c914ea-5b75-4128-aa42-05576b3d55ae",
+          "locked": true,
+          "original": "بشده",
+          "priority": 1,
+          "start": 68,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "إلى المدرسة",
+          "end": 94,
+          "id": "4e827496-d6f1-4a53-b2f7-c78c2d911195",
+          "locked": true,
+          "original": "الى المدرسه",
+          "priority": 1,
+          "start": 83,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "المعلمة وأخذ",
+          "end": 113,
+          "id": "d7548b59-6379-4b95-a5ed-806e5d1d0cfb",
+          "locked": true,
+          "original": "المعلمه واخذ",
+          "priority": 1,
+          "start": 101,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "أن",
+          "end": 123,
+          "id": "aa9b0140-5740-4343-a3a2-3adfa61fa9d9",
+          "locked": true,
+          "original": "ان",
+          "priority": 1,
+          "start": 121,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "هذه المدينة جميلة",
+          "end": 194,
+          "id": "03378376-a164-46c8-8493-55a0dcd97e3e",
+          "locked": true,
+          "original": "هذة المدينه جميله",
+          "priority": 1,
+          "start": 177,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "ممتازة",
+          "end": 272,
+          "id": "b3aac62f-6a7c-4625-b608-5258fea91fcd",
+          "locked": true,
+          "original": "ممتازه",
+          "priority": 1,
+          "start": 266,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "هذه المحاضرة",
+          "end": 632,
+          "id": "e7994f0f-dc27-4c01-b055-0040683a7643",
+          "locked": true,
+          "original": "هذة المحاضره",
+          "priority": 1,
+          "start": 620,
+          "type": "spelling"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "أهمية",
+          "end": 641,
+          "id": "f68bfac9-17f8-4bbe-9def-1ee35e6ac76a",
+          "locked": true,
+          "original": "اهمية",
+          "priority": 1,
+          "start": 636,
+          "type": "spelling"
+        }
+      ]
+    }
+  ],
+  "cat11": [
+    {
+      "id": "C11-01",
+      "category": 11,
+      "input": "",
+      "desc": "empty_string",
+      "input_len": 0,
+      "crashed": false,
+      "b_corrected": "",
+      "b_suggestions": 0,
+      "error": "HTTP 400: {\"error\":\"Text is required\",\"status\":\"error\"}\n"
+    },
+    {
+      "id": "C11-02",
+      "category": 11,
+      "input": " ",
+      "desc": "whitespace_only",
+      "input_len": 1,
+      "crashed": false,
+      "b_corrected": " ",
+      "b_suggestions": 0,
+      "error": "HTTP 400: {\"error\":\"Text is required\",\"status\":\"error\"}\n"
+    },
+    {
+      "id": "C11-03",
+      "category": 11,
+      "input": "أ",
+      "desc": "single_char",
+      "input_len": 1,
+      "crashed": false,
+      "b_corrected": "أ؟",
+      "b_suggestions": 1,
+      "error": null
+    },
+    {
+      "id": "C11-04",
+      "category": 11,
+      "input": "مستشفياتهم",
+      "desc": "long_single_word",
+      "input_len": 10,
+      "crashed": false,
+      "b_corrected": "في مستشفيات هم",
+      "b_suggestions": 1,
+      "error": null
+    },
+    {
+      "id": "C11-05",
+      "category": 11,
+      "input": "ذهبالولدالىالمدرسةوقابلالمعلمة",
+      "desc": "no_spaces",
+      "input_len": 30,
+      "crashed": false,
+      "b_corrected": "ذهبالولدالىالمدرسةوقابلالمعلمة.",
+      "b_suggestions": 1,
+      "error": null
+    },
+    {
+      "id": "C11-06",
+      "category": 11,
+      "input": "...!؟،،؛؛::...",
+      "desc": "all_punctuation",
+      "input_len": 14,
+      "crashed": false,
+      "b_corrected": ". ! ؟ ، ؛ ::.",
+      "b_suggestions": 1,
+      "error": null
+    },
+    {
+      "id": "C11-07",
+      "category": 11,
+      "input": "(([{هذا النص}]))",
+      "desc": "unbalanced_brackets",
+      "input_len": 16,
+      "crashed": false,
+      "b_corrected": "( ( [ { هذا النص } ] ، و",
+      "b_suggestions": 1,
+      "error": null
+    },
+    {
+      "id": "C11-08",
+      "category": 11,
+      "input": "\"هذا\" 'نص' «اختبار»",
+      "desc": "mixed_quotes",
+      "input_len": 19,
+      "crashed": false,
+      "b_corrected": "\" هذا \" مُنصا ' ' « اختبارا »",
+      "b_suggestions": 1,
+      "error": null
+    },
+    {
+      "id": "C11-09",
+      "category": 11,
+      "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطن",
+      "desc": "len_299",
+      "input_len": 299,
+      "crashed": false,
+      "b_corrected": "يستخدم الذكاء الاصطناعي تقنيات، التعلم العميق تستخدم الذكاء الاالعميق،ناعي التقنيات التالتعلم،م العميق يستخدم الذكاء الاصطناعية تقنيات التعلم العميقة يستخدم الذكاء الصناعي تقنيات التعلم عميقا يستخدم ا",
+      "b_suggestions": 7,
+      "error": null
+    },
+    {
+      "id": "C11-10",
+      "category": 11,
+      "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطن",
+      "desc": "len_300",
+      "input_len": 300,
+      "crashed": false,
+      "b_corrected": "يستخدم الذكاء الاصطناعي تقنيات، التعلم العميق تستخدم الذكاء االعميق،صطنالذكاء، التقنيات التعلم العميق يستخدم الذكاء الاصطناعية تقنيات التعلم العميقة يستخدم الذكاء الصناعي تقنيات التعلم عميقا يستخدم ال",
+      "b_suggestions": 14,
+      "error": null
+    },
+    {
+      "id": "C11-11",
+      "category": 11,
+      "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطناعي تقنيات التعلم العميق يستخدم الذكاء الاصطن",
+      "desc": "len_301",
+      "input_len": 301,
+      "crashed": false,
+      "b_corrected": "يستخدم الذكاء الاصطناعي تقنيات، التعلم العميق تستخدم الذكاء االعميق،صطنالذكاء، التقنيات التعلم العميق يستخدم الذكاء الاصطناعية تقنيات التعلم العميقة يستخدم الذكاء الصناعي تقنيات التعلم عميقا يستخدم ال",
+      "b_suggestions": 14,
+      "error": null
+    },
+    {
+      "id": "C11-12",
+      "category": 11,
+      "input": "يلعب الطلاب في الحديقه بعد المدرسه وقبل العشاء",
+      "desc": "multi_stage_disagreement",
+      "input_len": 46,
+      "crashed": false,
+      "b_corrected": "يلعب الطلاب في الحديقة بعد المدرسة وقبل العشاء.",
+      "b_suggestions": 3,
+      "error": null
+    },
+    {
+      "id": "C11-13",
+      "category": 11,
+      "input": "الحمد لله",
+      "desc": "model_returns_identical",
+      "input_len": 9,
+      "crashed": false,
+      "b_corrected": "الحمد لله.",
+      "b_suggestions": 1,
+      "error": null
+    },
+    {
+      "id": "C11-14",
+      "category": 11,
+      "input": "مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مرحبا مر",
+      "desc": "100x_repeated_word",
+      "input_len": 600,
+      "crashed": false,
+      "b_corrected": "مرحبا مرحبا مرحبا ومرحبا مرحبا مرحبامرحبا مرحبا مرحبا مرحب مرحبا مرحبا وسهلا مرحبا مرحبا ترحيبا مرحبا مرحبا يا مرحبا مرحبا نرحب مرحبا مرحبا ترحيب مرحبا مرحبا أهلا مرحبا مرحبا اهلا مرحبا مرحبا وداعا مر",
+      "b_suggestions": 4,
+      "error": null
+    },
+    {
+      "id": "C11-15",
+      "category": 11,
+      "input": "I went to the مدرسة and met the معلم in the فصل",
+      "desc": "heavy_code_switch",
+      "input_len": 47,
+      "crashed": false,
+      "b_corrected": "I went to the مدرسة and met the معلم in the الفصل.",
+      "b_suggestions": 1,
+      "error": null
+    },
+    {
+      "id": "C11-16",
+      "category": 11,
+      "input": "ايش هالحكي يا زلمة",
+      "desc": "levantine_dialect",
+      "input_len": 18,
+      "crashed": false,
+      "b_corrected": "إيش هالحكي يا زلمة؟",
+      "b_suggestions": 2,
+      "error": null
+    },
+    {
+      "id": "C11-17",
+      "category": 11,
+      "input": "شنو تسوي هسه",
+      "desc": "iraqi_dialect",
+      "input_len": 12,
+      "crashed": false,
+      "b_corrected": "شنو تسوي هسة",
+      "b_suggestions": 1,
+      "error": null
+    },
+    {
+      "id": "C11-RACE",
+      "category": 11,
+      "input": "كانت الفتيات يلعبون في الحديقه",
+      "desc": "parallel_race_condition",
+      "r1_corrected": "كانت الفتيات يلعبن في الحديقة.",
+      "r2_corrected": "كانت الفتيات يلعبن في الحديقة.",
+      "r1_suggestions": 2,
+      "r2_suggestions": 2,
+      "identical": true
+    }
+  ]
+}

archive/old_tests/deep_dive_expanded.py ADDED Viewed

	@@ -0,0 +1,428 @@

+"""
+BAYAN Deep-Dive Test Harness — EXPANDED (ALL Categories)
+Covers every item from the original prompt that was missing.
+"""
+import sys, os, re, json, time, argparse, concurrent.futures
+from datetime import datetime, timezone
+import requests
+# Base URL of the deployed BAYAN API; api_call appends the endpoint path to it.
+API_BASE = "https://bayan10-bayan-api.hf.space"
+# Timeout in seconds passed to requests.post for every API request.
+TIMEOUT = 60
+def api_call(endpoint, text, retries=2):
+    url = f"{API_BASE}{endpoint}"
+    for attempt in range(retries + 1):
+        try:
+            t0 = time.time()
+            resp = requests.post(url, json={"text": text}, timeout=TIMEOUT)
+            elapsed = int((time.time() - t0) * 1000)
+            if resp.status_code == 200:
+                data = resp.json()
+                data['_elapsed_ms'] = elapsed
+                data['_timestamp'] = datetime.now(timezone.utc).isoformat()
+                return data
+            else:
+                if attempt < retries:
+                    time.sleep(2)
+                    continue
+                return {"error": f"HTTP {resp.status_code}: {resp.text[:200]}", "_elapsed_ms": elapsed}
+        except requests.exceptions.Timeout:
+            return {"error": f"Timeout after {TIMEOUT}s"}
+        except Exception as e:
+            return {"error": str(e)}
+def track_a_spelling(text):
+    r = api_call("/api/spelling", text)
+    if "error" in r and "corrected_text" not in r:
+        return {"input": text, "output": text, "error": r["error"], "changed": False}
+    c = r.get("corrected_text", text)
+    return {"input": text, "output": c, "changed": c != text, "elapsed_ms": r.get("_elapsed_ms")}
+def track_a_grammar(text):
+    r = api_call("/api/grammar", text)
+    if "error" in r and "corrected_text" not in r:
+        return {"input": text, "output": text, "error": r["error"], "changed": False}
+    c = r.get("corrected_text", text)
+    return {"input": text, "output": c, "changed": c != text, "elapsed_ms": r.get("_elapsed_ms"), "timestamp": r.get("_timestamp")}
+def track_a_punctuation(text):
+    r = api_call("/api/punctuation", text)
+    if "error" in r and "corrected_text" not in r:
+        return {"input": text, "output": text, "error": r["error"], "changed": False}
+    c = r.get("corrected_text", text)
+    PUNC = '.,;:!?،؛؟'
+    return {"input": text, "output": c, "changed": c != text,
+            "marks_added": sum(1 for ch in c if ch in PUNC) - sum(1 for ch in text if ch in PUNC),
+            "elapsed_ms": r.get("_elapsed_ms")}
+def track_b_analyze(text):
+    r = api_call("/api/analyze", text)
+    if "error" in r and "suggestions" not in r:
+        return {"input": text, "error": r["error"], "suggestions": [], "corrected": text}
+    return {
+        "input": text, "original": r.get("original", text),
+        "corrected": r.get("corrected", text),
+        "suggestions": r.get("suggestions", []),
+        "timing_ms": r.get("timing_ms", {}),
+        "elapsed_ms": r.get("_elapsed_ms"),
+    }
+def log(msg):
+    print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}", flush=True)
+# ═══════════════════════════════════════════════════════════════════
+# CATEGORY 1 — Cross-model mismatch
+# ═══════════════════════════════════════════════════════════════════
+def run_cat1():
+    log("=== CATEGORY 1: Cross-model mismatch ===")
+    results = []
+    inputs = [
+        {"id": "C1-01", "input": "كانت الفتيات يلعبون في الحديقه"},
+        {"id": "C1-02", "input": "ان الطالبات ذهبو الى الجامعه"},
+        {"id": "C1-03", "input": "هذة المدينه جميله جدا ومناخها معتدل"},
+        {"id": "C1-04", "input": "الطلاب اجتهدو في دراستهم وحققو نتائج ممتازه"},
+        {"id": "C1-05", "input": "ذهب الولد الى المكتبه وقرا كتاب مفيد"},
+    ]
+    for test in inputs:
+        log(f"  {test['id']}: {test['input'][:50]}...")
+        # Track A: each model on ORIGINAL independently
+        a_spell = track_a_spelling(test['input'])
+        a_gram_on_orig = track_a_grammar(test['input'])
+        # NEW: grammar on SPELLING-CORRECTED text
+        a_gram_on_spell = track_a_grammar(a_spell['output'])
+        a_punc = track_a_punctuation(test['input'])
+        # Track B
+        b = track_b_analyze(test['input'])
+        # Diff: grammar on original vs grammar on spell-corrected
+        gram_orig_words = a_gram_on_orig['output'].split()
+        gram_spell_words = a_gram_on_spell['output'].split()
+        gram_diff = []
+        for i, (w1, w2) in enumerate(zip(gram_orig_words, gram_spell_words)):
+            if w1 != w2:
+                gram_diff.append({"word_idx": i, "gram_on_orig": w1, "gram_on_spell": w2})
+        result = {
+            "id": test['id'], "category": 1, "input": test['input'],
+            "a_spelling": a_spell['output'],
+            "a_grammar_on_original": a_gram_on_orig['output'],
+            "a_grammar_on_spell_corrected": a_gram_on_spell['output'],
+            "a_punctuation": a_punc['output'],
+            "grammar_diff_orig_vs_spell": gram_diff,
+            "b_corrected": b.get('corrected', ''),
+            "b_suggestions": b.get('suggestions', []),
+        }
+        log(f"    A_spell: {a_spell['output'][:60]}")
+        log(f"    A_gram(orig): {a_gram_on_orig['output'][:60]}")
+        log(f"    A_gram(spell): {a_gram_on_spell['output'][:60]}")
+        log(f"    Grammar diff: {gram_diff}")
+        log(f"    B_final: {b.get('corrected','')[:60]}")
+        results.append(result)
+    return results
+# ═══════════════════════════════════════════════════════════════════
+# CATEGORY 7 — StageLocker adversarial tests
+# ═══════════════════════════════════════════════════════════════════
+def run_cat7():
+    log("=== CATEGORY 7: StageLocker directionality ===")
+    results = []
+    # 3+ chained mutations: spelling changes length, grammar changes length, punc adds marks
+    inputs = [
+        {"id": "C7-01", "input": "ذهب الولد الى المدرسه وقابل المعلمه واخذ الكتاب",
+         "desc": "3-stage chain: spelling الى→إلى, grammar المدرسه→المدرسة, punc adds marks"},
+        {"id": "C7-02", "input": "كانت البنات يلعبون في الحديقه الجميله وفجأه سقطت احداهن",
+         "desc": "Multiple overlapping corrections across all stages"},
+        {"id": "C7-03", "input": "ان الذكاء الاصطناعي يلعب دورا هاما في تطوير التكنولوجيا الحديثه ولذالك يجب الاهتمام بة",
+         "desc": "Long sentence with corrections from all 3 stages"},
+        {"id": "C7-04", "input": "هذة المدينه جميله جدا ومناخها معتدل طوال العام وسكانها طيبون جدا",
+         "desc": "Multiple ه→ة fixes: does grammar lock prevent punc from adding marks near those words?"},
+        {"id": "C7-05", "input": "الطلاب اللذين اجتهدو في دراستهم حققو نتائج ممتازه في الأمتحانات الصعبه",
+         "desc": "Heavy corrections needed across stages"},
+    ]
+    for test in inputs:
+        log(f"  {test['id']}: {test['input'][:50]}...")
+        a_spell = track_a_spelling(test['input'])
+        a_gram = track_a_grammar(test['input'])
+        a_punc = track_a_punctuation(test['input'])
+        b = track_b_analyze(test['input'])
+        # Check: are any suggestions at positions that overlap with corrections from earlier stages?
+        sugg = b.get('suggestions', [])
+        overlaps = []
+        for i, s1 in enumerate(sugg):
+            for j, s2 in enumerate(sugg):
+                if i < j and s1.get('start',0) < s2.get('end',0) and s2.get('start',0) < s1.get('end',0):
+                    overlaps.append({"s1": s1, "s2": s2})
+        result = {
+            "id": test['id'], "category": 7, "input": test['input'],
+            "desc": test['desc'],
+            "a_spelling": a_spell['output'],
+            "a_grammar": a_gram['output'],
+            "a_punc": a_punc['output'],
+            "b_corrected": b.get('corrected', ''),
+            "b_suggestions": sugg,
+            "b_suggestion_count": len(sugg),
+            "overlapping_suggestions": overlaps,
+        }
+        log(f"    B_final: {b.get('corrected','')[:60]}")
+        log(f"    Suggestions: {len(sugg)}, Overlaps: {len(overlaps)}")
+        results.append(result)
+    return results
+# ═══════════════════════════════════════════════════════════════════
+# CATEGORY 8 EXPANDED — with ال + prefix combos
+# ═══════════════════════════════════════════════════════════════════
+def run_cat8_expanded():
+    log("=== CATEGORY 8 EXPANDED: ال + prefix combos ===")
+    results = []
+    combos = [
+        # root, al_form, wal_form, bal_form, lal_form
+        ("مدرسة", "المدرسة", "والمدرسة", "بالمدرسة", "للمدرسة"),
+        ("شمس", "الشمس", "والشمس", "بالشمس", "للشمس"),
+        ("أمة", "الأمة", "والأمة", "بالأمة", "للأمة"),
+        ("نافذة", "النافذة", "والنافذة", "بالنافذة", "للنافذة"),
+        ("علم", "العلم", "والعلم", "بالعلم", "للعلم"),
+        ("اقتصاد", "الاقتصاد", "والاقتصاد", "بالاقتصاد", "للاقتصاد"),
+    ]
+    for root, al, wal, bal, lal in combos:
+        for label, word in [("al", al), ("wal", wal), ("bal", bal), ("lal", lal)]:
+            a = track_a_spelling(word)
+            result = {
+                "id": f"C8X-{root}-{label}", "category": 8, "input": word,
+                "root": root, "prefix_combo": label,
+                "track_a_spelling": a['output'], "changed": a.get('changed', False),
+            }
+            if a.get('changed'):
+                log(f"  ⚠ C8X-{root}-{label}: '{word}' → '{a['output']}'")
+            results.append(result)
+    return results
+# ═══════════════════════════════════════════════════════════════════
+# CATEGORY 9 EXPANDED — missing pairs
+# ═══════════════════════════════════════════════════════════════════
+def run_cat9_expanded():
+    log("=== CATEGORY 9 EXPANDED: Missing confusable pairs ===")
+    results = []
+    tests = [
+        # إنّ / أنّ (with shadda)
+        {"id": "C9X-01", "input": "إنّ", "context": "isolation", "concern": "stays إنّ"},
+        {"id": "C9X-02", "input": "أنّ", "context": "isolation", "concern": "stays أنّ"},
+        {"id": "C9X-03", "input": "إنّ العلم نور", "context": "sentence", "concern": "إنّ stays"},
+        {"id": "C9X-04", "input": "علمت أنّ الامتحان صعب", "context": "sentence", "concern": "أنّ stays"},
+        # على vs علي (name)
+        {"id": "C9X-05", "input": "علي", "context": "isolation", "concern": "could be name علي or على"},
+        {"id": "C9X-06", "input": "ذهب علي إلى المدرسة", "context": "sentence", "concern": "علي is a name here"},
+        {"id": "C9X-07", "input": "جلس علي الكرسي", "context": "sentence", "concern": "AMBIGUOUS: علي=name or على=on"},
+    ]
+    for test in tests:
+        a = track_a_spelling(test['input'])
+        result = {
+            "id": test['id'], "category": 9, "input": test['input'],
+            "context": test['context'], "concern": test['concern'],
+            "track_a_spelling": a['output'], "changed": a.get('changed', False),
+        }
+        if a.get('changed'):
+            log(f"  ⚠ {test['id']}: '{test['input']}' → '{a['output']}' ({test['concern']})")
+        else:
+            log(f"  ✓ {test['id']}: no change")
+        results.append(result)
+    return results
+# ═══════════════════════════════════════════════════════════════════
+# CATEGORY 10 EXPANDED — sentence position + 200-word drift test
+# ═══════════════════════════════════════════════════════════════════
+def run_cat10_expanded():
+    log("=== CATEGORY 10 EXPANDED: Position + Cumulative drift ===")
+    results = []
+    # Same error at sentence start vs middle
+    log("  Sentence-initial vs mid-sentence:")
+    position_tests = [
+        {"id": "C10X-01a", "input": "الحديقه جميلة جدا", "concern": "error_at_start"},
+        {"id": "C10X-01b", "input": "الجو حار في الحديقه", "concern": "error_at_end"},
+        {"id": "C10X-02a", "input": "الى المدرسة ذهب الولد", "concern": "error_at_start"},
+        {"id": "C10X-02b", "input": "ذهب الولد الى المدرسة", "concern": "error_at_end"},
+    ]
+    for test in position_tests:
+        a = track_a_spelling(test['input'])
+        b = track_b_analyze(test['input'])
+        result = {
+            "id": test['id'], "category": 10, "input": test['input'],
+            "concern": test['concern'],
+            "track_a_spelling": a['output'], "a_changed": a.get('changed', False),
+            "track_b_corrected": b.get('corrected', ''),
+            "track_b_suggestions": len(b.get('suggestions', [])),
+        }
+        log(f"    {test['id']}: A='{a['output'][:40]}' B_sugg={len(b.get('suggestions',[]))}")
+        results.append(result)
+    # 200+ word cumulative drift test
+    log("\n  200+ word cumulative drift test:")
+    long_text = (
+        "كانت الفتيات يلعبون في الحديقه الجميله وفجأه سقطت احداهن وبدءت تبكي بشده "
+        "ذهب الولد الى المدرسه وقابل المعلمه واخذ الكتاب "
+        "ان الذكاء الاصطناعي يلعب دورا هاما في تطوير التكنولوجيا "
+        "هذة المدينه جميله جدا ومناخها معتدل طوال العام "
+        "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه "
+        "سافر محمد إلى دبي للعمل في شركة جوجل وقابل أصدقاءه القدامى "
+        "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق في معالجة البيانات الضخمة "
+        "القاهرة عاصمة جمهورية مصر العربية وأكبر مدنها وتقع على ضفاف نهر النيل "
+        "تتراوح درجات الحرارة بين خمس وعشرين وثلاثين درجة مئوية في فصل الصيف "
+        "الحمد لله رب العالمين الرحمن الرحيم مالك يوم الدين "
+        "بسم الله الرحمن الرحيم نبدأ هذة المحاضره عن اهمية التعليم "
+        "يحب الأطفال اللعب في الحديقة وركوب الدراجات والجري بين الأشجار "
+    )
+    word_count = len(long_text.split())
+    log(f"    Input: {word_count} words, {len(long_text)} chars")
+    b = track_b_analyze(long_text)
+    sugg = b.get('suggestions', [])
+    # Check coordinates in the back half
+    mid_char = len(long_text) // 2
+    back_half_sugg = [s for s in sugg if s.get('start', 0) >= mid_char]
+    front_half_sugg = [s for s in sugg if s.get('start', 0) < mid_char]
+    # Verify coordinates: does original[start:end] == suggestion['original']?
+    coord_mismatches = []
+    for s in sugg:
+        start, end = s.get('start', 0), s.get('end', 0)
+        expected_text = long_text[start:end]
+        actual_text = s.get('original', '')
+        if expected_text != actual_text:
+            coord_mismatches.append({
+                "start": start, "end": end,
+                "expected_from_coords": expected_text,
+                "actual_in_suggestion": actual_text,
+                "correction": s.get('correction', ''),
+                "type": s.get('type', ''),
+            })
+    result = {
+        "id": "C10X-DRIFT", "category": 10, "input_len": len(long_text),
+        "word_count": word_count,
+        "total_suggestions": len(sugg),
+        "front_half_suggestions": len(front_half_sugg),
+        "back_half_suggestions": len(back_half_sugg),
+        "coordinate_mismatches": coord_mismatches,
+        "suggestions_detail": sugg,
+    }
+    log(f"    Total suggestions: {len(sugg)} (front: {len(front_half_sugg)}, back: {len(back_half_sugg)})")
+    log(f"    Coordinate mismatches: {len(coord_mismatches)}")
+    for m in coord_mismatches:
+        log(f"      [{m['start']}:{m['end']}] expected='{m['expected_from_coords']}' got='{m['actual_in_suggestion']}'")
+    results.append(result)
+    return results
+# ═══════════════════════════════════════════════════════════════════
+# CATEGORY 11 — Genuine stress tests / edge cases
+# ═══════════════════════════════════════════════════════════════════
+def run_cat11():
+    log("=== CATEGORY 11: Edge case discovery (stress tests) ===")
+    results = []
+    tests = [
+        # Pathological inputs
+        {"id": "C11-01", "input": "", "desc": "empty_string"},
+        {"id": "C11-02", "input": " ", "desc": "whitespace_only"},
+        {"id": "C11-03", "input": "أ", "desc": "single_char"},
+        {"id": "C11-04", "input": "مستشفياتهم", "desc": "long_single_word"},
+        {"id": "C11-05", "input": "ذهبالولدالىالمدرسةوقابلالمعلمة", "desc": "no_spaces"},
+        {"id": "C11-06", "input": "...!؟،،؛؛::...", "desc": "all_punctuation"},
+        {"id": "C11-07", "input": "(([{هذا النص}]))", "desc": "unbalanced_brackets"},
+        {"id": "C11-08", "input": "\"هذا\" 'نص' «اختبار»", "desc": "mixed_quotes"},
+        # Boundary lengths (299, 300, 301 chars)
+        {"id": "C11-09", "input": ("يستخدم الذكاء الاصطناعي تقنيات التعلم العميق " * 10)[:299], "desc": "len_299"},
+        {"id": "C11-10", "input": ("يستخدم الذكاء الاصطناعي تقنيات التعلم العميق " * 10)[:300], "desc": "len_300"},
+        {"id": "C11-11", "input": ("يستخدم الذكاء الاصطناعي تقنيات التعلم العميق " * 10)[:301], "desc": "len_301"},
+        # Max disagreement: word that is both plausible spelling error AND grammatically ambiguous
+        {"id": "C11-12", "input": "يلعب الطلاب في الحديقه بعد المدرسه وقبل العشاء", "desc": "multi_stage_disagreement"},
+        # Correction identical to original (model returns same text)
+        {"id": "C11-13", "input": "الحمد لله", "desc": "model_returns_identical"},
+        # Very long repetitive text
+        {"id": "C11-14", "input": "مرحبا " * 100, "desc": "100x_repeated_word"},
+        # Mixed Arabic and English heavily
+        {"id": "C11-15", "input": "I went to the مدرسة and met the معلم in the فصل", "desc": "heavy_code_switch"},
+        # Dialectal variations
+        {"id": "C11-16", "input": "ايش هالحكي يا زلمة", "desc": "levantine_dialect"},
+        {"id": "C11-17", "input": "شنو تسوي هسه", "desc": "iraqi_dialect"},
+    ]
+    for test in tests:
+        log(f"  {test['id']}: '{test['input'][:40]}...' [{test['desc']}]")
+        # Track B only for stress tests (we want to see if pipeline crashes)
+        b = track_b_analyze(test['input'])
+        crashed = "error" in b and "suggestions" not in b
+        result = {
+            "id": test['id'], "category": 11, "input": test['input'][:200],
+            "desc": test['desc'], "input_len": len(test['input']),
+            "crashed": crashed,
+            "b_corrected": b.get('corrected', '')[:200] if not crashed else "CRASH",
+            "b_suggestions": len(b.get('suggestions', [])),
+            "error": b.get('error', None),
+        }
+        status = "💥 CRASH" if crashed else f"✓ ({len(b.get('suggestions',[]))} sugg)"
+        log(f"    {status}")
+        results.append(result)
+    # Race condition: 2 parallel requests with same input
+    log("\n  Race condition test (2 parallel requests):")
+    race_input = "كانت الفتيات يلعبون في الحديقه"
+    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as ex:
+        f1 = ex.submit(track_b_analyze, race_input)
+        f2 = ex.submit(track_b_analyze, race_input)
+        r1, r2 = f1.result(), f2.result()
+    race_match = r1.get('corrected') == r2.get('corrected') and len(r1.get('suggestions',[])) == len(r2.get('suggestions',[]))
+    race_result = {
+        "id": "C11-RACE", "category": 11, "input": race_input,
+        "desc": "parallel_race_condition",
+        "r1_corrected": r1.get('corrected', ''),
+        "r2_corrected": r2.get('corrected', ''),
+        "r1_suggestions": len(r1.get('suggestions', [])),
+        "r2_suggestions": len(r2.get('suggestions', [])),
+        "identical": race_match,
+    }
+    log(f"    Race test: identical={race_match}")
+    results.append(race_result)
+    return results
+# ═══════════════════════════════════════════════════════════════════
+# MAIN
+# ═══════════════════════════════════════════════════════════════════
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--stage', choices=['cat1', 'cat7', 'cat8x', 'cat9x', 'cat10x', 'cat11', 'all'], default='all')
+    args = parser.parse_args()
+    all_results = {"timestamp": datetime.now(timezone.utc).isoformat(), "api_base": API_BASE}
+    # Health check
+    log(f"Health check: {API_BASE}")
+    try:
+        resp = requests.get(f"{API_BASE}/api/health", timeout=10)
+        log(f"  OK: {resp.status_code}")
+        all_results['health'] = resp.json()
+    except Exception as e:
+        log(f"  FAIL: {e}")
+        return
+    if args.stage in ('cat1', 'all'):
+        all_results['cat1'] = run_cat1()
+    if args.stage in ('cat7', 'all'):
+        all_results['cat7'] = run_cat7()
+    if args.stage in ('cat8x', 'all'):
+        all_results['cat8x'] = run_cat8_expanded()
+    if args.stage in ('cat9x', 'all'):
+        all_results['cat9x'] = run_cat9_expanded()
+    if args.stage in ('cat10x', 'all'):
+        all_results['cat10x'] = run_cat10_expanded()
+    if args.stage in ('cat11', 'all'):
+        all_results['cat11'] = run_cat11()
+    output_path = os.path.join(os.path.dirname(__file__), 'deep_dive_expanded.json')
+    with open(output_path, 'w', encoding='utf-8') as f:
+        json.dump(all_results, f, ensure_ascii=False, indent=2)
+    log(f"\nSaved to {output_path}")
+# Run the harness only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()

archive/old_tests/deep_dive_gaps.json ADDED Viewed

	@@ -0,0 +1,260 @@

+{
+  "timestamp": "2026-06-20T19:33:59.110768+00:00",
+  "gap1_drift": {
+    "word_count": 187,
+    "char_count": 1104,
+    "total_suggestions": 0,
+    "front_half": 0,
+    "back_half": 0,
+    "coordinate_mismatches": [],
+    "a_spelling_changed": true,
+    "a_grammar_changed": true,
+    "a_punc_changed": true,
+    "suggestions": []
+  },
+  "gap2_priority": [
+    {
+      "id": "G2-01",
+      "input": "الطلاب اجتهدو في الامتحان",
+      "desc": "اجتهدو — spelling should add ا, grammar may do different fix. Grammar wins (priority 3 > 1)",
+      "a_spelling": "الطلاب اجتهدو في الامتحان",
+      "a_grammar": "الطلاب اجتهدو في الامتحان",
+      "a_punctuation": "الطلاب اجتهدو في الامتحان.",
+      "b_corrected": "الطلاب اجتهد في الامتحين",
+      "b_suggestions": [
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "الامتحين",
+          "end": 25,
+          "id": "502647e7-18fd-41d2-b1c7-2978a3ee7704",
+          "locked": true,
+          "original": "الامتحان",
+          "priority": 3,
+          "start": 17,
+          "type": "grammar"
+        },
+        {
+          "alternatives": [
+            "اجتهد",
+            "اجتهدو"
+          ],
+          "confidence": 0.9,
+          "correction": "اجتهد",
+          "end": 13,
+          "id": "0a93f071-af36-4219-b6f5-d11e748c4601",
+          "locked": true,
+          "original": "اجتهدو",
+          "priority": 1,
+          "start": 7,
+          "type": "spelling"
+        }
+      ]
+    },
+    {
+      "id": "G2-02",
+      "input": "البنات ذهبو الى البيت",
+      "desc": "ذهبو — spelling could give ذهبوا, grammar could give ذهبن (fem). Grammar wins.",
+      "a_spelling": "البنات ذهبو إلى البيت",
+      "a_grammar": "البنات ذهبن الى البيت",
+      "a_punctuation": "البنات ذهبو الى البيت.",
+      "b_corrected": "البنات ذهبن إلى البيت.",
+      "b_suggestions": [
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "ذهبن",
+          "end": 11,
+          "id": "0f00a9ab-1166-4e4d-8dd7-ae6dba1f9f1e",
+          "locked": true,
+          "original": "ذهبو",
+          "priority": 3,
+          "start": 7,
+          "type": "grammar"
+        },
+        {
+          "alternatives": [],
+          "confidence": 0.8,
+          "correction": "البيت.",
+          "end": 21,
+          "id": "af3a0a21-5e1e-45f5-a1ad-9c3730b4ab25",
+          "locked": true,
+          "original": "البيت",
+          "priority": 2,
+          "start": 16,
+          "type": "punctuation"
+        },
+        {
+          "alternatives": [
+            "إلى",
+            "ال",
+            "الم",
+            "الى"
+          ],
+          "confidence": 0.9,
+          "correction": "إلى",
+          "end": 15,
+          "id": "1b7096dc-6043-4e1a-9de3-d59204327b86",
+          "locked": true,
+          "original": "الى",
+          "priority": 1,
+          "start": 12,
+          "type": "spelling"
+        }
+      ]
+    },
+    {
+      "id": "G2-03",
+      "input": "وفجأه سقطت الكتب",
+      "desc": "وفجأه — spelling may fix ه→ة; punctuation may want comma after it. Overlap?",
+      "a_spelling": "وفجأه سقطت الكتب",
+      "a_grammar": "وفجأة سقطت الكتب",
+      "a_punctuation": "وفجأه سقطت الكتب.",
+      "b_corrected": "وفجأة سقطت الكتب.",
+      "b_suggestions": [
+        {
+          "alternatives": [],
+          "confidence": 0.8,
+          "correction": "الكتب.",
+          "end": 16,
+          "id": "fc257e46-4368-4d32-acb0-de5b6d461aaf",
+          "locked": true,
+          "original": "الكتب",
+          "priority": 2,
+          "start": 11,
+          "type": "punctuation"
+        },
+        {
+          "alternatives": [],
+          "confidence": 1.0,
+          "correction": "وفجأة",
+          "end": 5,
+          "id": "7397e7e6-e238-4ed4-a184-461f576a74f6",
+          "locked": true,
+          "original": "وفجأه",
+          "priority": 1,
+          "start": 0,
+          "type": "spelling"
+        }
+      ]
+    }
+  ],
+  "gap3_dropped": {
+    "tests": [
+      {
+        "input": "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه في الامتحانات",
+        "a_spell_diffs": [
+          {
+            "word_idx": 2,
+            "original": "اجتهدو",
+            "corrected": "اجتهد"
+          },
+          {
+            "word_idx": 5,
+            "original": "حققو",
+            "corrected": "حقوق"
+          }
+        ],
+        "a_gram_diffs": [
+          {
+            "word_idx": 5,
+            "original": "حققو",
+            "corrected": "حققوا"
+          },
+          {
+            "word_idx": 7,
+            "original": "ممتازه",
+            "corrected": "ممتازة"
+          }
+        ],
+        "a_punc_diffs": [
+          {
+            "word_idx": 9,
+            "original": "الامتحانات",
+            "corrected": "الامتحانات."
+          }
+        ],
+        "b_suggestion_count": 4,
+        "dropped_spell": [],
+        "dropped_gram": [],
+        "dropped_punc": []
+      }
+    ]
+  },
+  "gap4_rare": {
+    "tests": [
+      {
+        "id": "R-01",
+        "input": "استوقفني المشهد فتأملته مليا",
+        "domain": "literary",
+        "output": "استوقفني المشهد فتأملتة مليا",
+        "changed": true
+      },
+      {
+        "id": "R-02",
+        "input": "تستأثر القوى العظمى بالنفوذ الدولي",
+        "domain": "political_literary",
+        "output": "تستأثر القوى العظمى بالنفوذ الدولي",
+        "changed": false
+      },
+      {
+        "id": "R-03",
+        "input": "استقطب المؤتمر ثلة من العلماء الأفذاذ",
+        "domain": "formal_rare",
+        "output": "استقطب المؤتمر ثلة من العلماء الأفذاذ",
+        "changed": false
+      },
+      {
+        "id": "R-04",
+        "input": "يتسنى للمرء أن يستشف الحقيقة من بين السطور",
+        "domain": "literary_verb",
+        "output": "يتسنى للمرء أن يكتشف الحقيقة من بين السطور",
+        "changed": true
+      },
+      {
+        "id": "R-05",
+        "input": "ألقى المحاضر خطبة عصماء استحوذت على إعجاب الحاضرين",
+        "domain": "oratory",
+        "output": "ألقى المحاضر خطبة علماء استحوذت على إعجاب الحاضرين",
+        "changed": true
+      },
+      {
+        "id": "R-06",
+        "input": "تمخض الاجتماع عن قرارات مصيرية",
+        "domain": "formal_verb",
+        "output": "تمخض الاجتماع عن قرارات مصيرية",
+        "changed": false
+      },
+      {
+        "id": "R-07",
+        "input": "أرهقته المسغبة فاستكان للقدر",
+        "domain": "classical",
+        "output": "طريقتة المسببة فاستكان القدر",
+        "changed": true
+      },
+      {
+        "id": "R-08",
+        "input": "نستشرف آفاق المستقبل بثقة واقتدار",
+        "domain": "formal_speech",
+        "output": "نستشرف آفاق المستقبل بثقة واقتدار",
+        "changed": false
+      },
+      {
+        "id": "R-09",
+        "input": "اعتراه القلق فتملكه الأرق",
+        "domain": "literary_psych",
+        "output": "اعتراه القلق فتملكة الأرق",
+        "changed": true
+      },
+      {
+        "id": "R-10",
+        "input": "استأنف العمل بعد فترة من التقاعس",
+        "domain": "formal_verb",
+        "output": "استأنف العمل بعد فترة من التقاعد",
+        "changed": true
+      }
+    ],
+    "fp_count": 6,
+    "total": 10
+  }
+}

archive/old_tests/deep_dive_gaps.py ADDED Viewed

	@@ -0,0 +1,295 @@

+"""
+Gap-filler tests for items explicitly requested in the prompt but not yet covered:
+1. 200+ word cumulative drift test (Cat 10)
+2. Lower-priority-wins limitation (Cat 4)
+3. Systematic dropped patch logging (Cat 3)
+4. Rare/literary vocabulary overcorrection (Cat 2)
+"""
+import sys, os, json, time, requests
+from datetime import datetime, timezone
+API_BASE = "https://bayan10-bayan-api.hf.space"
+TIMEOUT = 60
+def api_call(endpoint, text, retries=2):
+    url = f"{API_BASE}{endpoint}"
+    for attempt in range(retries + 1):
+        try:
+            t0 = time.time()
+            resp = requests.post(url, json={"text": text}, timeout=TIMEOUT)
+            elapsed = int((time.time() - t0) * 1000)
+            if resp.status_code == 200:
+                data = resp.json()
+                data['_elapsed_ms'] = elapsed
+                return data
+            else:
+                if attempt < retries:
+                    time.sleep(2)
+                    continue
+                return {"error": f"HTTP {resp.status_code}: {resp.text[:200]}", "_elapsed_ms": elapsed}
+        except Exception as e:
+            return {"error": str(e)}
+def log(msg):
+    print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}", flush=True)
+results = {"timestamp": datetime.now(timezone.utc).isoformat()}
+# ═══════════════════════════════════════════════════════════════
+# GAP 1: 200+ word cumulative drift test (Cat 10)
+# ═══════════════════════════════════════════════════════════════
+log("=== GAP 1: 200+ word cumulative drift test ===")
+# Build a 200+ word paragraph with deliberate errors throughout
+long_para = (
+    "كانت الفتيات يلعبون في الحديقه الجميله وفجأه سقطت احداهن وبدءت تبكي بشده "
+    "ذهب الولد الى المدرسه وقابل المعلمه واخذ الكتاب وبدأ يقرأ بتركيز شديد "
+    "ان الذكاء الاصطناعي يلعب دورا هاما في تطوير التكنولوجيا الحديثه ولذالك يجب الاهتمام بة "
+    "هذة المدينه جميله جدا ومناخها معتدل طوال العام وسكانها طيبون ومحبون للخير "
+    "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه في الامتحانات النهائيه "
+    "سافر محمد إلى دبي للعمل في شركة جوجل وقابل أصدقاءه القدامى هناك "
+    "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق في معالجة البيانات الضخمة والتحليل "
+    "القاهرة عاصمة جمهورية مصر العربية وأكبر مدنها وتقع على ضفاف نهر النيل العظيم "
+    "تتراوح درجات الحرارة بين خمس وعشرين وثلاثين درجة مئوية في فصل الصيف الحار "
+    "الحمد لله رب العالمين الرحمن الرحيم مالك يوم الدين إياك نعبد وإياك نستعين "
+    "بسم الله الرحمن الرحيم نبدأ هذة المحاضره عن اهمية التعليم في حياة الانسان "
+    "يحب الأطفال اللعب في الحديقة وركوب الدراجات والجري بين الأشجار والزهور الجميلة "
+    "إن العلم نور والجهل ظلام فاحرصوا على طلب العلم من المهد إلى اللحد "
+    "كان الرجل يمشي في الشارع وفجأه رأى صديقه القديم فسلم عليه وتحدثا طويلا "
+    "المعلم الذي يحب عمله يجتهد في تعليم طلابه ويحرص على نجاحهم في الحياه "
+)
+word_count = len(long_para.split())
+char_count = len(long_para)
+log(f"  Input: {word_count} words, {char_count} chars")
+# Track A: each model on the full long text
+log("  Running Track A (each model independently on original)...")
+a_spell = api_call("/api/spelling", long_para)
+a_gram = api_call("/api/grammar", long_para)
+a_punc = api_call("/api/punctuation", long_para)
+# Track B: full pipeline
+log("  Running Track B (full pipeline)...")
+b = api_call("/api/analyze", long_para)
+sugg = b.get("suggestions", [])
+mid_char = char_count // 2
+# Verify ALL coordinates
+coord_mismatches = []
+for s in sugg:
+    start, end = s.get('start', 0), s.get('end', 0)
+    expected_text = long_para[start:end]
+    actual_text = s.get('original', '')
+    if expected_text != actual_text:
+        coord_mismatches.append({
+            "start": start, "end": end,
+            "expected": expected_text,
+            "actual": actual_text,
+            "correction": s.get('correction', ''),
+            "type": s.get('type', ''),
+        })
+back_half = [s for s in sugg if s.get('start', 0) >= mid_char]
+front_half = [s for s in sugg if s.get('start', 0) < mid_char]
+# Log every suggestion with its verified coordinate
+log(f"  Total: {len(sugg)} suggestions, {len(coord_mismatches)} coordinate mismatches")
+log(f"  Front half ({mid_char} chars): {len(front_half)} suggestions")
+log(f"  Back half: {len(back_half)} suggestions")
+for s in sugg:
+    st, en = s.get('start',0), s.get('end',0)
+    in_back = "BACK" if st >= mid_char else "FRONT"
+    verified = "✓" if long_para[st:en] == s.get('original','') else "✗ MISMATCH"
+    log(f"    [{in_back}] [{st}:{en}] '{s.get('original','')}' → '{s.get('correction','')}' ({s.get('type','')}) {verified}")
+for m in coord_mismatches:
+    log(f"    MISMATCH: [{m['start']}:{m['end']}] expected='{m['expected']}' actual='{m['actual']}'")
+results['gap1_drift'] = {
+    "word_count": word_count, "char_count": char_count,
+    "total_suggestions": len(sugg),
+    "front_half": len(front_half), "back_half": len(back_half),
+    "coordinate_mismatches": coord_mismatches,
+    "a_spelling_changed": a_spell.get("corrected_text","") != long_para,
+    "a_grammar_changed": a_gram.get("corrected_text","") != long_para,
+    "a_punc_changed": a_punc.get("corrected_text","") != long_para,
+    "suggestions": sugg,
+}
+# ═══════════════════════════════════════════════════════════════
+# GAP 2: Lower-priority-wins limitation doc (Cat 4)
+# ═══════════════════════════════════════════════════════════════
+log("\n=== GAP 2: Lower-priority stage was more important (Cat 4) ===")
+# Construct case: spelling corrects اجتهدو→اجتهدوا (correct, priority 1)
+# but grammar might also touch it with a different correction (priority 3)
+# Grammar WINS because higher priority. But what if grammar is wrong here?
+gap2_tests = [
+    {
+        "id": "G2-01",
+        "input": "الطلاب اجتهدو في الامتحان",
+        "desc": "اجتهدو — spelling should add ا, grammar may do different fix. Grammar wins (priority 3 > 1)",
+    },
+    {
+        "id": "G2-02",
+        "input": "البنات ذهبو الى البيت",
+        "desc": "ذهبو — spelling could give ذهبوا, grammar could give ذهبن (fem). Grammar wins.",
+    },
+    {
+        "id": "G2-03",
+        "input": "وفجأه سقطت الكتب",
+        "desc": "وفجأه — spelling may fix ه→ة; punctuation may want comma after it. Overlap?",
+    },
+]
+for test in gap2_tests:
+    log(f"  {test['id']}: {test['input']}")
+    a_sp = api_call("/api/spelling", test['input'])
+    a_gr = api_call("/api/grammar", test['input'])
+    a_pu = api_call("/api/punctuation", test['input'])
+    b = api_call("/api/analyze", test['input'])
+    a_sp_out = a_sp.get("corrected_text", test['input'])
+    a_gr_out = a_gr.get("corrected_text", test['input'])
+    a_pu_out = a_pu.get("corrected_text", test['input'])
+    log(f"    A_spell: {a_sp_out}")
+    log(f"    A_gram:  {a_gr_out}")
+    log(f"    A_punc:  {a_pu_out}")
+    log(f"    B_final: {b.get('corrected','')}")
+    log(f"    B_sugg:  {len(b.get('suggestions',[]))}")
+    # Which stage's correction won for each word?
+    b_sugg = b.get('suggestions', [])
+    for s in b_sugg:
+        log(f"      [{s.get('type','')}] [{s.get('start',0)}:{s.get('end',0)}] '{s.get('original','')}' → '{s.get('correction','')}'")
+    test['a_spelling'] = a_sp_out
+    test['a_grammar'] = a_gr_out
+    test['a_punctuation'] = a_pu_out
+    test['b_corrected'] = b.get('corrected', '')
+    test['b_suggestions'] = b_sugg
+results['gap2_priority'] = gap2_tests
+# ═══════════════════════════════════════════════════════════════
+# GAP 3: Systematic dropped patch logging (Cat 3)
+# ═══════════════════════════════════════════════════════════════
+log("\n=== GAP 3: Systematic dropped patch comparison (Cat 3) ===")
+# For each test: run all 3 models independently, count expected patches,
+# compare with actual Track B patches. Any patch Track A produces but
+# Track B doesn't = dropped patch.
+gap3_tests = [
+    "كانت الفتيات يلعبون في الحديقه وفجأه سقطت احداهن وبدءت تبكي بشده",
+    "ان الذكاء الاصطناعي يلعب دورا هاما ولذالك يجب الاهتمام بة",
+    "هذة المدينه جميله جدا ومناخها معتدل طوال العام",
+    "ذهب الولد الى المكتبه وقرا كتاب مفيد",
+    "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه في الامتحانات",
+]
+for i, text in enumerate(gap3_tests):
+    log(f"  Test {i+1}: {text[:50]}...")
+    a_sp = api_call("/api/spelling", text)
+    a_gr = api_call("/api/grammar", text)
+    a_pu = api_call("/api/punctuation", text)
+    b = api_call("/api/analyze", text)
+    a_sp_out = a_sp.get("corrected_text", text)
+    a_gr_out = a_gr.get("corrected_text", text)
+    a_pu_out = a_pu.get("corrected_text", text)
+    # Find word-level changes from each model
+    def word_diffs(orig, corrected):
+        o_words = orig.split()
+        c_words = corrected.split()
+        diffs = []
+        for j, (ow, cw) in enumerate(zip(o_words, c_words)):
+            if ow != cw:
+                diffs.append({"word_idx": j, "original": ow, "corrected": cw})
+        return diffs
+    sp_diffs = word_diffs(text, a_sp_out)
+    gr_diffs = word_diffs(text, a_gr_out)
+    pu_diffs = word_diffs(text, a_pu_out)
+    b_sugg = b.get('suggestions', [])
+    b_corrections = set()
+    for s in b_sugg:
+        b_corrections.add(s.get('original', ''))
+    # Track A produced these corrections; check which survived to Track B
+    dropped_spell = [d for d in sp_diffs if d['original'] not in b_corrections and d['corrected'] != d['original']]
+    dropped_gram = [d for d in gr_diffs if d['original'] not in b_corrections and d['corrected'] != d['original']]
+    dropped_punc = [d for d in pu_diffs if d['original'] not in b_corrections and d['corrected'] != d['original']]
+    log(f"    Track A changes: spell={len(sp_diffs)}, gram={len(gr_diffs)}, punc={len(pu_diffs)}")
+    log(f"    Track B suggestions: {len(b_sugg)}")
+    log(f"    Dropped: spell={len(dropped_spell)}, gram={len(dropped_gram)}, punc={len(dropped_punc)}")
+    for d in dropped_spell:
+        log(f"      DROPPED SPELL: '{d['original']}' → '{d['corrected']}' (reason: likely filter blocked)")
+    for d in dropped_gram:
+        log(f"      DROPPED GRAM: '{d['original']}' → '{d['corrected']}' (reason: likely StageLocker)")
+    for d in dropped_punc:
+        log(f"      DROPPED PUNC: '{d['original']}' → '{d['corrected']}' (reason: likely lock/cap/safety)")
+results[f'gap3_dropped'] = {
+    "tests": [
+        {
+            "input": text,
+            "a_spell_diffs": word_diffs(text, api_call("/api/spelling", text).get("corrected_text", text)) if False else sp_diffs,
+            "a_gram_diffs": gr_diffs,
+            "a_punc_diffs": pu_diffs,
+            "b_suggestion_count": len(b_sugg),
+            "dropped_spell": dropped_spell,
+            "dropped_gram": dropped_gram,
+            "dropped_punc": dropped_punc,
+        }
+        for text, sp_diffs, gr_diffs, pu_diffs, b_sugg in [(text, sp_diffs, gr_diffs, pu_diffs, b_sugg)]
+    ]
+}
+# ═══════════════════════════════════════════════════════════════
+# GAP 4: Rare/literary vocabulary (Cat 2)
+# ═══════════════════════════════════════════════════════════════
+log("\n=== GAP 4: Rare/literary vocabulary overcorrection (Cat 2) ===")
+rare_tests = [
+    {"id": "R-01", "input": "استوقفني المشهد فتأملته مليا", "domain": "literary"},
+    {"id": "R-02", "input": "تستأثر القوى العظمى بالنفوذ الدولي", "domain": "political_literary"},
+    {"id": "R-03", "input": "استقطب المؤتمر ثلة من العلماء الأفذاذ", "domain": "formal_rare"},
+    {"id": "R-04", "input": "يتسنى للمرء أن يستشف الحقيقة من بين السطور", "domain": "literary_verb"},
+    {"id": "R-05", "input": "ألقى المحاضر خطبة عصماء استحوذت على إعجاب الحاضرين", "domain": "oratory"},
+    {"id": "R-06", "input": "تمخض الاجتماع عن قرارات مصيرية", "domain": "formal_verb"},
+    {"id": "R-07", "input": "أرهقته المسغبة فاستكان للقدر", "domain": "classical"},
+    {"id": "R-08", "input": "نستشرف آفاق المستقبل بثقة واقتدار", "domain": "formal_speech"},
+    {"id": "R-09", "input": "اعتراه القلق فتملكه الأرق", "domain": "literary_psych"},
+    {"id": "R-10", "input": "استأنف العمل بعد فترة من التقاعس", "domain": "formal_verb"},
+]
+fp_count = 0
+for test in rare_tests:
+    a = api_call("/api/spelling", test['input'])
+    a_out = a.get("corrected_text", test['input'])
+    changed = a_out != test['input']
+    if changed:
+        fp_count += 1
+        log(f"  ⚠ {test['id']}: '{test['input'][:40]}...' → '{a_out[:40]}...' [{test['domain']}]")
+    else:
+        log(f"  ✓ {test['id']}: no change [{test['domain']}]")
+    test['output'] = a_out
+    test['changed'] = changed
+log(f"  Rare/literary FP rate: {fp_count}/{len(rare_tests)} ({fp_count*100//len(rare_tests)}%)")
+results['gap4_rare'] = {"tests": rare_tests, "fp_count": fp_count, "total": len(rare_tests)}
+# ═══════════════════════════════════════════════════════════════
+# SAVE
+# ═══════════════════════════════════════════════════════════════
+output_path = os.path.join(os.path.dirname(__file__), 'deep_dive_gaps.json')
+with open(output_path, 'w', encoding='utf-8') as f:
+    json.dump(results, f, ensure_ascii=False, indent=2)
+log(f"\nSaved to {output_path}")

archive/old_tests/deep_dive_output.json ADDED Viewed

	@@ -0,0 +1,671 @@

+{
+  "timestamp": "2026-06-20T19:00:06.993902+00:00",
+  "api_base": "https://bayan10-bayan-api.hf.space",
+  "health": {
+    "environment": "huggingface_spaces",
+    "mode": "hf_spaces_local",
+    "models": {
+      "autocomplete": true,
+      "grammar": true,
+      "punctuation": true,
+      "spelling": true,
+      "summarization": true
+    },
+    "note": "Free tier: summarization local, other models return input unchanged",
+    "status": "healthy",
+    "supabase": {
+      "configured": true
+    }
+  },
+  "pipeline_tests": [
+    {
+      "id": "C3-01",
+      "category": 3,
+      "input": "كانت الفتيات يلعبون في الحديقه وفجأه سقطت احداهن وبدءت تبكي بشده",
+      "track_a": {
+        "spelling": "كانت الفتيات يلعبون في الحديقه وفجأه سقطت احداهن وبدءت تبكي بشدة",
+        "spelling_changed": true,
+        "grammar": "كانت الفتيات يلعبن في الحديقة وفجأة سقطت إحدىهن وبدأت تبكي بشدة",
+        "grammar_changed": true,
+        "punctuation": "كانت الفتيات يلعبون في الحديقه وفجأه، سقطت احداهن وبدءت تبكي بشده",
+        "punctuation_changed": true
+      },
+      "track_b": {
+        "corrected": "كانت الفتيات يلعبن في الحديقة وفجأة سقطت إحدىهن وبدأت تبكي بشدة.",
+        "suggestions": [
+          {
+            "alternatives": [],
+            "confidence": 1.0,
+            "correction": "يلعبن",
+            "end": 19,
+            "id": "e984c773-8d33-4a30-b5b8-49cee91e1095",
+            "locked": true,
+            "original": "يلعبون",
+            "priority": 3,
+            "start": 13,
+            "type": "grammar"
+          },
+          {
+            "alternatives": [],
+            "confidence": 1.0,
+            "correction": "إحدىهن وبدأت",
+            "end": 54,
+            "id": "38054ed7-9bd2-4e04-9314-b4a63b84ad07",
+            "locked": true,
+            "original": "احداهن وبدءت",
+            "priority": 3,
+            "start": 42,
+            "type": "grammar"
+          },
+          {
+            "alternatives": [],
+            "confidence": 0.8,
+            "correction": "بشدة.",
+            "end": 64,
+            "id": "16e72e95-6326-4365-a0f3-ad2602bcfc49",
+            "locked": true,
+            "original": "بشده",
+            "priority": 2,
+            "start": 60,
+            "type": "punctuation"
+          },
+          {
+            "alternatives": [],
+            "confidence": 1.0,
+            "correction": "الحديقة وفجأة",
+            "end": 36,
+            "id": "1de0b7c2-e2e5-45e3-8ba3-6fe062ee8fcc",
+            "locked": true,
+            "original": "الحديقه وفجأه",
+            "priority": 1,
+            "start": 23,
+            "type": "spelling"
+          }
+        ],
+        "timing_ms": {
+          "grammar_ms": 4561,
+          "punctuation_ms": 1492,
+          "spelling_ms": 1529,
+          "total_ms": 7587
+        }
+      }
+    },
+    {
+      "id": "C3-02",
+      "category": 3,
+      "input": "ان الذكاء الاصطناعي يلعب دورا هاما ولذالك يجب الاهتمام بة",
+      "track_a": {
+        "spelling": "ان الذكاء الاصطناعي يلعب دورا هاما ولذالك يجب الاهتمام بة",
+        "spelling_changed": false,
+        "grammar": "ان الذكاء الاصطناعي يلعب دورا هاما ولذلك يجب الاهتمام به",
+        "grammar_changed": true,
+        "punctuation": "ان الذكاء الاصطناعي يلعب دورا هاما ولذالك؛ يجب الاهتمام بة",
+        "punctuation_changed": true
+      },
+      "track_b": {
+        "corrected": "ان الذكاء الاصطناعي يلعب دورا هاما ولذلك يجب الاهتمام به",
+        "suggestions": [
+          {
+            "alternatives": [],
+            "confidence": 1.0,
+            "correction": "ولذلك",
+            "end": 41,
+            "id": "9870eb8d-0bf7-4a58-90cb-940b5475a37e",
+            "locked": true,
+            "original": "ولذالك",
+            "priority": 3,
+            "start": 35,
+            "type": "grammar"
+          },
+          {
+            "alternatives": [],
+            "confidence": 1.0,
+            "correction": "به",
+            "end": 57,
+            "id": "ea9f3fca-eee1-4597-8f4a-00f50558d510",
+            "locked": true,
+            "original": "بة",
+            "priority": 1,
+            "start": 55,
+            "type": "spelling"
+          }
+        ],
+        "timing_ms": {
+          "grammar_ms": 1304,
+          "punctuation_ms": 1050,
+          "spelling_ms": 1193,
+          "total_ms": 3549
+        }
+      }
+    },
+    {
+      "id": "C3-03",
+      "category": 3,
+      "input": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة في بناء كتلة عضلية قوية ويا له من التزام حديدي يثير الإعجاب",
+      "track_a": {
+        "spelling": "التزم الرياضي بتناول وجبات الصحية وحساب سعادتة بدقة رغبة في بناء كتلة عملية قوية ويا له من التزام حديدي يثير الإعجاب",
+        "spelling_changed": true,
+        "grammar": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة في بناء كتلة عضلية قوية ويا له من التزام حديدي يثير الإعجاب",
+        "grammar_changed": false,
+        "punctuation": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة؛ في بناء كتلة عضلية قوية ويا له، من التزام حديدي يثير الإعجاب",
+        "punctuation_changed": true
+      },
+      "track_b": {
+        "corrected": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة؛ في بناء كتلة عضلية قوية ويا له، من التزام حديدي يثير الإعجاب",
+        "suggestions": [
+          {
+            "alternatives": [],
+            "confidence": 0.8,
+            "correction": "رغبة؛",
+            "end": 57,
+            "id": "b7e29bf0-2565-4b46-b815-58e1b56717c1",
+            "locked": true,
+            "original": "رغبة",
+            "priority": 2,
+            "start": 53,
+            "type": "punctuation"
+          },
+          {
+            "alternatives": [],
+            "confidence": 0.8,
+            "correction": "له،",
+            "end": 88,
+            "id": "6d1e2b65-d2a4-41f1-a803-ce06e93e79c4",
+            "locked": true,
+            "original": "له",
+            "priority": 2,
+            "start": 86,
+            "type": "punctuation"
+          }
+        ],
+        "timing_ms": {
+          "grammar_ms": 6012,
+          "punctuation_ms": 2080,
+          "spelling_ms": 2197,
+          "total_ms": 10291
+        }
+      }
+    },
+    {
+      "id": "C3-04",
+      "category": 3,
+      "input": "هذة المدينه جميله جدا ومناخها معتدل طوال العام",
+      "track_a": {
+        "spelling": "هذة المدينه جميله جدا ومناخها معتدل طوال العام",
+        "spelling_changed": false,
+        "grammar": "هذه المدينة جميلة جدا ومناخها معتدل طوال العام",
+        "grammar_changed": true,
+        "punctuation": "هذة المدينه جميله جدا ومناخها معتدل طوال العام.",
+        "punctuation_changed": true
+      },
+      "track_b": {
+        "corrected": "هذه المدينة جميلة جدا ومناخها معتدل طوال العام.",
+        "suggestions": [
+          {
+            "alternatives": [],
+            "confidence": 0.8,
+            "correction": "العام.",
+            "end": 46,
+            "id": "a4de368f-4ae7-451a-bbe2-ff7fca6b3f3b",
+            "locked": true,
+            "original": "العام",
+            "priority": 2,
+            "start": 41,
+            "type": "punctuation"
+          },
+          {
+            "alternatives": [],
+            "confidence": 1.0,
+            "correction": "هذه المدينة جميلة",
+            "end": 17,
+            "id": "9ff77094-1e33-4946-a343-317f51b8b539",
+            "locked": true,
+            "original": "هذة المدينه جميله",
+            "priority": 1,
+            "start": 0,
+            "type": "spelling"
+          }
+        ],
+        "timing_ms": {
+          "grammar_ms": 1461,
+          "punctuation_ms": 804,
+          "spelling_ms": 970,
+          "total_ms": 3236
+        }
+      }
+    },
+    {
+      "id": "C3-05",
+      "category": 3,
+      "input": "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه في الامتحانات",
+      "track_a": {
+        "spelling": "الطلاب الذين اجتهد في دراستهم حقوق نتائج ممتازه في الامتحانات",
+        "spelling_changed": true,
+        "grammar": "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه في الامتحانات",
+        "grammar_changed": false,
+        "punctuation": "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه في الامتحانات",
+        "punctuation_changed": false
+      },
+      "track_b": {
+        "corrected": "",
+        "suggestions": [],
+        "timing_ms": {}
+      }
+    },
+    {
+      "id": "C4-01",
+      "category": 4,
+      "input": "كانت الفتيات يلعبون في الحديقه",
+      "runs": [
+        {
+          "run": 1,
+          "corrected": "",
+          "suggestions": []
+        },
+        {
+          "run": 2,
+          "corrected": "",
+          "suggestions": []
+        },
+        {
+          "run": 3,
+          "corrected": "",
+          "suggestions": []
+        }
+      ],
+      "deterministic": true
+    },
+    {
+      "id": "C4-02",
+      "category": 4,
+      "input": "ذهب الى المدرسه وقابل المعلمه",
+      "runs": [
+        {
+          "run": 1,
+          "corrected": "ذهب إلى المدرسة وقابل المعلمة.",
+          "suggestions": [
+            {
+              "alternatives": [],
+              "confidence": 0.8,
+              "correction": "المعلمة.",
+              "end": 29,
+              "id": "3579ef5d-9295-46a9-8056-5a0b15dced2d",
+              "locked": true,
+              "original": "المعلمه",
+              "priority": 2,
+              "start": 22,
+              "type": "punctuation"
+            },
+            {
+              "alternatives": [],
+              "confidence": 1.0,
+              "correction": "المدرسة",
+              "end": 15,
+              "id": "af7b8dd8-f85a-4632-a7c9-b9b733d7e019",
+              "locked": true,
+              "original": "المدرسه",
+              "priority": 1,
+              "start": 8,
+              "type": "spelling"
+            },
+            {
+              "alternatives": [
+                "إلى",
+                "ال",
+                "الم",
+                "الى"
+              ],
+              "confidence": 0.9,
+              "correction": "إلى",
+              "end": 7,
+              "id": "cd3a78f0-afbc-42d0-8bba-c60ce884dfdf",
+              "locked": true,
+              "original": "الى",
+              "priority": 1,
+              "start": 4,
+              "type": "spelling"
+            }
+          ]
+        },
+        {
+          "run": 2,
+          "corrected": "ذهب إلى المدرسة وقابل المعلمة.",
+          "suggestions": [
+            {
+              "alternatives": [],
+              "confidence": 0.8,
+              "correction": "المعلمة.",
+              "end": 29,
+              "id": "4263a3c3-69cc-40a7-884f-a6e9bfd17eb1",
+              "locked": true,
+              "original": "المعلمه",
+              "priority": 2,
+              "start": 22,
+              "type": "punctuation"
+            },
+            {
+              "alternatives": [],
+              "confidence": 1.0,
+              "correction": "المدرسة",
+              "end": 15,
+              "id": "3c062f0a-95b6-4eee-bd80-36fc9b295206",
+              "locked": true,
+              "original": "المدرسه",
+              "priority": 1,
+              "start": 8,
+              "type": "spelling"
+            },
+            {
+              "alternatives": [
+                "إلى",
+                "ال",
+                "الم",
+                "الى"
+              ],
+              "confidence": 0.9,
+              "correction": "إلى",
+              "end": 7,
+              "id": "beb1ecbe-3278-47d5-bb14-d28f1eec5b47",
+              "locked": true,
+              "original": "الى",
+              "priority": 1,
+              "start": 4,
+              "type": "spelling"
+            }
+          ]
+        },
+        {
+          "run": 3,
+          "corrected": "ذهب إلى المدرسة وقابل المعلمة.",
+          "suggestions": [
+            {
+              "alternatives": [],
+              "confidence": 0.8,
+              "correction": "المعلمة.",
+              "end": 29,
+              "id": "5361ba1b-5c5f-4740-84be-1c4d96c665db",
+              "locked": true,
+              "original": "المعلمه",
+              "priority": 2,
+              "start": 22,
+              "type": "punctuation"
+            },
+            {
+              "alternatives": [],
+              "confidence": 1.0,
+              "correction": "المدرسة",
+              "end": 15,
+              "id": "f0450147-9d7a-4754-a4fe-403a07219c39",
+              "locked": true,
+              "original": "المدرسه",
+              "priority": 1,
+              "start": 8,
+              "type": "spelling"
+            },
+            {
+              "alternatives": [
+                "إلى",
+                "ال",
+                "الم",
+                "الى"
+              ],
+              "confidence": 0.9,
+              "correction": "إلى",
+              "end": 7,
+              "id": "a8278394-1555-4d01-ba94-1325efc0a97c",
+              "locked": true,
+              "original": "الى",
+              "priority": 1,
+              "start": 4,
+              "type": "spelling"
+            }
+          ]
+        }
+      ],
+      "deterministic": true
+    },
+    {
+      "id": "C4-03",
+      "category": 4,
+      "input": "ان الطالبات ذهبو الى الجامعه",
+      "runs": [
+        {
+          "run": 1,
+          "corrected": "إن الطالبات ذهبن ذهبوا الجامعة.",
+          "suggestions": [
+            {
+              "alternatives": [],
+              "confidence": 1.0,
+              "correction": "ذهبن",
+              "end": 16,
+              "id": "bc1d01e1-8d6b-4bda-bbe0-199e841d0f3d",
+              "locked": true,
+              "original": "ذهبو",
+              "priority": 3,
+              "start": 12,
+              "type": "grammar"
+            },
+            {
+              "alternatives": [],
+              "confidence": 0.8,
+              "correction": "الجامعة.",
+              "end": 28,
+              "id": "8cdb866c-0c6f-4cb1-a4ef-d00be9b455f7",
+              "locked": true,
+              "original": "الجامعه",
+              "priority": 2,
+              "start": 21,
+              "type": "punctuation"
+            },
+            {
+              "alternatives": [],
+              "confidence": 1.0,
+              "correction": "إن",
+              "end": 2,
+              "id": "027f98a7-668c-463f-9ecc-acaad6b959b2",
+              "locked": true,
+              "original": "ان",
+              "priority": 1,
+              "start": 0,
+              "type": "spelling"
+            },
+            {
+              "alternatives": [
+                "ذهبوا",
+                "ال",
+                "الم",
+                "الى"
+              ],
+              "confidence": 0.9,
+              "correction": "ذهبوا",
+              "end": 20,
+              "id": "8aee308b-6200-4c92-b6d1-95333a112ce0",
+              "locked": true,
+              "original": "الى",
+              "priority": 1,
+              "start": 17,
+              "type": "spelling"
+            }
+          ]
+        },
+        {
+          "run": 2,
+          "corrected": "إن الطالبات ذهبن ذهبوا الجامعة.",
+          "suggestions": [
+            {
+              "alternatives": [],
+              "confidence": 1.0,
+              "correction": "ذهبن",
+              "end": 16,
+              "id": "0c9ec931-ea50-423c-8429-89a100e1c226",
+              "locked": true,
+              "original": "ذهبو",
+              "priority": 3,
+              "start": 12,
+              "type": "grammar"
+            },
+            {
+              "alternatives": [],
+              "confidence": 0.8,
+              "correction": "الجامعة.",
+              "end": 28,
+              "id": "c67960b7-36f0-480a-8e85-716c57465107",
+              "locked": true,
+              "original": "الجامعه",
+              "priority": 2,
+              "start": 21,
+              "type": "punctuation"
+            },
+            {
+              "alternatives": [],
+              "confidence": 1.0,
+              "correction": "إن",
+              "end": 2,
+              "id": "787d7736-29aa-4625-90ad-e1248acb2d48",
+              "locked": true,
+              "original": "ان",
+              "priority": 1,
+              "start": 0,
+              "type": "spelling"
+            },
+            {
+              "alternatives": [
+                "ذهبوا",
+                "ال",
+                "الم",
+                "الى"
+              ],
+              "confidence": 0.9,
+              "correction": "ذهبوا",
+              "end": 20,
+              "id": "69c96488-d579-441c-89ea-3b66477f1f2d",
+              "locked": true,
+              "original": "الى",
+              "priority": 1,
+              "start": 17,
+              "type": "spelling"
+            }
+          ]
+        },
+        {
+          "run": 3,
+          "corrected": "إن الطالبات ذهبن ذهبوا الجامعة.",
+          "suggestions": [
+            {
+              "alternatives": [],
+              "confidence": 1.0,
+              "correction": "ذهبن",
+              "end": 16,
+              "id": "e9626053-e05b-4774-bd33-2155ee6d7fba",
+              "locked": true,
+              "original": "ذهبو",
+              "priority": 3,
+              "start": 12,
+              "type": "grammar"
+            },
+            {
+              "alternatives": [],
+              "confidence": 0.8,
+              "correction": "الجامعة.",
+              "end": 28,
+              "id": "4ecab998-db9d-47b5-a835-a4516a38b1ae",
+              "locked": true,
+              "original": "الجامعه",
+              "priority": 2,
+              "start": 21,
+              "type": "punctuation"
+            },
+            {
+              "alternatives": [],
+              "confidence": 1.0,
+              "correction": "إن",
+              "end": 2,
+              "id": "864a48a7-d61a-4c9e-8953-72826c279d48",
+              "locked": true,
+              "original": "ان",
+              "priority": 1,
+              "start": 0,
+              "type": "spelling"
+            },
+            {
+              "alternatives": [
+                "ذهبوا",
+                "ال",
+                "الم",
+                "الى"
+              ],
+              "confidence": 0.9,
+              "correction": "ذهبوا",
+              "end": 20,
+              "id": "05c65f7f-14f4-474c-bff8-0ce52ce5cf5b",
+              "locked": true,
+              "original": "الى",
+              "priority": 1,
+              "start": 17,
+              "type": "spelling"
+            }
+          ]
+        }
+      ],
+      "deterministic": true
+    }
+  ],
+  "boundary_tests": [
+    {
+      "id": "BOUND-299",
+      "category": 3,
+      "input_len": 299,
+      "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق في معالجة البيانات يستخدم الذكاء ال...",
+      "has_spelling_suggestions": false,
+      "total_suggestions": 6,
+      "timing": {
+        "grammar_ms": 5256,
+        "punctuation_ms": 5490,
+        "spelling_ms": 32835,
+        "total_ms": 43584
+      }
+    },
+    {
+      "id": "BOUND-300",
+      "category": 3,
+      "input_len": 300,
+      "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق في معالجة البيانات يستخدم الذكاء ال...",
+      "has_spelling_suggestions": false,
+      "total_suggestions": 9,
+      "timing": {
+        "grammar_ms": 11035,
+        "punctuation_ms": 5849,
+        "spelling_ms": 18786,
+        "total_ms": 35674
+      }
+    },
+    {
+      "id": "BOUND-301",
+      "category": 3,
+      "input_len": 301,
+      "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق في معالجة البيانات يستخدم الذكاء ال...",
+      "has_spelling_suggestions": false,
+      "total_suggestions": 9,
+      "timing": {
+        "grammar_ms": 12363,
+        "punctuation_ms": 6256,
+        "spelling_ms": 3209,
+        "total_ms": 21833
+      }
+    },
+    {
+      "id": "BOUND-500",
+      "category": 3,
+      "input_len": 500,
+      "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق في معالجة البيانات يستخدم الذكاء ال...",
+      "has_spelling_suggestions": false,
+      "total_suggestions": 23,
+      "timing": {
+        "grammar_ms": 18635,
+        "punctuation_ms": 12917,
+        "spelling_ms": 0,
+        "total_ms": 31560
+      }
+    }
+  ]
+}

archive/old_tests/deep_dive_test.py ADDED Viewed

	@@ -0,0 +1,519 @@

+"""
+BAYAN Deep-Dive Test Harness — Track A (Raw Models via API) & Track B (Full Pipeline via API)
+Uses the deployed HF Space API (bayan10/bayan-api) instead of loading models locally.
+This avoids the 1GB model download hang and tests the ACTUAL production behavior.
+Track A: /api/spelling, /api/grammar, /api/punctuation (individual model endpoints)
+Track B: /api/analyze (full pipeline with StageLocker, OffsetMapper, PatchSet)
+Usage:
+    python tests/deep_dive_test.py --stage spelling
+    python tests/deep_dive_test.py --stage grammar
+    python tests/deep_dive_test.py --stage punctuation
+    python tests/deep_dive_test.py --stage pipeline
+    python tests/deep_dive_test.py --stage all
+"""
+import sys, os, re, json, time, argparse
+from datetime import datetime, timezone
+# ═══════════════════════════════════════════════════════════════════
+# API CLIENT
+# ═══════════════════════════════════════════════════════════════════
+import requests
+API_BASE = "https://bayan10-bayan-api.hf.space"
+TIMEOUT = 60  # seconds per request
+def api_call(endpoint, text, retries=2):
+    """Call the deployed API with retry."""
+    url = f"{API_BASE}{endpoint}"
+    for attempt in range(retries + 1):
+        try:
+            t0 = time.time()
+            resp = requests.post(url, json={"text": text}, timeout=TIMEOUT)
+            elapsed = int((time.time() - t0) * 1000)
+            if resp.status_code == 200:
+                data = resp.json()
+                data['_elapsed_ms'] = elapsed
+                data['_timestamp'] = datetime.now(timezone.utc).isoformat()
+                return data
+            else:
+                if attempt < retries:
+                    time.sleep(2)
+                    continue
+                return {"error": f"HTTP {resp.status_code}: {resp.text[:200]}", "_elapsed_ms": elapsed}
+        except requests.exceptions.Timeout:
+            if attempt < retries:
+                time.sleep(2)
+                continue
+            return {"error": f"Timeout after {TIMEOUT}s", "_elapsed_ms": TIMEOUT * 1000}
+        except Exception as e:
+            return {"error": str(e)}
+# ═══════════════════════════════════════════════════════════════════
+# TRACK A — RAW MODEL CALLS (individual endpoints, no pipeline)
+# ═══════════════════════════════════════════════════════════════════
+def track_a_spelling(text):
+    """Call /api/spelling — raw AraSpell output."""
+    result = api_call("/api/spelling", text)
+    if "error" in result:
+        return {"input": text, "output": text, "error": result["error"], "changed": False}
+    corrected = result.get("corrected_text", text)
+    return {
+        "input": text, "output": corrected, "changed": corrected != text,
+        "elapsed_ms": result.get("_elapsed_ms"), "timestamp": result.get("_timestamp")
+    }
+def track_a_grammar(text):
+    """Call /api/grammar — raw grammar model output."""
+    result = api_call("/api/grammar", text)
+    if "error" in result:
+        return {"input": text, "output": text, "error": result["error"], "changed": False}
+    corrected = result.get("corrected_text", text)
+    return {
+        "input": text, "output": corrected, "changed": corrected != text,
+        "elapsed_ms": result.get("_elapsed_ms"), "timestamp": result.get("_timestamp")
+    }
+def track_a_punctuation(text):
+    """Call /api/punctuation — raw PuncAra output."""
+    result = api_call("/api/punctuation", text)
+    if "error" in result:
+        return {"input": text, "output": text, "error": result["error"], "changed": False}
+    corrected = result.get("corrected_text", text)
+    marks_before = sum(1 for c in text if c in '.,;:!?،؛؟')
+    marks_after = sum(1 for c in corrected if c in '.,;:!?،؛؟')
+    return {
+        "input": text, "output": corrected, "changed": corrected != text,
+        "marks_added": marks_after - marks_before,
+        "elapsed_ms": result.get("_elapsed_ms"), "timestamp": result.get("_timestamp")
+    }
+# ═══════════════════════════════════════════════════════════════════
+# TRACK B — FULL PIPELINE (/api/analyze)
+# ═══════════════════════════════════════════════════════════════════
+def track_b_analyze(text):
+    """Call /api/analyze — full pipeline with all stages."""
+    result = api_call("/api/analyze", text)
+    if "error" in result and "status" not in result:
+        return {"input": text, "error": result["error"], "suggestions": []}
+    return {
+        "input": text,
+        "original": result.get("original", text),
+        "corrected": result.get("corrected", text),
+        "suggestions": result.get("suggestions", []),
+        "timing_ms": result.get("timing_ms", {}),
+        "elapsed_ms": result.get("_elapsed_ms"),
+        "timestamp": result.get("_timestamp"),
+    }
+# ═══════════════════════════════════════════════════════════════════
+# TEST INPUTS — ALL CATEGORIES
+# ═══════════════════════════════════════════════════════════════════
+CAT2_OVERCORRECTION = [
+    {"id": "C2-01", "input": "القاهرة عاصمة جمهورية مصر العربية وأكبر مدنها", "domain": "news"},
+    {"id": "C2-02", "input": "يعد نهر النيل أطول أنهار العالم", "domain": "news"},
+    {"id": "C2-03", "input": "بسم الله الرحمن الرحيم", "domain": "religious"},
+    {"id": "C2-04", "input": "إنا لله وإنا إليه راجعون", "domain": "religious"},
+    {"id": "C2-05", "input": "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق", "domain": "technical"},
+    {"id": "C2-06", "input": "سافر محمد إلى دبي للعمل في شركة جوجل", "domain": "proper_nouns"},
+    {"id": "C2-07", "input": "الرئيس عبد الفتاح السيسي رئيس جمهورية مصر العربية", "domain": "proper_nouns"},
+    {"id": "C2-08", "input": "استوقفني المشهد فتأملته مليا", "domain": "literary"},
+    {"id": "C2-09", "input": "أضحى التعليم الإلكتروني ضرورة ملحة في عصرنا الحالي", "domain": "formal"},
+    {"id": "C2-10", "input": "تتراوح درجات الحرارة بين خمس وعشرين وثلاثين درجة مئوية", "domain": "weather"},
+]
+CAT8_CLITIC_ROOTS = [
+    ('مدرسة', 'moon'),       # Moon letter
+    ('شمس', 'sun'),          # Sun letter
+    ('أمة', 'hamza'),        # Hamza-initial
+    ('نافذة', 'long'),       # Long word
+    ('علم', 'short'),        # Short 3-letter root
+    ('اقتصاد', 'alef'),     # Alef-initial, long
+]
+CAT8_PREFIXES = [("bare", ""), ("wa", "و"), ("ba", "ب"), ("la", "ل"), ("ka", "ك")]
+CAT8_TESTS = []
+for root, root_type in CAT8_CLITIC_ROOTS:
+    for pfx_name, pfx in CAT8_PREFIXES:
+        word = pfx + root
+        CAT8_TESTS.append({
+            "id": f"C8-{root}-{pfx_name}", "input": word, "root": root,
+            "root_type": root_type, "prefix": pfx, "expected": word,
+        })
+CAT9_CONFUSABLE = [
+    # === Isolation tests ===
+    {"id": "C9-01a", "input": "ان", "context": "isolation", "concern": "should→أن/إن NOT كان"},
+    {"id": "C9-01b", "input": "كان", "context": "isolation", "concern": "stays كان"},
+    {"id": "C9-02a", "input": "إلى", "context": "isolation", "concern": "stays إلى"},
+    {"id": "C9-02b", "input": "على", "context": "isolation", "concern": "stays على"},
+    {"id": "C9-03a", "input": "هذا", "context": "isolation", "concern": "stays هذا"},
+    {"id": "C9-03b", "input": "هذه", "context": "isolation", "concern": "stays هذه"},
+    {"id": "C9-03c", "input": "هذة", "context": "isolation", "concern": "misspelling→هذه"},
+    {"id": "C9-04a", "input": "لكن", "context": "isolation", "concern": "stays لكن"},
+    {"id": "C9-04b", "input": "لاكن", "context": "isolation", "concern": "misspelling→لكن"},
+    {"id": "C9-05a", "input": "ذلك", "context": "isolation", "concern": "stays ذلك"},
+    {"id": "C9-05b", "input": "ذالك", "context": "isolation", "concern": "misspelling→ذلك"},
+    {"id": "C9-06a", "input": "الى", "context": "isolation", "concern": "should→إلى"},
+    # === Sentence-context tests ===
+    {"id": "C9-S01", "input": "ان الحياة جميلة", "context": "sentence", "concern": "ان→أن/إن NOT كان"},
+    {"id": "C9-S02", "input": "كان الرجل طيبا", "context": "sentence", "concern": "كان stays"},
+    {"id": "C9-S03", "input": "ذهب الى المدرسة", "context": "sentence", "concern": "الى→إلى"},
+    {"id": "C9-S04", "input": "جلس على الكرسي", "context": "sentence", "concern": "على stays"},
+    {"id": "C9-S05", "input": "هذة المدينة جميلة", "context": "sentence", "concern": "هذة→هذه"},
+    {"id": "C9-S06", "input": "هو ذكي لاكن كسول", "context": "sentence", "concern": "لاكن→لكن"},
+    {"id": "C9-S07", "input": "ذالك الكتاب مفيد", "context": "sentence", "concern": "ذالك→ذلك"},
+    {"id": "C9-S08", "input": "هذا البيت كبير", "context": "sentence", "concern": "هذا stays"},
+    {"id": "C9-S09", "input": "هذه السيارة سريعة", "context": "sentence", "concern": "هذه stays"},
+    {"id": "C9-S10", "input": "سافر إلى القاهرة", "context": "sentence", "concern": "إلى stays"},
+    {"id": "C9-S11", "input": "جلس على المقعد", "context": "sentence", "concern": "على stays"},
+    {"id": "C9-S12", "input": "ان الذكاء مهم لكن الاجتهاد اهم", "context": "sentence", "concern": "ان→أن, لكن stays"},
+]
+CAT10_EDGE_CASES = [
+    {"id": "C10-01", "input": "كَتَبَ الطَّالِبُ الدَّرسَ", "concern": "tashkeel_present"},
+    {"id": "C10-02", "input": "كتب الطالب الدرس", "concern": "tashkeel_absent"},
+    {"id": "C10-03", "input": "قرأ إبراهيم آيات من القرآن", "concern": "alef_forms"},
+    {"id": "C10-04", "input": "مشى الفتى إلى المستشفى", "concern": "ya_alef_maksura"},
+    {"id": "C10-05", "input": "ذهبت إلى المدرسة", "concern": "ta_marbuta"},
+    {"id": "C10-06", "input": "جاء ١٢٣ طالبا", "concern": "arabic_indic_digits"},
+    {"id": "C10-07", "input": "جاء 123 طالبا", "concern": "western_digits"},
+    {"id": "C10-08", "input": "يعمل في شركة Google في القاهرة", "concern": "latin_in_arabic"},
+    {"id": "C10-09", "input": "انا رايح المدرسة النهارده", "concern": "egyptian_dialect"},
+    {"id": "C10-10", "input": "الموضوع ده كويس جدااااا", "concern": "repeated_letters"},
+    {"id": "C10-11", "input": "مسؤول عن الشؤون الداخلية", "concern": "hamza_on_waw"},
+    {"id": "C10-12", "input": "بيئة العمل مليئة بالتحديات", "concern": "hamza_on_ya"},
+    {"id": "C10-13", "input": "الكتاب الذى قرأته مفيد", "concern": "ya_in_الذي"},
+    {"id": "C10-14", "input": "خطأ الطالب في الامتحان", "concern": "hamza_standalone"},
+    {"id": "C10-15", "input": "الحمد لله رب العالمين الرحمن الرحيم مالك يوم الدين", "concern": "religious_long"},
+]
+CAT5_PUNC_SANITY = [
+    {"id": "C5-01", "input": "ذهب إلى المدرسة", "length": "short_3w"},
+    {"id": "C5-02", "input": "هل تعلم أن الأرض تدور حول الشمس كل عام", "length": "medium_9w"},
+    {"id": "C5-03", "input": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة في بناء كتلة عضلية قوية ويا له من التزام حديدي يثير الإعجاب", "length": "long_20w"},
+    {"id": "C5-04", "input": "قال المعلم للطلاب ادرسوا جيدا فالامتحان قريب", "length": "medium_imperative"},
+    {"id": "C5-05", "input": "كانت الفتيات يلعبن في الحديقة وفجأة سقطت إحداهن وبدأت تبكي بشدة", "length": "long_narrative"},
+]
+CAT6_PUNC_POSITION = [
+    {"id": "C6-01", "input": "ذهب محمد إلى المدرسة ودرس جيدا ثم عاد إلى البيت"},
+    {"id": "C6-02", "input": "إن الذكاء الاصطناعي يلعب دورا هاما لذلك يجب الاهتمام به"},
+    {"id": "C6-03", "input": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة في بناء كتلة عضلية قوية ويا له من التزام حديدي يثير الإعجاب"},
+    {"id": "C6-04", "input": "كانت الفتيات يلعبن في الحديقة وفجأة سقطت إحداهن وبدأت تبكي بشدة"},
+    {"id": "C6-05", "input": "هل تعلم أن القاهرة هي عاصمة مصر وتقع على ضفاف نهر النيل"},
+    {"id": "C6-06", "input": "قال المعلم للطلاب ادرسوا جيدا فالامتحان قريب"},
+    {"id": "C6-07", "input": "يحب الأطفال اللعب في الحديقة وركوب الدراجات والجري بين الأشجار"},
+    {"id": "C6-08", "input": "رغم صعوبة الامتحان إلا أن الطلاب حققوا نتائج مبهرة"},
+    {"id": "C6-09", "input": "سافر العالم إلى عدة دول لحضور المؤتمرات العلمية ونشر أبحاثه"},
+    {"id": "C6-10", "input": "يا بني اجتهد في دراستك فالعلم نور والجهل ظلام"},
+]
+# ═══════════════════════════════════════════════════════════════════
+# RUNNERS
+# ═══════════════════════════════════════════════════════════════════
+def log(msg):
+    print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}", flush=True)
+def run_spelling_tests():
+    results = []
+    log("=== Category 2: Overcorrection (10 tests) ===")
+    for test in CAT2_OVERCORRECTION:
+        log(f"  {test['id']}: {test['input'][:50]}...")
+        a = track_a_spelling(test['input'])
+        b = track_b_analyze(test['input'])
+        fp = a.get('changed', False)
+        result = {
+            "id": test['id'], "category": 2, "input": test['input'],
+            "domain": test['domain'],
+            "track_a_spelling": a['output'],
+            "track_a_changed": a.get('changed', False),
+            "track_b_suggestions": len(b.get('suggestions', [])),
+            "track_b_corrected": b.get('corrected', ''),
+            "is_false_positive": fp,
+        }
+        status = "⚠ FP" if fp else "✓"
+        log(f"    {status} A:'{a['output'][:60]}' B_sugg:{len(b.get('suggestions',[]))}")
+        results.append(result)
+    log("\n=== Category 8: Clitic/Prefix (30 tests) ===")
+    for test in CAT8_TESTS:
+        a = track_a_spelling(test['input'])
+        changed = a.get('changed', False)
+        if changed:
+            # Classify: did it preserve root or mangle it?
+            output = a['output']
+            root_preserved = test['root'] in output or any(
+                test['root'][:-1] in output  # partial root match
+                for _ in [1]
+            )
+            classification = "root_fixed" if root_preserved else "prefix_mangled"
+        else:
+            classification = "correct"
+        result = {
+            "id": test['id'], "category": 8, "input": test['input'],
+            "root": test['root'], "root_type": test['root_type'],
+            "prefix": test['prefix'],
+            "track_a_spelling": a['output'], "changed": changed,
+            "classification": classification,
+        }
+        if changed:
+            log(f"  ⚠ {test['id']}: '{test['input']}' → '{a['output']}' [{classification}]")
+        results.append(result)
+    log("\n=== Category 9: Confusable Words (24 tests) ===")
+    for test in CAT9_CONFUSABLE:
+        a = track_a_spelling(test['input'])
+        result = {
+            "id": test['id'], "category": 9, "input": test['input'],
+            "context": test['context'], "concern": test['concern'],
+            "track_a_spelling": a['output'], "changed": a.get('changed', False),
+        }
+        if a.get('changed'):
+            log(f"  ⚠ {test['id']}: '{test['input']}' → '{a['output']}' (concern: {test['concern']})")
+        else:
+            log(f"  ✓ {test['id']}: no change")
+        results.append(result)
+    log("\n=== Category 10: Arabic Edge Cases (15 tests) ===")
+    for test in CAT10_EDGE_CASES:
+        a = track_a_spelling(test['input'])
+        result = {
+            "id": test['id'], "category": 10, "input": test['input'],
+            "concern": test['concern'],
+            "track_a_spelling": a['output'], "changed": a.get('changed', False),
+        }
+        if a.get('changed'):
+            log(f"  ⚠ {test['id']}: '{test['input']}' → '{a['output']}' [{test['concern']}]")
+        else:
+            log(f"  ✓ {test['id']}: no change [{test['concern']}]")
+        results.append(result)
+    return results
+def run_punctuation_tests():
+    results = []
+    log("=== Category 5: Punctuation Sanity (5 tests) ===")
+    for test in CAT5_PUNC_SANITY:
+        log(f"  {test['id']}: {test['input'][:50]}...")
+        a = track_a_punctuation(test['input'])
+        result = {
+            "id": test['id'], "category": 5, "input": test['input'],
+            "length": test['length'],
+            "track_a_punc": a['output'],
+            "marks_added": a.get('marks_added', 0),
+            "changed": a.get('changed', False),
+        }
+        log(f"    Marks: +{a.get('marks_added', 0)} | Output: {a['output'][:80]}")
+        results.append(result)
+    log("\n=== Category 6: Punctuation Position (10 tests) ===")
+    for test in CAT6_PUNC_POSITION:
+        log(f"  {test['id']}: {test['input'][:50]}...")
+        # Track A: raw punctuation on original text
+        a_punc = track_a_punctuation(test['input'])
+        # Track B: full pipeline
+        b = track_b_analyze(test['input'])
+        # Measure: where did Track A put punctuation marks?
+        a_marks = _find_punct_positions(test['input'], a_punc['output'])
+        # Measure: where did Track B put punctuation suggestions?
+        b_punc_sugg = [s for s in b.get('suggestions', []) if s.get('type') == 'punctuation']
+        b_marks = [(s.get('start', 0), s.get('end', 0), s.get('correction', '')) for s in b_punc_sugg]
+        result = {
+            "id": test['id'], "category": 6, "input": test['input'],
+            "track_a_punc_output": a_punc['output'],
+            "track_a_marks": a_marks,
+            "track_b_corrected": b.get('corrected', ''),
+            "track_b_punc_suggestions": b_punc_sugg,
+            "track_b_marks": b_marks,
+        }
+        log(f"    A marks: {a_marks}")
+        log(f"    B marks: {b_marks}")
+        results.append(result)
+    return results
+def _find_punct_positions(original, punctuated):
+    """Find where punctuation was added by comparing original vs punctuated."""
+    PUNC = set('.,;:!?،؛؟')
+    marks = []
+    # Word-level alignment
+    orig_words = original.split()
+    punc_words = punctuated.split()
+    oi, pi = 0, 0
+    char_pos = 0
+    while oi < len(orig_words) and pi < len(punc_words):
+        o_base = ''.join(c for c in orig_words[oi] if c not in PUNC)
+        p_base = ''.join(c for c in punc_words[pi] if c not in PUNC)
+        if o_base == p_base:
+            # Same word — check for added punctuation
+            o_punc = set(c for c in orig_words[oi] if c in PUNC)
+            p_punc = set(c for c in punc_words[pi] if c in PUNC)
+            added = p_punc - o_punc
+            if added:
+                marks.append({
+                    "word_index": oi, "word": orig_words[oi],
+                    "after_word": orig_words[oi],
+                    "marks_added": list(added),
+                    "char_pos": char_pos,
+                })
+            char_pos += len(orig_words[oi]) + 1  # +1 for space
+            oi += 1
+            pi += 1
+        else:
+            # Mismatch — model changed the word
+            char_pos += len(orig_words[oi]) + 1
+            oi += 1
+            pi += 1
+    return marks
+def run_pipeline_comparison():
+    """Run tests that need both Track A and Track B for comparison (Cat 1, 3, 4, 7)."""
+    results = []
+    # Cat 3: Integration-only — test where raw models work but pipeline might not
+    log("=== Category 3: Integration-Only (5 tests) ===")
+    integration_inputs = [
+        {"id": "C3-01", "input": "كانت الفتيات يلعبون في الحديقه وفجأه سقطت احداهن وبدءت تبكي بشده"},
+        {"id": "C3-02", "input": "ان الذكاء الاصطناعي يلعب دورا هاما ولذالك يجب الاهتمام بة"},
+        {"id": "C3-03", "input": "التزم الرياضي بتناول وجباته الصحية وحساب سعراته بدقة رغبة في بناء كتلة عضلية قوية ويا له من التزام حديدي يثير الإعجاب"},
+        {"id": "C3-04", "input": "هذة المدينه جميله جدا ومناخها معتدل طوال العام"},
+        {"id": "C3-05", "input": "الطلاب الذين اجتهدو في دراستهم حققو نتائج ممتازه في الامتحانات"},
+    ]
+    for test in integration_inputs:
+        log(f"  {test['id']}: {test['input'][:50]}...")
+        a_spell = track_a_spelling(test['input'])
+        a_gram = track_a_grammar(test['input'])
+        a_punc = track_a_punctuation(test['input'])
+        b = track_b_analyze(test['input'])
+        result = {
+            "id": test['id'], "category": 3, "input": test['input'],
+            "track_a": {
+                "spelling": a_spell['output'], "spelling_changed": a_spell.get('changed'),
+                "grammar": a_gram['output'], "grammar_changed": a_gram.get('changed'),
+                "punctuation": a_punc['output'], "punctuation_changed": a_punc.get('changed'),
+            },
+            "track_b": {
+                "corrected": b.get('corrected', ''),
+                "suggestions": b.get('suggestions', []),
+                "timing_ms": b.get('timing_ms', {}),
+            }
+        }
+        log(f"    A_spell: {a_spell['output'][:60]}")
+        log(f"    A_gram:  {a_gram['output'][:60]}")
+        log(f"    A_punc:  {a_punc['output'][:60]}")
+        log(f"    B_final: {b.get('corrected','')[:60]}")
+        log(f"    B_sugg:  {len(b.get('suggestions',[]))}")
+        results.append(result)
+    # Cat 4: Overlap — run 3x for determinism
+    log("\n=== Category 4: Overlap Resolution (3 tests × 3 runs) ===")
+    overlap_inputs = [
+        {"id": "C4-01", "input": "كانت الفتيات يلعبون في الحديقه"},
+        {"id": "C4-02", "input": "ذهب الى المدرسه وقابل المعلمه"},
+        {"id": "C4-03", "input": "ان الطالبات ذهبو الى الجامعه"},
+    ]
+    for test in overlap_inputs:
+        runs = []
+        for run_idx in range(3):
+            b = track_b_analyze(test['input'])
+            runs.append({
+                "run": run_idx + 1,
+                "corrected": b.get('corrected', ''),
+                "suggestions": b.get('suggestions', []),
+            })
+        # Check determinism
+        all_same = all(r['corrected'] == runs[0]['corrected'] for r in runs)
+        result = {
+            "id": test['id'], "category": 4, "input": test['input'],
+            "runs": runs, "deterministic": all_same,
+        }
+        log(f"  {test['id']}: deterministic={all_same}")
+        for r in runs:
+            log(f"    Run {r['run']}: {r['corrected'][:60]} ({len(r['suggestions'])} sugg)")
+        results.append(result)
+    return results
+# Boundary tests for spelling 300-char cutoff
+def run_boundary_tests():
+    results = []
+    log("\n=== Boundary: Spelling 300-char cutoff ===")
+    base = "يستخدم الذكاء الاصطناعي تقنيات التعلم العميق في معالجة البيانات "
+    for target_len in [299, 300, 301, 500]:
+        text = (base * 10)[:target_len]
+        b = track_b_analyze(text)
+        has_spelling = any(s.get('type') == 'spelling' for s in b.get('suggestions', []))
+        result = {
+            "id": f"BOUND-{target_len}", "category": 3, "input_len": target_len,
+            "input": text[:80] + "...",
+            "has_spelling_suggestions": has_spelling,
+            "total_suggestions": len(b.get('suggestions', [])),
+            "timing": b.get('timing_ms', {}),
+        }
+        log(f"  len={target_len}: spelling_active={has_spelling} suggestions={len(b.get('suggestions',[]))}")
+        results.append(result)
+    return results
+# ═══════════════════════════════════════════════════════════════════
+# MAIN
+# ═══════════════════════════════════════════════════════════════════
+def main():
+    parser = argparse.ArgumentParser(description='BAYAN Deep-Dive Test Harness')
+    parser.add_argument('--stage', choices=['spelling', 'grammar', 'punctuation', 'pipeline', 'all'],
+                       default='spelling')
+    args = parser.parse_args()
+    all_results = {"timestamp": datetime.now(timezone.utc).isoformat(), "api_base": API_BASE}
+    # Health check
+    log(f"Checking API health at {API_BASE}...")
+    try:
+        resp = requests.get(f"{API_BASE}/api/health", timeout=10)
+        log(f"  Health: {resp.status_code} — {resp.json()}")
+        all_results['health'] = resp.json()
+    except Exception as e:
+        log(f"  ⚠ API unreachable: {e}")
+        all_results['health'] = {"error": str(e)}
+    if args.stage in ('spelling', 'all'):
+        log("\n══════ SPELLING TESTS (Cat 2, 8, 9, 10) ══════")
+        all_results['spelling_tests'] = run_spelling_tests()
+    if args.stage in ('punctuation', 'all'):
+        log("\n══════ PUNCTUATION TESTS (Cat 5, 6) ══════")
+        all_results['punctuation_tests'] = run_punctuation_tests()
+    if args.stage in ('pipeline', 'all'):
+        log("\n══════ PIPELINE TESTS (Cat 3, 4) ══════")
+        all_results['pipeline_tests'] = run_pipeline_comparison()
+        all_results['boundary_tests'] = run_boundary_tests()
+    # Save
+    output_path = os.path.join(os.path.dirname(__file__), 'deep_dive_output.json')
+    with open(output_path, 'w', encoding='utf-8') as f:
+        json.dump(all_results, f, ensure_ascii=False, indent=2)
+    log(f"\nResults saved to {output_path}")
+    # Summary
+    for key in ['spelling_tests', 'punctuation_tests', 'pipeline_tests', 'boundary_tests']:
+        if key in all_results:
+            tests = all_results[key]
+            if isinstance(tests, list):
+                changed = sum(1 for t in tests if t.get('changed') or t.get('is_false_positive'))
+                log(f"  {key}: {len(tests)} tests, {changed} with changes")
+if __name__ == '__main__':
+    main()

archive/old_tests/gap_filling_results.json ADDED Viewed

	@@ -0,0 +1,261 @@

+{
+  "phase_1_3": [
+    {
+      "input": "لكن الأمر مختلف",
+      "corrected": "لكن الأمر مختلف.",
+      "check": "لكن",
+      "status": "✅ PRESERVED",
+      "suggestions": 1
+    },
+    {
+      "input": "ذلك الكتاب جميل",
+      "corrected": "ذلك الكتاب جميل.",
+      "check": "ذلك",
+      "status": "✅ PRESERVED",
+      "suggestions": 1
+    },
+    {
+      "input": "إلى المدرسة",
+      "corrected": "إلى المدرسة.",
+      "check": "إلى",
+      "status": "✅ PRESERVED",
+      "suggestions": 1
+    },
+    {
+      "input": "على الطاولة",
+      "corrected": "على الطاولة.",
+      "check": "على",
+      "status": "✅ PRESERVED",
+      "suggestions": 1
+    },
+    {
+      "input": "هذه المدينة جميلة",
+      "corrected": "هذه المدينة جميلة.",
+      "check": "هذه",
+      "status": "✅ PRESERVED",
+      "suggestions": 1
+    },
+    {
+      "input": "كان الجو حارا",
+      "corrected": "كان الجو حارا.",
+      "check": "كان",
+      "status": "✅ PRESERVED",
+      "suggestions": 1
+    },
+    {
+      "input": "لاكن الأمر مختلف",
+      "corrected": "لكن الأمر مختلف.",
+      "check": "لاكن→لكن",
+      "status": "✅ CORRECTED",
+      "suggestions": 2
+    },
+    {
+      "input": "ذالك الكتاب جميل",
+      "corrected": "ذلك الكتاب جميل.",
+      "check": "ذالك→ذلك",
+      "status": "✅ CORRECTED",
+      "suggestions": 2
+    }
+  ],
+  "phase_2": {
+    "total": 10,
+    "raw_fp_count": 5,
+    "raw_fp_rate": "50%",
+    "pipeline_fp_count": 1,
+    "pipeline_fp_rate": "10%",
+    "results": [
+      {
+        "id": "R-01",
+        "word": "عصماء",
+        "raw_changed": true,
+        "pipeline_changed": false,
+        "pipeline_targeted": false,
+        "is_false_positive": false
+      },
+      {
+        "id": "R-02",
+        "word": "يستشف",
+        "raw_changed": true,
+        "pipeline_changed": false,
+        "pipeline_targeted": false,
+        "is_false_positive": false
+      },
+      {
+        "id": "R-03",
+        "word": "المسغبة",
+        "raw_changed": true,
+        "pipeline_changed": false,
+        "pipeline_targeted": false,
+        "is_false_positive": false
+      },
+      {
+        "id": "R-04",
+        "word": "التقاعس",
+        "raw_changed": true,
+        "pipeline_changed": false,
+        "pipeline_targeted": false,
+        "is_false_positive": false
+      },
+      {
+        "id": "R-05",
+        "word": "استئثار",
+        "raw_changed": false,
+        "pipeline_changed": false,
+        "pipeline_targeted": false,
+        "is_false_positive": false
+      },
+      {
+        "id": "R-06",
+        "word": "تبجيل",
+        "raw_changed": false,
+        "pipeline_changed": false,
+        "pipeline_targeted": false,
+        "is_false_positive": false
+      },
+      {
+        "id": "R-07",
+        "word": "الدمث",
+        "raw_changed": true,
+        "pipeline_changed": true,
+        "pipeline_targeted": true,
+        "is_false_positive": true
+      },
+      {
+        "id": "R-08",
+        "word": "استقصاء",
+        "raw_changed": false,
+        "pipeline_changed": false,
+        "pipeline_targeted": false,
+        "is_false_positive": false
+      },
+      {
+        "id": "R-09",
+        "word": "التواني",
+        "raw_changed": false,
+        "pipeline_changed": false,
+        "pipeline_targeted": false,
+        "is_false_positive": false
+      },
+      {
+        "id": "R-10",
+        "word": "مستطرف",
+        "raw_changed": false,
+        "pipeline_changed": false,
+        "pipeline_targeted": false,
+        "is_false_positive": false
+      }
+    ]
+  },
+  "phase_3_2": [
+    {
+      "input": "ولذالك قررت السفر",
+      "corrected": "ولذالك قررت السفر.",
+      "bad_split_present": false,
+      "good_correction_present": false
+    },
+    {
+      "input": "المستشفياتهم كبيرة",
+      "corrected": "المستشفيات هم كبيرة.",
+      "bad_split_present": false,
+      "good_correction_present": false
+    }
+  ],
+  "phase_5_5": [
+    {
+      "input": "الطالبه كتبو الوجبات",
+      "corrected": "الطالبة كتبو الوجبات.",
+      "suggestions": 2,
+      "has_duplicate": false,
+      "word_count_diff": 0
+    },
+    {
+      "input": "هو ذهبو الي البيت",
+      "corrected": "هو ذهب إلى البيت.",
+      "suggestions": 3,
+      "has_duplicate": false,
+      "word_count_diff": 0
+    },
+    {
+      "input": "الطلاب اجتهدو في امتحانتهم",
+      "corrected": "الطلاب اجتهدو في امتحانتهم.",
+      "suggestions": 1,
+      "has_duplicate": false,
+      "word_count_diff": 0
+    }
+  ],
+  "phase_6_3": {
+    "empty_count": 0,
+    "error_count": 0,
+    "results": [
+      {
+        "attempt": 1,
+        "corrected": "الحديقة جميلة والأزهار متفتحة.",
+        "suggestions": 2,
+        "status": "success",
+        "warnings": {},
+        "is_empty": false,
+        "is_error": false
+      },
+      {
+        "attempt": 2,
+        "corrected": "الحديقة جميلة والأزهار متفتحة.",
+        "suggestions": 2,
+        "status": "success",
+        "warnings": {},
+        "is_empty": false,
+        "is_error": false
+      },
+      {
+        "attempt": 3,
+        "corrected": "الحديقة جميلة والأزهار متفتحة.",
+        "suggestions": 2,
+        "status": "success",
+        "warnings": {},
+        "is_empty": false,
+        "is_error": false
+      },
+      {
+        "attempt": 4,
+        "corrected": "الحديقة جميلة والأزهار متفتحة.",
+        "suggestions": 2,
+        "status": "success",
+        "warnings": {},
+        "is_empty": false,
+        "is_error": false
+      },
+      {
+        "attempt": 5,
+        "corrected": "الحديقة جميلة والأزهار متفتحة.",
+        "suggestions": 2,
+        "status": "success",
+        "warnings": {},
+        "is_empty": false,
+        "is_error": false
+      }
+    ]
+  },
+  "phase_6_4": {
+    "input_chars": 982,
+    "input_words": 159,
+    "status": "success",
+    "suggestions": 4,
+    "warnings": {},
+    "timing": {
+      "grammar_ms": 12196,
+      "punctuation_ms": 14448,
+      "spelling_ms": 0,
+      "total_ms": 26649
+    },
+    "elapsed_ms": 27615,
+    "is_silently_empty": false
+  },
+  "phase_7_1": {
+    "input": "قال المعلم للطلاب ادرسوا جيدا فالامتحان قريب",
+    "raw_output": "قال المعلم للطلاب ادرسوا: جيدا فالامتحان قريب؛",
+    "pipeline_output": "قال المعلم للطلاب ادرسوا: جيدا فالامتحين قريب",
+    "has_semicolon_raw": true,
+    "has_semicolon_pipeline": false,
+    "diagnosis": "StageLocker or validate_punctuation_diff rejection",
+    "pipeline_punc_count": 1
+  }
+}

archive/old_tests/gap_filling_tests.py ADDED Viewed

	@@ -0,0 +1,522 @@

+"""
+Gap-filling live tests for all missing items from the Fix-Everything prompt.
+Covers:
+  Phase 1.3 — Category 9 pairs: لكن/لاكن, ذلك/ذالك, الى/إلى live verification
+  Phase 2   — R-01→R-10 rare vocabulary FP measurement
+  Phase 3.2 — ولذالك and مستشفياتهم specific cases
+  Phase 5.5 — Constructed dual-correction cases
+  Phase 6.3 — BUG-017 re-test
+  Phase 6.4 — 187-word input regression
+  Phase 7.1 — BUG-018 precise tracing
+"""
+import sys, os, json, time, requests
+API_BASE = "https://bayan10-bayan-api.hf.space"
+TIMEOUT = 90
+def api_call(endpoint, text, timeout=TIMEOUT):
+    url = f"{API_BASE}{endpoint}"
+    try:
+        t0 = time.time()
+        resp = requests.post(url, json={"text": text}, timeout=timeout)
+        elapsed = int((time.time() - t0) * 1000)
+        if resp.status_code == 200:
+            data = resp.json()
+            data['_elapsed_ms'] = elapsed
+            return data
+        return {"error": f"HTTP {resp.status_code}", "_elapsed_ms": elapsed}
+    except Exception as e:
+        return {"error": f"{type(e).__name__}: {e}"}
+all_results = {}
+# ══════════════════════════════════════════════════════════════════════
+# Phase 1.3 — Category 9 Pairs Live Verification
+# ══════════════════════════════════════════════════════════════════════
+def test_category9_live():
+    print("=" * 70)
+    print("PHASE 1.3 — Category 9 Pairs Live Verification")
+    print("=" * 70)
+    pairs = [
+        # (input_text, word_that_must_NOT_change, description)
+        ("لكن الأمر مختلف", "لكن", "لكن must NOT become لاكن"),
+        ("ذلك الكتاب جميل", "ذلك", "ذلك must NOT become ذالك"),
+        ("إلى المدرسة", "إلى", "إلى must NOT become على"),
+        ("على الطاولة", "على", "على must NOT become إلى"),
+        ("هذه المدينة جميلة", "هذه", "هذه must NOT become هذة"),
+        ("كان الجو حارا", "كان", "كان must NOT become كأن"),
+        # Reverse direction: misspellings SHOULD be corrected
+        ("لاكن الأمر مختلف", "لاكن→لكن", "لاكن should become لكن"),
+        ("ذالك الكتاب جميل", "ذالك→ذلك", "ذالك should become ذلك"),
+    ]
+    results = []
+    for text, check, desc in pairs:
+        r = api_call("/api/analyze", text)
+        corrected = r.get("corrected", text)
+        suggestions = r.get("suggestions", [])
+        is_reverse = "→" in check
+        if is_reverse:
+            # For misspellings, check that correction happened
+            orig, expected = check.split("→")
+            if expected in corrected and orig not in corrected:
+                status = "✅ CORRECTED"
+            elif orig in corrected:
+                status = "⚠ NOT corrected (pipeline didn't fix misspelling)"
+            else:
+                status = "⚠ UNCLEAR"
+        else:
+            # For correct words, check they weren't corrupted
+            if check in corrected:
+                status = "✅ PRESERVED"
+            else:
+                status = "❌ CORRUPTED"
+        result = {
+            "input": text, "corrected": corrected,
+            "check": check, "status": status,
+            "suggestions": len(suggestions),
+        }
+        results.append(result)
+        print(f"\n  {desc}")
+        print(f"    Input:     '{text}'")
+        print(f"    Corrected: '{corrected}'")
+        print(f"    {status}")
+    return results
+# ══════════════════════════════════════════════════════════════════════
+# Phase 2 — R-01→R-10 Rare Vocabulary FP Measurement
+# ══════════════════════════════════════════════════════════════════════
+def test_rare_vocabulary():
+    print("\n" + "=" * 70)
+    print("PHASE 2 — R-01→R-10 Rare Vocabulary FP Measurement")
+    print("=" * 70)
+    # R-01 through R-10: valid but uncommon Arabic words
+    rare_words = [
+        {"id": "R-01", "word": "عصماء", "sentence": "المدينة العصماء تحتضن آلاف السكان",
+         "desc": "عصماء = impeccable (feminine)"},
+        {"id": "R-02", "word": "يستشف", "sentence": "يستشف الباحث نتائج الدراسة بعناية",
+         "desc": "يستشف = to discern/perceive"},
+        {"id": "R-03", "word": "المسغبة", "sentence": "أرهقته المسغبة والعطش الشديد",
+         "desc": "المسغبة = severe hunger"},
+        {"id": "R-04", "word": "التقاعس", "sentence": "التقاعس عن العمل يؤدي إلى الفشل",
+         "desc": "التقاعس = negligence/laziness"},
+        {"id": "R-05", "word": "استئثار", "sentence": "استئثار السلطة يهدد الديمقراطية",
+         "desc": "استئثار = monopolization"},
+        {"id": "R-06", "word": "تبجيل", "sentence": "تبجيل العلماء واجب على المجتمع",
+         "desc": "تبجيل = veneration"},
+        {"id": "R-07", "word": "الدمث", "sentence": "الرجل الدمث يحبه الجميع",
+         "desc": "الدمث = gentle/affable person"},
+        {"id": "R-08", "word": "استقصاء", "sentence": "استقصاء الحقائق مهم في الصحافة",
+         "desc": "استقصاء = investigation/inquiry"},
+        {"id": "R-09", "word": "التواني", "sentence": "لا يجوز التواني في طلب العلم",
+         "desc": "التواني = procrastination"},
+        {"id": "R-10", "word": "مستطرف", "sentence": "كتاب المستطرف من أمهات الكتب العربية",
+         "desc": "مستطرف = novel/curious (literary term)"},
+    ]
+    false_positives = 0
+    total = len(rare_words)
+    results = []
+    for item in rare_words:
+        # Track A: Raw spelling
+        a = api_call("/api/spelling", item["sentence"])
+        a_out = a.get("corrected_text", item["sentence"])
+        a_changed_word = item["word"] not in a_out
+        # Track B: Pipeline
+        b = api_call("/api/analyze", item["sentence"])
+        b_out = b.get("corrected", item["sentence"])
+        b_suggestions = b.get("suggestions", [])
+        b_changed_word = item["word"] not in b_out
+        # Check if any suggestion targets the rare word
+        word_targeted = False
+        targeting_suggestion = None
+        for s in b_suggestions:
+            if s.get("original", "") == item["word"]:
+                word_targeted = True
+                targeting_suggestion = s
+                break
+        is_fp = b_changed_word or word_targeted
+        if is_fp:
+            false_positives += 1
+        result = {
+            "id": item["id"],
+            "word": item["word"],
+            "raw_changed": a_changed_word,
+            "pipeline_changed": b_changed_word,
+            "pipeline_targeted": word_targeted,
+            "is_false_positive": is_fp,
+        }
+        results.append(result)
+        status = "❌ FALSE POSITIVE" if is_fp else "✅ PRESERVED"
+        print(f"\n  {item['id']}: {item['desc']}")
+        print(f"    Input:      '{item['sentence'][:60]}...'")
+        print(f"    Raw spell:  changed={a_changed_word}")
+        if a_changed_word:
+            print(f"    Raw output: '{a_out[:60]}...'")
+        print(f"    Pipeline:   changed={b_changed_word}, targeted={word_targeted}")
+        if b_changed_word:
+            print(f"    Pipeline:   '{b_out[:60]}...'")
+        if targeting_suggestion:
+            print(f"    Suggestion: '{targeting_suggestion.get('original','')}' → '{targeting_suggestion.get('correction','')}' (conf={targeting_suggestion.get('confidence', '?')})")
+        print(f"    {status}")
+    raw_fp_count = sum(1 for r in results if r["raw_changed"])
+    pipeline_fp_count = false_positives
+    print(f"\n{'=' * 50}")
+    print(f"  Raw model FP rate:  {raw_fp_count}/{total} = {raw_fp_count/total*100:.0f}%")
+    print(f"  Pipeline FP rate:   {pipeline_fp_count}/{total} = {pipeline_fp_count/total*100:.0f}%")
+    return {
+        "total": total,
+        "raw_fp_count": raw_fp_count,
+        "raw_fp_rate": f"{raw_fp_count/total*100:.0f}%",
+        "pipeline_fp_count": pipeline_fp_count,
+        "pipeline_fp_rate": f"{pipeline_fp_count/total*100:.0f}%",
+        "results": results,
+    }
+# ══════════════════════════════════════════════════════════════════════
+# Phase 3.2 — Specific Word-split Cases
+# ══════════════════════════════════════════════════════════════════════
+def test_word_splits():
+    print("\n" + "=" * 70)
+    print("PHASE 3.2 — Specific Word-split Verification")
+    print("=" * 70)
+    cases = [
+        {
+            "input": "ولذالك قررت السفر",
+            "target_word": "ولذالك",
+            "expected_correct": "ولذلك",
+            "bad_split": "ولذا ذلك",
+            "desc": "ولذالك should become ولذلك, NOT 'ولذا ذلك'"
+        },
+        {
+            "input": "المستشفياتهم كبيرة",
+            "target_word": "المستشفياتهم",
+            "expected_correct": "مستشفياتهم",
+            "bad_split": "في مستشفيات هم",
+            "desc": "مستشفياتهم should NOT be split into 'في مستشفيات هم'"
+        },
+    ]
+    results = []
+    for case in cases:
+        r = api_call("/api/analyze", case["input"])
+        corrected = r.get("corrected", case["input"])
+        suggestions = r.get("suggestions", [])
+        has_bad_split = case["bad_split"] in corrected
+        has_good_correction = case["expected_correct"] in corrected
+        result = {
+            "input": case["input"],
+            "corrected": corrected,
+            "bad_split_present": has_bad_split,
+            "good_correction_present": has_good_correction,
+        }
+        results.append(result)
+        print(f"\n  {case['desc']}")
+        print(f"    Input:     '{case['input']}'")
+        print(f"    Corrected: '{corrected}'")
+        if has_bad_split:
+            print(f"    ❌ BAD SPLIT detected: '{case['bad_split']}'")
+        elif has_good_correction:
+            print(f"    ✅ Correctly fixed to '{case['expected_correct']}'")
+        else:
+            print(f"    ⚠ Neither expected correction nor bad split found")
+    return results
+# ══════════════════════════════════════════════════════════════════════
+# Phase 5.5 — Constructed Dual-correction Cases
+# ══════════════════════════════════════════════════════════════════════
+def test_dual_corrections():
+    print("\n" + "=" * 70)
+    print("PHASE 5.5 — Constructed Dual-correction Cases")
+    print("=" * 70)
+    # Cases where spelling AND grammar would both want to change words
+    cases = [
+        {
+            "input": "الطالبه كتبو الوجبات",
+            "desc": "Spelling: الطالبه→الطالبة, Grammar: كتبو→كتبوا + possibly الوجبات→الواجبات",
+        },
+        {
+            "input": "هو ذهبو الي البيت",
+            "desc": "Spelling: الي→إلى, Grammar: ذهبو→ذهب (singular subject هو)",
+        },
+        {
+            "input": "الطلاب اجتهدو في امتحانتهم",
+            "desc": "Spelling: امتحانتهم→امتحاناتهم, Grammar: اجتهدو→اجتهدوا",
+        },
+    ]
+    results = []
+    for case in cases:
+        r = api_call("/api/analyze", case["input"])
+        corrected = r.get("corrected", case["input"])
+        suggestions = r.get("suggestions", [])
+        # Check for text duplication
+        words = corrected.split()
+        has_duplicate = any(i > 0 and words[i] == words[i-1] for i in range(len(words)))
+        # Check for dropped words (output should have ≈ same word count ±1)
+        input_words = case["input"].split()
+        word_diff = len(words) - len(input_words)
+        result = {
+            "input": case["input"],
+            "corrected": corrected,
+            "suggestions": len(suggestions),
+            "has_duplicate": has_duplicate,
+            "word_count_diff": word_diff,
+        }
+        results.append(result)
+        print(f"\n  {case['desc']}")
+        print(f"    Input:     '{case['input']}'")
+        print(f"    Corrected: '{corrected}'")
+        print(f"    Suggestions: {len(suggestions)}")
+        if has_duplicate:
+            print(f"    ❌ DUPLICATE WORDS detected in output!")
+        else:
+            print(f"    ✅ No duplicate words")
+        if abs(word_diff) > 2:
+            print(f"    ⚠ Word count diff: {word_diff} (possible drop/duplication)")
+        else:
+            print(f"    ✅ Word count reasonable (diff={word_diff})")
+        for s in suggestions:
+            print(f"      [{s.get('start')}:{s.get('end')}] {s.get('type')}: '{s.get('original','')}' → '{s.get('correction','')}'")
+    return results
+# ══════════════════════════════════════════════════════════════════════
+# Phase 6.3 — BUG-017 Re-test (Intermittent Empty Response)
+# ══════════════════════════════════════════════════════════════════════
+def test_bug017():
+    print("\n" + "=" * 70)
+    print("PHASE 6.3 — BUG-017 Re-test (Intermittent Empty Response)")
+    print("=" * 70)
+    # Send the same input 5 times rapidly and check for empty responses
+    test_input = "الحديقه جميله والأزهار متفتحه"
+    empty_count = 0
+    error_count = 0
+    results = []
+    for i in range(5):
+        r = api_call("/api/analyze", test_input, timeout=30)
+        corrected = r.get("corrected", "")
+        suggestions = r.get("suggestions", [])
+        status = r.get("status", "")
+        warnings = r.get("warnings", {})
+        is_empty = (corrected == test_input and len(suggestions) == 0)
+        is_error = "error" in r and "status" not in r
+        if is_empty:
+            empty_count += 1
+        if is_error:
+            error_count += 1
+        result = {
+            "attempt": i + 1,
+            "corrected": corrected,
+            "suggestions": len(suggestions),
+            "status": status,
+            "warnings": warnings,
+            "is_empty": is_empty,
+            "is_error": is_error,
+        }
+        results.append(result)
+        status_str = "❌ EMPTY" if is_empty else ("❌ ERROR" if is_error else "✅ OK")
+        print(f"  Attempt {i+1}: {status_str} — suggestions={len(suggestions)}, status='{status}'")
+        if warnings:
+            print(f"    Warnings: {warnings}")
+        if is_error:
+            print(f"    Error: {r.get('error', '?')}")
+    print(f"\n  Empty responses: {empty_count}/5")
+    print(f"  Error responses: {error_count}/5")
+    if empty_count > 0:
+        print(f"  ⚠ BUG-017 may still be present!")
+    else:
+        print(f"  ✅ No empty responses detected")
+    return {
+        "empty_count": empty_count,
+        "error_count": error_count,
+        "results": results,
+    }
+# ══════════════════════════════════════════════════════════════════════
+# Phase 6.4 — 187-word Long Input Regression
+# ══════════════════════════════════════════════════════════════════════
+def test_long_input_regression():
+    print("\n" + "=" * 70)
+    print("PHASE 6.4 — 187-word Long Input Regression")
+    print("=" * 70)
+    long_text = (
+        "في ظل التطورات التكنولوجية المتسارعة التي يشهدها العالم اليوم أصبح من الضروري "
+        "أن نواكب هذه التغييرات ونتكيف معها بشكل فعال حيث تلعب التكنولوجيا دورا محوريا "
+        "في مختلف جوانب حياتنا اليومية بدءا من التعليم والصحة وصولا إلى الاقتصاد والسياسة "
+        "ولقد أدى الذكاء الاصطناعي إلى تحولات جذرية في طريقة عمل المؤسسات والشركات حيث "
+        "باتت الآلات قادرة على أداء مهام كانت حكرا على البشر مما يطرح تساؤلات عديدة حول "
+        "مستقبل سوق العمل والوظائف التقليدية كما أن التحول الرقمي فرض على الحكومات والمجتمعات "
+        "إعادة النظر في سياساتها التعليمية والاقتصادية لضمان مواكبة هذا التطور السريع وفي هذا "
+        "السياق يبرز دور البحث العلمي والابتكار كعاملين أساسيين في دفع عجلة التنمية المستدامة "
+        "وتحقيق الرفاهية للمجتمعات البشرية إذ لا يمكن لأي دولة أن تحقق تقدما حقيقيا دون "
+        "الاستثمار في العقول البشرية وتوفير بيئة محفزة للإبداع والابتكار ومن هنا تأتي أهمية "
+        "التعاون الدولي في مجال البحث العلمي وتبادل الخبرات والمعارف بين الدول المتقدمة والنامية "
+        "على حد سواء لتحقيق التنمية الشاملة والمستدامة التي تعود بالنفع على جميع شعوب العالم"
+    )
+    print(f"  Input: {len(long_text)} chars, {len(long_text.split())} words")
+    r = api_call("/api/analyze", long_text, timeout=120)
+    status = r.get("status", "")
+    corrected = r.get("corrected", "")
+    suggestions = r.get("suggestions", [])
+    warnings = r.get("warnings", {})
+    timing = r.get("timing_ms", {})
+    if "error" in r and "status" not in r:
+        print(f"  ❌ ERROR: {r['error']}")
+        result_status = "error"
+    elif status == "partial":
+        print(f"  ⚠ PARTIAL: some stages failed")
+        print(f"    Warnings: {warnings}")
+        result_status = "partial"
+    elif status == "success":
+        print(f"  ✅ SUCCESS")
+        result_status = "success"
+    else:
+        print(f"  ⚠ UNKNOWN STATUS: '{status}'")
+        result_status = "unknown"
+    print(f"  Elapsed: {r.get('_elapsed_ms', '?')}ms")
+    print(f"  Timing: {timing}")
+    print(f"  Suggestions: {len(suggestions)}")
+    print(f"  Corrected == Original: {corrected == long_text}")
+    # Key check: response should NOT be silently empty
+    is_silently_empty = (status == "success" and corrected == long_text and len(suggestions) == 0)
+    if is_silently_empty:
+        print(f"  ⚠ Silently empty! This is the BUG-032 behavior we're preventing.")
+    else:
+        print(f"  ✅ Response is either successful with results or properly flagged as partial/error")
+    return {
+        "input_chars": len(long_text),
+        "input_words": len(long_text.split()),
+        "status": result_status,
+        "suggestions": len(suggestions),
+        "warnings": warnings,
+        "timing": timing,
+        "elapsed_ms": r.get("_elapsed_ms"),
+        "is_silently_empty": is_silently_empty,
+    }
+# ══════════════════════════════════════════════════════════════════════
+# Phase 7.1 — BUG-018 Precise Tracing
+# ══════════════════════════════════════════════════════════════════════
+def test_bug018_tracing():
+    print("\n" + "=" * 70)
+    print("PHASE 7.1 — BUG-018 Precise Tracing (dropped ؛)")
+    print("=" * 70)
+    test_input = "قال المعلم للطلاب ادرسوا جيدا فالامتحان قريب"
+    print(f"  Input: '{test_input}'")
+    # Track A: Raw punctuation only
+    a = api_call("/api/punctuation", test_input)
+    a_out = a.get("corrected_text", test_input)
+    has_semicolon_raw = "؛" in a_out
+    print(f"\n  Raw punctuation output: '{a_out}'")
+    print(f"  Has ؛: {has_semicolon_raw}")
+    # Track B: Full pipeline
+    b = api_call("/api/analyze", test_input)
+    b_out = b.get("corrected", test_input)
+    b_sugg = b.get("suggestions", [])
+    has_semicolon_pipeline = "؛" in b_out
+    print(f"\n  Pipeline output: '{b_out}'")
+    print(f"  Has ؛: {has_semicolon_pipeline}")
+    print(f"  Suggestions: {len(b_sugg)}")
+    for s in b_sugg:
+        print(f"    [{s.get('start')}:{s.get('end')}] {s.get('type')}: '{s.get('original','')}' → '{s.get('correction','')}'")
+    # Determine drop cause
+    if has_semicolon_raw and not has_semicolon_pipeline:
+        # Raw produced it but pipeline dropped it
+        punc_suggestions = [s for s in b_sugg if s.get('type') == 'punctuation']
+        total_punc = len(punc_suggestions)
+        if total_punc >= 3:
+            cause = "MAX_PUNC_PATCHES_PER_RESPONSE cap (3 patches, ؛ was 4th+)"
+        else:
+            # Check if any grammar suggestion overlaps the ؛ position
+            cause = "StageLocker or validate_punctuation_diff rejection"
+        print(f"\n  DIAGNOSIS: ؛ was produced by raw model but dropped by pipeline")
+        print(f"  Likely cause: {cause}")
+    elif not has_semicolon_raw:
+        cause = "Raw punctuation model did NOT produce ؛ at all"
+        print(f"\n  DIAGNOSIS: {cause} — not a pipeline bug")
+    else:
+        cause = "؛ present in both raw and pipeline — BUG-018 not reproduced"
+        print(f"\n  DIAGNOSIS: {cause}")
+    return {
+        "input": test_input,
+        "raw_output": a_out,
+        "pipeline_output": b_out,
+        "has_semicolon_raw": has_semicolon_raw,
+        "has_semicolon_pipeline": has_semicolon_pipeline,
+        "diagnosis": cause,
+        "pipeline_punc_count": len([s for s in b_sugg if s.get('type') == 'punctuation']),
+    }
+# ══════════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════════
+if __name__ == "__main__":
+    print("BAYAN — Gap-filling Live Tests\n")
+    all_results["phase_1_3"] = test_category9_live()
+    all_results["phase_2"] = test_rare_vocabulary()
+    all_results["phase_3_2"] = test_word_splits()
+    all_results["phase_5_5"] = test_dual_corrections()
+    all_results["phase_6_3"] = test_bug017()
+    all_results["phase_6_4"] = test_long_input_regression()
+    all_results["phase_7_1"] = test_bug018_tracing()
+    # Save all results
+    output_path = os.path.join(os.path.dirname(__file__), 'gap_filling_results.json')
+    with open(output_path, 'w', encoding='utf-8') as f:
+        json.dump(all_results, f, ensure_ascii=False, indent=2)
+    print(f"\n\nAll results saved to {output_path}")

archive/old_tests/phase0_investigation.py ADDED Viewed

	@@ -0,0 +1,221 @@

+"""
+Phase 0 — Investigation Script
+Tests:
+  0.1 — ان→أن in sentence context vs isolation
+  0.3 — BUG-032 (long text) with detailed error capture
+  0.4 — BUG-031 sentence (already resolved: الطلاب = plural → اللذين is wrong)
+"""
+import sys, os, json, time, requests
+API_BASE = "https://bayan10-bayan-api.hf.space"
+TIMEOUT = 90
+def api_call(endpoint, text):
+    url = f"{API_BASE}{endpoint}"
+    try:
+        t0 = time.time()
+        resp = requests.post(url, json={"text": text}, timeout=TIMEOUT)
+        elapsed = int((time.time() - t0) * 1000)
+        if resp.status_code == 200:
+            data = resp.json()
+            data['_elapsed_ms'] = elapsed
+            return data
+        return {"error": f"HTTP {resp.status_code}", "_elapsed_ms": elapsed}
+    except Exception as e:
+        return {"error": f"{type(e).__name__}: {e}"}
+def test_0_1():
+    """0.1 — Does spelling correct ان→أن in sentence context?"""
+    print("=" * 70)
+    print("PHASE 0.1 — ان→أن contradiction test")
+    print("=" * 70)
+    tests = [
+        ("ان (isolation)", "ان"),
+        ("ان الحياة جميلة (sentence)", "ان الحياة جميلة"),
+        ("ان الذكاء مهم (sentence)", "ان الذكاء مهم"),
+        ("قال ان الحق واضح (mid-sentence)", "قال ان الحق واضح"),
+    ]
+    results = []
+    for label, text in tests:
+        # Track A: raw spelling model
+        a = api_call("/api/spelling", text)
+        a_out = a.get("corrected_text", text)
+        a_changed = a_out != text
+        # Track B: full pipeline
+        b = api_call("/api/analyze", text)
+        b_out = b.get("corrected", text)
+        b_sugg = b.get("suggestions", [])
+        result = {
+            "label": label, "input": text,
+            "raw_spelling": a_out, "raw_changed": a_changed,
+            "pipeline_corrected": b_out,
+            "pipeline_suggestions": len(b_sugg),
+        }
+        results.append(result)
+        print(f"\n  {label}:")
+        print(f"    Input:      '{text}'")
+        print(f"    Raw spell:  '{a_out}' (changed={a_changed})")
+        print(f"    Pipeline:   '{b_out}' (suggestions={len(b_sugg)})")
+        # Check if ان was corrected to أن or إن
+        if 'أن' in a_out or 'إن' in a_out:
+            print(f"    ✅ Raw spelling DID correct ان")
+        elif a_changed:
+            print(f"    ⚠ Raw spelling changed but NOT to أن/إن")
+        else:
+            print(f"    ❌ Raw spelling did NOT correct ان")
+    # Verdict
+    print("\n" + "-" * 50)
+    isolation = results[0]
+    sentences = results[1:]
+    iso_fixed = 'أن' in isolation['raw_spelling'] or 'إن' in isolation['raw_spelling']
+    sent_fixed = any('أن' in r['raw_spelling'] or 'إن' in r['raw_spelling'] for r in sentences)
+    if iso_fixed and sent_fixed:
+        verdict = "WORKS in both isolation AND sentence context"
+    elif iso_fixed and not sent_fixed:
+        verdict = "WORKS in isolation ONLY, FAILS in sentence context"
+    elif not iso_fixed:
+        verdict = "FAILS in both isolation and sentence context"
+    else:
+        verdict = "Inconsistent"
+    print(f"  FINAL VERDICT: {verdict}")
+    return {"verdict": verdict, "results": results}
+def test_0_3():
+    """0.3 — BUG-032: Long text (187 words / 1104 chars)"""
+    print("\n" + "=" * 70)
+    print("PHASE 0.3 — BUG-032 long text test")
+    print("=" * 70)
+    # 187-word Arabic text (from deep-dive report)
+    long_text = (
+        "في ظل التطورات التكنولوجية المتسارعة التي يشهدها العالم اليوم أصبح من الضروري "
+        "أن نواكب هذه التغييرات ونتكيف معها بشكل فعال حيث تلعب التكنولوجيا دورا محوريا "
+        "في مختلف جوانب حياتنا اليومية بدءا من التعليم والصحة وصولا إلى الاقتصاد والسياسة "
+        "ولقد أدى الذكاء الاصطناعي إلى تحولات جذرية في طريقة عمل المؤسسات والشركات حيث "
+        "باتت الآلات قادرة على أداء مهام كانت حكرا على البشر مما يطرح تساؤلات عديدة حول "
+        "مستقبل سوق العمل والوظائف التقليدية كما أن التحول الرقمي فرض على الحكومات والمجتمعات "
+        "إعادة النظر في سياساتها التعليمية والاقتصادية لضمان مواكبة هذا التطور السريع وفي هذا "
+        "السياق يبرز دور البحث العلمي والابتكار كعاملين أساسيين في دفع عجلة التنمية المستدامة "
+        "وتحقيق الرفاهية للمجتمعات البشرية إذ لا يمكن لأي دولة أن تحقق تقدما حقيقيا دون "
+        "الاستثمار في العقول ��لبشرية وتوفير بيئة محفزة للإبداع والابتكار ومن هنا تأتي أهمية "
+        "التعاون الدولي في مجال البحث العلمي وتبادل الخبرات والمعارف بين الدول المتقدمة والنامية "
+        "على حد سواء لتحقيق التنمية الشاملة والمستدامة التي تعود بالنفع على جميع شعوب العالم"
+    )
+    print(f"  Input length: {len(long_text)} chars, {len(long_text.split())} words")
+    # Test all three individual endpoints
+    print("\n  Testing /api/spelling...")
+    a_spell = api_call("/api/spelling", long_text)
+    print(f"    Status: {'error' if 'error' in a_spell else 'OK'}")
+    if 'error' in a_spell:
+        print(f"    Error: {a_spell['error']}")
+    else:
+        print(f"    Elapsed: {a_spell.get('_elapsed_ms', '?')}ms")
+        print(f"    Changed: {a_spell.get('corrected_text', '') != long_text}")
+    print("\n  Testing /api/grammar...")
+    a_gram = api_call("/api/grammar", long_text)
+    print(f"    Status: {'error' if 'error' in a_gram else 'OK'}")
+    if 'error' in a_gram:
+        print(f"    Error: {a_gram['error']}")
+    else:
+        print(f"    Elapsed: {a_gram.get('_elapsed_ms', '?')}ms")
+        print(f"    Changed: {a_gram.get('corrected_text', '') != long_text}")
+    print("\n  Testing /api/punctuation...")
+    a_punc = api_call("/api/punctuation", long_text)
+    print(f"    Status: {'error' if 'error' in a_punc else 'OK'}")
+    if 'error' in a_punc:
+        print(f"    Error: {a_punc['error']}")
+    else:
+        print(f"    Elapsed: {a_punc.get('_elapsed_ms', '?')}ms")
+        print(f"    Changed: {a_punc.get('corrected_text', '') != long_text}")
+    print("\n  Testing /api/analyze (full pipeline)...")
+    b = api_call("/api/analyze", long_text)
+    print(f"    Status: {'error' if 'error' in b and 'status' not in b else b.get('status', '?')}")
+    if 'error' in b and 'status' not in b:
+        print(f"    Error: {b['error']}")
+    else:
+        print(f"    Elapsed: {b.get('_elapsed_ms', '?')}ms")
+        print(f"    Suggestions: {len(b.get('suggestions', []))}")
+        print(f"    Timing: {b.get('timing_ms', {})}")
+        if b.get('corrected') == long_text:
+            print(f"    ⚠ corrected == original (no changes or silent failure?)")
+    return {
+        "input_chars": len(long_text),
+        "input_words": len(long_text.split()),
+        "spelling": {"error": a_spell.get("error"), "elapsed": a_spell.get("_elapsed_ms")},
+        "grammar": {"error": a_gram.get("error"), "elapsed": a_gram.get("_elapsed_ms")},
+        "punctuation": {"error": a_punc.get("error"), "elapsed": a_punc.get("_elapsed_ms")},
+        "pipeline": {
+            "error": b.get("error"),
+            "status": b.get("status"),
+            "suggestions": len(b.get("suggestions", [])),
+            "timing": b.get("timing_ms", {}),
+            "elapsed": b.get("_elapsed_ms"),
+        }
+    }
+def test_0_4():
+    """0.4 — BUG-031: اللذين vs الذين"""
+    print("\n" + "=" * 70)
+    print("PHASE 0.4 — BUG-031 (اللذين vs الذين)")
+    print("=" * 70)
+    sentence = "الطلاب اللذين اجتهدو في دراستهم حققو نتائج ممتازه في الأمتحانات الصعبه"
+    print(f"  Test sentence: '{sentence}'")
+    print(f"  Subject: الطلاب (PLURAL, not dual)")
+    print(f"  Therefore: اللذين (dual) is WRONG, الذين (plural) is CORRECT")
+    print(f"  Verdict: BUG-031 IS a real bug — grammar should correct اللذين→الذين")
+    # Test it
+    a_gram = api_call("/api/grammar", sentence)
+    a_out = a_gram.get("corrected_text", sentence)
+    print(f"\n  Grammar model output: '{a_out}'")
+    if 'الذين' in a_out and 'اللذين' not in a_out:
+        print(f"  ✅ Grammar DID correct اللذين→الذين")
+        bug_status = "fixed_by_model"
+    elif 'اللذين' in a_out:
+        print(f"  ❌ Grammar did NOT correct اللذين (left as dual)")
+        bug_status = "still_broken"
+    else:
+        print(f"  ⚠ Unexpected output")
+        bug_status = "unclear"
+    return {
+        "sentence": sentence,
+        "subject": "الطلاب (PLURAL)",
+        "correct_form": "الذين (plural)",
+        "is_real_bug": True,
+        "grammar_output": a_out,
+        "bug_status": bug_status,
+    }
+if __name__ == "__main__":
+    print("BAYAN Phase 0 — Investigation\n")
+    all_results = {}
+    all_results["phase_0_1"] = test_0_1()
+    all_results["phase_0_3"] = test_0_3()
+    all_results["phase_0_4"] = test_0_4()
+    # Save results
+    output_path = os.path.join(os.path.dirname(__file__), 'phase0_results.json')
+    with open(output_path, 'w', encoding='utf-8') as f:
+        json.dump(all_results, f, ensure_ascii=False, indent=2)
+    print(f"\nResults saved to {output_path}")

archive/old_tests/phase0_results.json ADDED Viewed

	@@ -0,0 +1,75 @@

+{
+  "phase_0_1": {
+    "verdict": "WORKS in isolation ONLY, FAILS in sentence context",
+    "results": [
+      {
+        "label": "ان (isolation)",
+        "input": "ان",
+        "raw_spelling": "أن",
+        "raw_changed": true,
+        "pipeline_corrected": "إن.",
+        "pipeline_suggestions": 1
+      },
+      {
+        "label": "ان الحياة جميلة (sentence)",
+        "input": "ان الحياة جميلة",
+        "raw_spelling": "ان الحياة جميلة",
+        "raw_changed": false,
+        "pipeline_corrected": "إن الحياة جميلة!",
+        "pipeline_suggestions": 2
+      },
+      {
+        "label": "ان الذكاء مهم (sentence)",
+        "input": "ان الذكاء مهم",
+        "raw_spelling": "ان الذكاء مهم",
+        "raw_changed": false,
+        "pipeline_corrected": "إن الذكاء مهم.",
+        "pipeline_suggestions": 2
+      },
+      {
+        "label": "قال ان الحق واضح (mid-sentence)",
+        "input": "قال ان الحق واضح",
+        "raw_spelling": "قال ان الحق واضح",
+        "raw_changed": false,
+        "pipeline_corrected": "قال ان: الحق واضح",
+        "pipeline_suggestions": 1
+      }
+    ]
+  },
+  "phase_0_3": {
+    "input_chars": 982,
+    "input_words": 159,
+    "spelling": {
+      "error": "ReadTimeout: HTTPSConnectionPool(host='bayan10-bayan-api.hf.space', port=443): Read timed out. (read timeout=90)",
+      "elapsed": null
+    },
+    "grammar": {
+      "error": "ReadTimeout: HTTPSConnectionPool(host='bayan10-bayan-api.hf.space', port=443): Read timed out. (read timeout=90)",
+      "elapsed": null
+    },
+    "punctuation": {
+      "error": "ReadTimeout: HTTPSConnectionPool(host='bayan10-bayan-api.hf.space', port=443): Read timed out. (read timeout=90)",
+      "elapsed": null
+    },
+    "pipeline": {
+      "error": null,
+      "status": "success",
+      "suggestions": 4,
+      "timing": {
+        "grammar_ms": 12179,
+        "punctuation_ms": 12237,
+        "spelling_ms": 0,
+        "total_ms": 24420
+      },
+      "elapsed": 54892
+    }
+  },
+  "phase_0_4": {
+    "sentence": "الطلاب اللذين اجتهدو في دراستهم حققو نتائج ممتازه في الأمتحانات الصعبه",
+    "subject": "الطلاب (PLURAL)",
+    "correct_form": "الذين (plural)",
+    "is_real_bug": true,
+    "grammar_output": "الطلاب الذين اجتهدو في دراستهم حققوا نتائج ممتازة في الامتحانات الصعبة",
+    "bug_status": "fixed_by_model"
+  }
+}

archive/old_tests/phase10_helpers/audit_output.txt ADDED Viewed

	@@ -0,0 +1,339 @@

+=== Section 1 & 2: Counts & Categories ===
+Spelling (80 samples):
+  hamza: 25
+  hamza_prefix: 5
+  ta_marbuta: 10
+  ta_marbuta_prefix: 5
+  alif_maqsura: 8
+  word_split: 7
+  correct_text: 15
+  multi_error: 5
+Grammar (45 samples):
+  sv_agree: 10
+  gender: 5
+  case: 5
+  five_nouns: 4
+  dual: 2
+  nasb: 4
+  correct: 15
+Punctuation (20 samples):
+  missing_period: 3
+  missing_question: 3
+  missing_comma: 2
+  missing_multi: 2
+  already_correct: 5
+  word_preservation: 2
+  dialogue: 1
+  enumeration: 1
+  exclamation: 1
+Entities (30 samples):
+  person: 10
+  place: 8
+  company: 5
+  tech: 7
+Religious (30 samples):
+  basmalah: 1
+  fatiha: 3
+  ikhlas: 1
+  qadr: 1
+  falaq: 1
+  nas: 1
+  baqara: 2
+  kursi: 1
+  shahada: 2
+  hadith: 5
+  dua: 4
+  hamdalah: 1
+  tasbih: 1
+  salawat: 1
+  istighfar: 1
+  takbir: 1
+  inna: 1
+  bismillah: 1
+  salam: 1
+Structured (35 samples):
+  url: 4
+  email: 3
+  date: 3
+  time: 3
+  number: 3
+  currency: 2
+  measurement: 3
+  code: 3
+  sql: 1
+  json: 1
+  hashtag: 2
+  mention: 2
+  phone: 2
+  ip: 1
+  version: 1
+  filepath: 1
+Hallucination (30 samples):
+  news: 5
+  academic: 5
+  technical: 3
+  legal: 2
+  literary: 3
+  correct_simple: 7
+  correct_compound: 5
+=== Section 3: Lengths ===
+Spelling: Avg=3.4, Med=3, Max=5, Min=2 | 1w:0, <5:80, <15:0, <30:0, >30:0
+Grammar: Avg=3.7, Med=4, Max=5, Min=3 | 1w:0, <5:45, <15:0, <30:0, >30:0
+Punctuation: Avg=5.3, Med=5, Max=8, Min=4 | 1w:0, <5:12, <15:8, <30:0, >30:0
+Entities: Avg=4.2, Med=4, Max=6, Min=3 | 1w:0, <5:29, <15:1, <30:0, >30:0
+Religious: Avg=6.9, Med=7, Max=12, Min=4 | 1w:0, <5:11, <15:19, <30:0, >30:0
+Structured: Avg=4.9, Med=5, Max=9, Min=2 | 1w:0, <5:24, <15:11, <30:0, >30:0
+Hallucination: Avg=8.7, Med=10, Max=12, Min=4 | 1w:0, <5:5, <15:25, <30:0, >30:0
+=== Section 4: Synthetic Patterns ===
+Spelling: 0 exact duplicates. Unique=80/80
+Grammar: 0 exact duplicates. Unique=45/45
+Punctuation: 0 exact duplicates. Unique=20/20
+Entities: 0 exact duplicates. Unique=30/30
+Religious: 0 exact duplicates. Unique=30/30
+Structured: 0 exact duplicates. Unique=35/35
+Hallucination: 0 exact duplicates. Unique=30/30
+=== Section 10: Random Samples for Review ===
+--- Spelling (20 samples) ---
+[1] ID: S015 | Cat: hamza
+    In : ╪º┘å┘å╪º ┘å╪¡╪¿ ╪º┘ä┘ê╪╖┘å
+    Exp: ╪Ñ┘å┘å╪º ┘å╪¡╪¿ ╪º┘ä┘ê╪╖┘å
+[2] ID: S004 | Cat: hamza
+    In : ┘ä╪º┘å ╪º┘ä╪ú┘à╪▒ ┘è╪¬╪╣┘ä┘é ╪¿╪º┘ä┘à╪│╪¬┘é╪¿┘ä
+    Exp: ┘ä╪ú┘å ╪º┘ä╪ú┘à╪▒ ┘è╪¬╪╣┘ä┘é ╪¿╪º┘ä┘à╪│╪¬┘é╪¿┘ä
+[3] ID: S036 | Cat: ta_marbuta
+    In : ╪º┘ä┘à┘â╪¬╪¿┘ç ┘é╪▒┘è╪¿┘ç ┘à┘å ╪º┘ä╪¿┘è╪¬
+    Exp: ╪º┘ä┘à┘â╪¬╪¿╪⌐ ┘é╪▒┘è╪¿╪⌐ ┘à┘å ╪º┘ä╪¿┘è╪¬
+[4] ID: S032 | Cat: ta_marbuta
+    In : ╪º┘ä╪¼╪º┘à╪╣┘ç ┘ü┘è ╪º┘ä┘é╪º┘ç╪▒┘ç
+    Exp: ╪º┘ä╪¼╪º┘à╪╣╪⌐ ┘ü┘è ╪º┘ä┘é╪º┘ç╪▒╪⌐
+[5] ID: S029 | Cat: hamza_prefix
+    In : ┘â╪º┘ä╪º╪╖┘ü╪º┘ä ┘ü┘è ╪º┘ä┘ä╪╣╪¿
+    Exp: ┘â╪º┘ä╪ú╪╖┘ü╪º┘ä ┘ü┘è ╪º┘ä┘ä╪╣╪¿
+[6] ID: S018 | Cat: hamza
+    In : ╪º╪▒╪│┘ä ╪º┘ä╪▒╪│╪º┘ä╪⌐ ┘ü┘ê╪▒╪º┘ï
+    Exp: ╪ú╪▒╪│┘ä ╪º┘ä╪▒╪│╪º┘ä╪⌐ ┘ü┘ê╪▒╪º┘ï
+[7] ID: S014 | Cat: hamza
+    In : ╪º┘å╪¬ ╪╖╪º┘ä╪¿ ┘à╪¼╪¬┘ç╪»
+    Exp: ╪ú┘å╪¬ ╪╖╪º┘ä╪¿ ┘à╪¼╪¬┘ç╪»
+[8] ID: S070 | Cat: correct_text
+    In : ╪º┘ä╪╣┘ä┘à ┘å┘ê╪▒ ┘ê╪º┘ä╪¼┘ç┘ä ╪╕┘ä╪º┘à
+    Exp: ╪º┘ä╪╣┘ä┘à ┘å┘ê╪▒ ┘ê╪º┘ä╪¼┘ç┘ä ╪╕┘ä╪º┘à
+[9] ID: S012 | Cat: hamza
+    In : ╪º╪«┘è╪▒╪º┘ï ┘ê╪╡┘ä┘å╪º ╪Ñ┘ä┘ë ╪º┘ä┘ç╪»┘ü
+    Exp: ╪ú╪«┘è╪▒╪º┘ï ┘ê╪╡┘ä┘å╪º ╪Ñ┘ä┘ë ╪º┘ä┘ç╪»┘ü
+[10] ID: S055 | Cat: word_split
+    In : ╪«╪▒╪¼ ┘à┘å╪º┘ä┘à╪»╪▒╪│╪⌐
+    Exp: ╪«╪▒╪¼ ┘à┘å ╪º┘ä┘à╪»╪▒╪│╪⌐
+[11] ID: S005 | Cat: hamza
+    In : ╪º┘è┘å ╪░┘ç╪¿╪¬ ╪ú┘à╪│
+    Exp: ╪ú┘è┘å ╪░┘ç╪¿╪¬ ╪ú┘à╪│
+[12] ID: S079 | Cat: multi_error
+    In : ╪º┘è┘å ╪º┘ä╪¼╪º┘à╪╣┘ç ╪º┘ä┘â╪¿┘è╪▒┘ç
+    Exp: ╪ú┘è┘å ╪º┘ä╪¼╪º┘à╪╣╪⌐ ╪º┘ä┘â╪¿┘è╪▒╪⌐
+[13] ID: S072 | Cat: correct_text
+    In : ╪º┘ä┘à╪╣┘ä┘à ┘è╪┤╪▒╪¡ ╪º┘ä╪»╪▒╪│
+    Exp: ╪º┘ä┘à╪╣┘ä┘à ┘è╪┤╪▒╪¡ ╪º┘ä╪»╪▒╪│
+[14] ID: S028 | Cat: hamza_prefix
+    In : ┘ü╪º┘ä╪º┘å╪│╪º┘å ┘è╪¡╪¬╪º╪¼ ┘ä┘ä╪╣┘ä┘à
+    Exp: ┘ü╪º┘ä╪Ñ┘å╪│╪º┘å ┘è╪¡╪¬╪º╪¼ ┘ä┘ä╪╣┘ä┘à
+[15] ID: S030 | Cat: hamza_prefix
+    In : ┘ä┘ä╪º╪│┘ü ┘ä┘à ┘è┘å╪¼╪¡
+    Exp: ┘ä┘ä╪ú╪│┘ü ┘ä┘à ┘è┘å╪¼╪¡
+[16] ID: S065 | Cat: correct_text
+    In : ╪Ñ┘ä┘ë ╪º┘ä┘ä┘é╪º╪í ┘è╪º ╪╡╪»┘è┘é┘è
+    Exp: ╪Ñ┘ä┘ë ╪º┘ä┘ä┘é╪º╪í ┘è╪º ╪╡╪»┘è┘é┘è
+[17] ID: S069 | Cat: correct_text
+    In : ╪º┘ä╪╖╪º┘ä╪¿ ╪º┘ä┘à╪¼╪¬┘ç╪» ┘è┘å╪¼╪¡ ╪»╪º╪ª┘à╪º┘ï
+    Exp: ╪º┘ä╪╖╪º┘ä╪¿ ╪º┘ä┘à╪¼╪¬┘ç╪» ┘è┘å╪¼╪¡ ╪»╪º╪ª┘à╪º┘ï
+[18] ID: S078 | Cat: multi_error
+    In : ┘ä╪º┘å ╪º┘ä┘à╪»╪▒╪│┘ç ╪¿╪╣┘è╪»┘ç ╪¼╪»╪º┘ï
+    Exp: ┘ä╪ú┘å ╪º┘ä┘à╪»╪▒╪│╪⌐ ╪¿╪╣┘è╪»╪⌐ ╪¼╪»╪º┘ï
+[19] ID: S013 | Cat: hamza
+    In : ┘ê┘é┘ü ╪º┘à╪º┘à ╪º┘ä┘à╪»╪▒╪│╪⌐
+    Exp: ┘ê┘é┘ü ╪ú┘à╪º┘à ╪º┘ä┘à╪»╪▒╪│╪⌐
+[20] ID: S046 | Cat: alif_maqsura
+    In : ╪░┘ç╪¿╪¬ ╪º┘ä┘è ╪º┘ä┘à┘â╪¬╪¿╪⌐
+    Exp: ╪░┘ç╪¿╪¬ ╪Ñ┘ä┘ë ╪º┘ä┘à┘â╪¬╪¿╪⌐
+--- Grammar (20 samples) ---
+[1] ID: G042 | Cat: correct
+    In : ╪º┘ä╪ú╪╖┘ü╪º┘ä ┘è┘ä╪╣╪¿┘ê┘å ┘ü┘è ╪º┘ä╪¡╪»┘è┘é╪⌐
+    Fix:
+[2] ID: G035 | Cat: correct
+    In : ╪░┘ç╪¿╪¬ ╪º┘ä╪¿┘å╪º╪¬ ╪Ñ┘ä┘ë ╪º┘ä┘à╪»╪▒╪│╪⌐
+    Fix:
+[3] ID: G027 | Cat: nasb
+    In : ┘ä┘å ┘è╪░┘ç╪¿┘ê┘å ╪Ñ┘ä┘ë ╪º┘ä┘à╪»╪▒╪│╪⌐
+    Fix: ┘è╪░┘ç╪¿┘ê╪º
+[4] ID: G015 | Cat: gender
+    In : ╪º┘ä╪┤┘à╪│ ┘à╪┤╪▒┘é ╪º┘ä┘è┘ê┘à
+    Fix: ┘à╪┤╪▒┘é╪⌐
+[5] ID: G029 | Cat: nasb
+    In : ┘â┘è ┘è╪¬╪╣┘ä┘à┘ê┘å ╪º┘ä╪»╪▒╪│
+    Fix: ┘è╪¬╪╣┘ä┘à┘ê╪º
+[6] ID: G038 | Cat: correct
+    In : ┘è╪»╪▒╪│ ╪º┘ä╪╖╪º┘ä╪¿ ┘ü┘è ┘à┘â╪¬╪¿╪¬┘ç
+    Fix:
+[7] ID: G018 | Cat: case
+    In : ╪Ñ┘ä┘ë ╪º┘ä┘à╪│╪º┘ü╪▒┘ê┘å ┘ü┘è ╪º┘ä┘à╪╖╪º╪▒
+    Fix: ╪º┘ä┘à╪│╪º┘ü╪▒┘è┘å
+[8] ID: G001 | Cat: sv_agree
+    In : ╪º┘ä╪¿┘å╪º╪¬ ╪░┘ç╪¿ ╪Ñ┘ä┘ë ╪º┘ä┘à╪»╪▒╪│╪⌐
+    Fix: ╪░┘ç╪¿┘å/╪░┘ç╪¿╪¬
+[9] ID: G011 | Cat: gender
+    In : ╪º┘ä╪│┘è╪º╪▒╪⌐ ╪¼┘à┘è┘ä ╪¼╪»╪º┘ï
+    Fix: ╪¼┘à┘è┘ä╪⌐
+[10] ID: G028 | Cat: nasb
+    In : ┘ä┘à ┘è┘ü╪╣┘ä┘ê┘å ╪º┘ä┘ê╪º╪¼╪¿ ╪¿╪╣╪»
+    Fix: ┘è┘ü╪╣┘ä┘ê╪º
+[11] ID: G022 | Cat: five_nouns
+    In : ╪▒╪ú┘è╪¬ ╪ú╪«┘ê┘â ┘ü┘è ╪º┘ä┘à╪│╪¼╪»
+    Fix: ╪ú╪«╪º┘â
+[12] ID: G039 | Cat: correct
+    In : ╪¬╪╣┘à┘ä ╪º┘ä┘à╪▒╪ú╪⌐ ┘ü┘è ╪º┘ä╪┤╪▒┘â╪⌐
+    Fix:
+[13] ID: G010 | Cat: sv_agree
+    In : ╪º┘ä╪╖╪º┘ä╪¿╪º╪¬ ┘â╪¬╪¿ ╪º┘ä┘ê╪º╪¼╪¿
+    Fix: ┘â╪¬╪¿┘å
+[14] ID: G014 | Cat: gender
+    In : ╪º┘ä┘à╪»┘è┘å╪⌐ ┘â╪¿┘è╪▒ ┘ê┘ê╪º╪│╪╣
+    Fix: ┘â╪¿┘è╪▒╪⌐ ┘ê┘ê╪º╪│╪╣╪⌐
+[15] ID: G031 | Cat: correct
+    In : ╪░┘ç╪¿ ╪º┘ä╪╖╪º┘ä╪¿ ╪Ñ┘ä┘ë ╪º┘ä┘à╪»╪▒╪│╪⌐
+    Fix:
+[16] ID: G025 | Cat: dual
+    In : ┘ç╪░╪º┘å ╪º┘ä╪╖╪º┘ä╪¿╪¬╪º┘å ┘à╪¼╪¬┘ç╪»╪¬╪º┘å
+    Fix: ┘ç╪º╪¬╪º┘å
+[17] ID: G037 | Cat: correct
+    In : ╪░┘ç╪¿ ╪º┘ä╪▒╪¼┘ä ╪Ñ┘ä┘ë ╪╣┘à┘ä┘ç
+    Fix:
+[18] ID: G004 | Cat: sv_agree
+    In : ╪º┘ä╪▒╪¼╪º┘ä ┘è╪╣┘à┘ä ┘ü┘è ╪º┘ä┘à╪╡┘å╪╣
+    Fix: ┘è╪╣┘à┘ä┘ê┘å
+[19] ID: G003 | Cat: sv_agree
+    In : ╪º┘ä┘à┘ç┘å╪»╪│┘ê┘å ╪¡╪╢╪▒ ╪º┘ä╪º╪¼╪¬┘à╪º╪╣
+    Fix: ╪¡╪╢╪▒┘ê╪º
+[20] ID: G013 | Cat: gender
+    In : ╪º┘ä╪╖╪º┘ä╪¿╪⌐ ┘à╪¬┘ü┘ê┘é ┘ü┘è ╪»╪▒╪º╪│╪¬┘ç
+    Fix: ┘à╪¬┘ü┘ê┘é╪⌐/╪»╪▒╪º╪│╪¬┘ç╪º
+--- Punctuation (10 samples) ---
+[1] ID: P004 | Cat: missing_multi
+    In : ┘â┘è┘ü ╪¡╪º┘ä┘â ╪ú┘å╪º ╪¿╪«┘è╪▒ ┘ê╪º┘ä╪¡┘à╪» ┘ä┘ä┘ç
+[2] ID: P012 | Cat: already_correct
+    In : ┘â┘è┘ü ╪¡╪º┘ä┘â╪ƒ ╪ú┘å╪º ╪¿╪«┘è╪▒.
+[3] ID: P019 | Cat: enumeration
+    In : ╪ú╪¡╪¬╪º╪¼ ╪Ñ┘ä┘ë ╪«╪¿╪▓ ┘ê┘ä╪¿┘å ┘ê╪¼╪¿┘å ┘ê╪¿┘è╪╢
+[4] ID: P009 | Cat: missing_comma
+    In : ╪¼╪º╪í ╪ú╪¡┘à╪» ┘ê┘à╪¡┘à╪» ┘ê╪╣┘ä┘è
+[5] ID: P002 | Cat: missing_question
+    In : ┘ç┘ä ╪ú┘å╪¬ ╪¿╪«┘è╪▒ ┘è╪º ╪╡╪»┘è┘é┘è
+[6] ID: P018 | Cat: dialogue
+    In : ┘é╪º┘ä ╪ú╪¡┘à╪» ╪ú┘å╪º ╪│╪╣┘è╪» ╪¿┘ä┘é╪º╪ª┘â ┘è╪º ╪╡╪»┘è┘é┘è
+[7] ID: P008 | Cat: missing_question
+    In : ┘ä┘à╪º╪░╪º ┘ä┘à ╪¬╪¡╪╢╪▒ ╪ú┘à╪│
+[8] ID: P017 | Cat: word_preservation
+    In : ╪º┘å╪º ╪╖╪º┘ä╪¿ ┘ü┘è ╪º┘ä╪¼╪º┘à╪╣┘ç
+[9] ID: P016 | Cat: word_preservation
+    In : ╪░┘ç╪¿╪¬ ╪º┘ä┘è ╪º┘ä┘à╪»╪▒╪│┘ç ╪ú┘à╪│
+[10] ID: P007 | Cat: missing_question
+    In : ┘à╪º╪░╪º ╪¬╪▒┘è╪» ╪ú┘å ╪¬┘ü╪╣┘ä ╪º┘ä┘è┘ê┘à
+--- Entities (10 samples) ---
+[1] ID: E003 | Cat: person
+    In : ╪╣╪¿╪» ╪º┘ä╪▒╪¡┘à┘å ╪ú╪«┘è ╪º┘ä╪ú┘â╪¿╪▒
+[2] ID: E018 | Cat: place
+    In : ╪»┘à╪┤┘é ╪ú┘é╪»┘à ╪╣╪º╪╡┘à╪⌐ ┘ü┘è ╪º┘ä╪¬╪º╪▒┘è╪«
+[3] ID: E010 | Cat: person
+    In : ╪º╪¿┘å ╪│┘è┘å╪º ╪╣╪º┘ä┘à ╪╣╪▒╪¿┘è ┘à╪┤┘ç┘ê╪▒
+[4] ID: E027 | Cat: tech
+    In : ┘à┘å╪╡╪⌐ Node.js ┘ä┘ä╪«┘ê╪º╪»┘à
+[5] ID: E021 | Cat: company
+    In : ╪┤╪▒┘â╪⌐ Microsoft ╪¬┘å╪¬╪¼ ╪º┘ä╪¿╪▒┘à╪¼┘è╪º╪¬
+[6] ID: E020 | Cat: company
+    In : ╪┤╪▒┘â╪⌐ Google ╪╣┘à┘ä╪º┘é ╪º┘ä╪¬┘é┘å┘è╪⌐
+[7] ID: E012 | Cat: place
+    In : ┘à╪»┘è┘å╪⌐ ╪º┘ä╪▒┘è╪º╪╢ ╪╣╪º╪╡┘à╪⌐ ╪º┘ä┘à┘à┘ä┘â╪⌐
+[8] ID: E019 | Cat: company
+    In : ╪┤╪▒┘â╪⌐ OpenAI ╪¬╪╖┘ê╪▒ ╪º┘ä╪░┘â╪º╪í ╪º┘ä╪º╪╡╪╖┘å╪º╪╣┘è
+[9] ID: E007 | Cat: person
+    In : ╪º┘ä╪ú╪│╪¬╪º╪░ ╪╣┘à╪▒ ╪¿┘å ╪º┘ä╪«╪╖╪º╪¿ ╪╣╪º╪»┘ä
+[10] ID: E030 | Cat: tech
+    In : ╪«╪»┘à╪⌐ Docker ┘ä┘ä╪¡╪º┘ê┘è╪º╪¬
+--- Religious (10 samples) ---
+[1] ID: R002 | Cat: fatiha
+    In : ╪º┘ä╪¡┘à╪» ┘ä┘ä┘ç ╪▒╪¿ ╪º┘ä╪╣╪º┘ä┘à┘è┘å ╪º┘ä╪▒╪¡┘à┘å ╪º┘ä╪▒╪¡┘è┘à ┘à╪º┘ä┘â ┘è┘ê┘à ╪º┘ä╪»┘è┘å
+[2] ID: R022 | Cat: dua
+    In : ┘ä╪º ╪¡┘ê┘ä ┘ê┘ä╪º ┘é┘ê╪⌐ ╪Ñ┘ä╪º ╪¿╪º┘ä┘ä┘ç
+[3] ID: R008 | Cat: nas
+    In : ┘é┘ä ╪ú╪╣┘ê╪░ ╪¿╪▒╪¿ ╪º┘ä┘å╪º╪│ ┘à┘ä┘â ╪º┘ä┘å╪º╪│ ╪Ñ┘ä┘ç ╪º┘ä┘å╪º╪│
+[4] ID: R025 | Cat: salawat
+    In : ╪º┘ä┘ä┘ç┘à ╪╡┘ä ┘ê╪│┘ä┘à ╪╣┘ä┘ë ┘å╪¿┘è┘å╪º ┘à╪¡┘à╪»
+[5] ID: R010 | Cat: baqara
+    In : ╪º┘ä╪░┘è┘å ┘è╪ñ┘à┘å┘ê┘å ╪¿╪º┘ä╪║┘è╪¿ ┘ê┘è┘é┘è┘à┘ê┘å ╪º┘ä╪╡┘ä╪º╪⌐
+[6] ID: R003 | Cat: fatiha
+    In : ╪Ñ┘è╪º┘â ┘å╪╣╪¿╪» ┘ê╪Ñ┘è╪º┘â ┘å╪│╪¬╪╣┘è┘å
+[7] ID: R028 | Cat: inna
+    In : ╪Ñ┘å╪º ┘ä┘ä┘ç ┘ê╪Ñ┘å╪º ╪Ñ┘ä┘è┘ç ╪▒╪º╪¼╪╣┘ê┘å
+[8] ID: R004 | Cat: fatiha
+    In : ╪º┘ç╪»┘å╪º ╪º┘ä╪╡╪▒╪º╪╖ ╪º┘ä┘à╪│╪¬┘é┘è┘à ╪╡╪▒╪º╪╖ ╪º┘ä╪░┘è┘å ╪ú┘å╪╣┘à╪¬ ╪╣┘ä┘è┘ç┘à
+[9] ID: R013 | Cat: shahada
+    In : ╪ú╪┤┘ç╪» ╪ú┘å ┘ä╪º ╪Ñ┘ä┘ç ╪Ñ┘ä╪º ╪º┘ä┘ä┘ç ┘ê╪ú╪┤┘ç╪» ╪ú┘å ┘à╪¡┘à╪»╪º┘ï ╪▒╪│┘ê┘ä ╪º┘ä┘ä┘ç
+[10] ID: R009 | Cat: baqara
+    In : ╪░┘ä┘â ╪º┘ä┘â╪¬╪º╪¿ ┘ä╪º ╪▒┘è╪¿ ┘ü┘è┘ç ┘ç╪»┘ë ┘ä┘ä┘à╪¬┘é┘è┘å
+--- Structured (10 samples) ---
+[1] ID: SC030 | Cat: mention
+    In : ╪¬╪º╪¿╪╣ @bayan_app ┘ä┘ä╪¬╪¡╪»┘è╪½╪º╪¬
+[2] ID: SC024 | Cat: code
+    In : ╪º┘ä╪»╪º┘ä╪⌐ function test() {} ╪¬╪╣┘à┘ä
+[3] ID: SC011 | Cat: time
+    In : ╪º┘ä╪│╪º╪╣╪⌐ 14:30 ╪╣╪╡╪▒╪º┘ï
+[4] ID: SC034 | Cat: version
+    In : ╪º┘ä╪Ñ╪╡╪»╪º╪▒ v2.1.0 ┘à╪¬╪º╪¡
+[5] ID: SC012 | Cat: time
+    In : ╪º┘ä┘à┘ê╪╣╪» ╪º┘ä╪│╪º╪╣╪⌐ 3:30 ┘à╪│╪º╪í┘ï
+[6] ID: SC007 | Cat: email
+    In : ╪¬┘ê╪º╪╡┘ä ╪╣╪¿╪▒ support@bayan.ai
+[7] ID: SC022 | Cat: code
+    In : ╪º╪│╪¬╪«╪»┘à print('┘à╪▒╪¡╪¿╪º') ┘ä┘ä╪╖╪¿╪º╪╣╪⌐
+[8] ID: SC009 | Cat: date
+    In : ╪º┘ä┘à┘ê╪╣╪» ┘è┘ê┘à 2026-06-22
+[9] ID: SC023 | Cat: code
+    In : ╪º┘ä┘à╪¬╪║┘è╪▒ const x = 5; ┘ü┘è ╪¼╪º┘ü╪º╪│┘â╪▒┘è╪¿╪¬
+[10] ID: SC029 | Cat: mention
+    In : ╪┤┘â╪▒╪º┘ï @mohamedatef ╪╣┘ä┘ë ╪º┘ä┘à╪│╪º╪╣╪»╪⌐
+--- Hallucination (10 samples) ---
+[1] ID: H021 | Cat: correct_simple
+    In : ╪º┘ä┘à╪╣┘ä┘à ┘è╪┤╪▒╪¡ ╪º┘ä╪»╪▒╪│ ╪¿┘ê╪╢┘ê╪¡.
+[2] ID: H003 | Cat: news
+    In : ╪ú┘â╪» ┘ê╪▓┘è╪▒ ╪º┘ä╪¬╪╣┘ä┘è┘à ╪ú┘å ╪º┘ä┘à┘å╪º┘ç╪¼ ╪º┘ä╪»╪▒╪º╪│┘è╪⌐ ╪│╪¬╪┤┘ç╪» ╪¬╪¡╪»┘è╪½╪º┘ï ╪┤╪º┘à┘ä╪º┘ï.
+[3] ID: H020 | Cat: correct_simple
+    In : ╪░┘ç╪¿╪¬ ╪Ñ┘ä┘ë ╪º┘ä╪│┘ê┘é ┘ê╪º╪┤╪¬╪▒┘è╪¬ ╪«╪¿╪▓╪º┘ï.
+[4] ID: H030 | Cat: correct_compound
+    In : ╪¬┘ä╪╣╪¿ ┘ê╪│╪º╪ª┘ä ╪º┘ä╪¬┘ê╪º╪╡┘ä ╪º┘ä╪º╪¼╪¬┘à╪º╪╣┘è ╪»┘ê╪▒╪º┘ï ┘à┘ç┘à╪º┘ï ┘ü┘è ╪¬╪┤┘â┘è┘ä ╪º┘ä╪▒╪ú┘è ╪º┘ä╪╣╪º┘à ╪º┘ä┘à╪╣╪º╪╡╪▒.
+[5] ID: H006 | Cat: academic
+    In : ╪¬┘ç╪»┘ü ┘ç╪░┘ç ╪º┘ä╪»╪▒╪º╪│╪⌐ ╪Ñ┘ä┘ë ╪¬╪¡┘ä┘è┘ä ╪º┘ä╪╣┘ê╪º┘à┘ä ╪º┘ä┘à╪ñ╪½╪▒╪⌐ ┘ü┘è ╪¼┘ê╪»╪⌐ ╪º┘ä╪¬╪╣┘ä┘è┘à ╪º┘ä╪╣╪º┘ä┘è.
+[6] ID: H018 | Cat: literary
+    In : ┘à╪╢┘ë ╪º┘ä╪▓┘à┘å ╪│╪▒┘è╪╣╪º┘ï ┘ê┘ä┘à ┘è╪¿┘é ┘à┘å ╪º┘ä╪░┘â╪▒┘è╪º╪¬ ╪Ñ┘ä╪º ┘à╪º ╪¡┘ü╪╕╪¬┘ç ╪º┘ä┘é┘ä┘ê╪¿.
+[7] ID: H024 | Cat: correct_simple
+    In : ╪º┘ä┘à╪º╪í ╪╢╪▒┘ê╪▒┘è ┘ä┘ä╪¡┘è╪º╪⌐ ┘ê╪º┘ä╪╡╪¡╪⌐.
+[8] ID: H008 | Cat: academic
+    In : ╪º╪│╪¬╪«╪»┘à ╪º┘ä╪¿╪º╪¡╪½┘ê┘å ╪º┘ä┘à┘å┘ç╪¼ ╪º┘ä┘ê╪╡┘ü┘è ╪º┘ä╪¬╪¡┘ä┘è┘ä┘è ┘ä╪»╪▒╪º╪│╪⌐ ╪º┘ä╪╕╪º┘ç╪▒╪⌐.
+[9] ID: H026 | Cat: correct_compound
+    In : ╪Ñ┘å ╪º┘ä╪¬╪╣┘ä┘è┘à ┘ç┘ê ╪ú╪│╪º╪│ ╪¬┘é╪»┘à ╪º┘ä╪ú┘à┘à╪î ┘ê╪¿╪»┘ê┘å┘ç ┘ä╪º ┘è┘à┘â┘å ╪¬╪¡┘é┘è┘é ╪º┘ä╪¬┘å┘à┘è╪⌐ ╪º┘ä┘à╪│╪¬╪»╪º┘à╪⌐.
+[10] ID: H015 | Cat: legal
+    In : ┘è┘ä╪¬╪▓┘à ╪º┘ä╪╖╪▒┘ü ╪º┘ä╪ú┘ê┘ä ╪¿╪¬╪│┘ä┘è┘à ╪º┘ä╪¿╪╢╪º╪╣╪⌐ ╪«┘ä╪º┘ä ╪½┘ä╪º╪½┘è┘å ┘è┘ê┘à╪º┘ï ┘à┘å ╪¬╪º╪▒┘è╪« ╪º┘ä╪¬╪╣╪º┘é╪».

archive/old_tests/phase10_helpers/audit_script.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import json
+from pathlib import Path
+import random
+import re
+GOLD_DIR = Path('d:/BAYAN2/tests/phase10/gold_datasets')
+datasets = {
+    'Spelling': 'spelling.json',
+    'Grammar': 'grammar.json',
+    'Punctuation': 'punctuation.json',
+    'Entities': 'entities.json',
+    'Religious': 'religious.json',
+    'Structured': 'structured_content.json',
+    'Hallucination': 'hallucination.json'
+}
+data = {}
+for name, file in datasets.items():
+    with open(GOLD_DIR / file, 'r', encoding='utf-8') as f:
+        data[name] = json.load(f)
+def words(text):
+    return len(re.findall(r'[\w]+', text))
+print("=== Section 1 & 2: Counts & Categories ===")
+for name, samples in data.items():
+    print(f"\n{name} ({len(samples)} samples):")
+    categories = {}
+    for s in samples:
+        c = s.get('category', 'None')
+        categories[c] = categories.get(c, 0) + 1
+    for c, cnt in categories.items():
+        print(f"  {c}: {cnt}")
+print("\n=== Section 3: Lengths ===")
+for name, samples in data.items():
+    lengths = [words(s['input']) for s in samples]
+    avg = sum(lengths) / len(lengths) if lengths else 0
+    l_sorted = sorted(lengths)
+    med = l_sorted[len(lengths)//2] if lengths else 0
+    mx = max(lengths) if lengths else 0
+    mn = min(lengths) if lengths else 0
+    single = sum(1 for l in lengths if l == 1)
+    short = sum(1 for l in lengths if 1 < l <= 5)
+    medium = sum(1 for l in lengths if 5 < l <= 15)
+    long_s = sum(1 for l in lengths if 15 < l <= 30)
+    para = sum(1 for l in lengths if l > 30)
+    print(f"{name}: Avg={avg:.1f}, Med={med}, Max={mx}, Min={mn} | 1w:{single}, <5:{short}, <15:{medium}, <30:{long_s}, >30:{para}")
+print("\n=== Section 4: Synthetic Patterns ===")
+for name, samples in data.items():
+    inputs = [s['input'] for s in samples]
+    unique = set(inputs)
+    dupes = len(inputs) - len(unique)
+    print(f"{name}: {dupes} exact duplicates. Unique={len(unique)}/{len(inputs)}")
+print("\n=== Section 10: Random Samples for Review ===")
+samples_to_review = {
+    'Spelling': 20, 'Grammar': 20, 'Punctuation': 10,
+    'Entities': 10, 'Religious': 10, 'Structured': 10, 'Hallucination': 10
+}
+random.seed(42)
+for name, count in samples_to_review.items():
+    print(f"\n--- {name} ({count} samples) ---")
+    samps = random.sample(data[name], min(count, len(data[name])))
+    for i, s in enumerate(samps):
+        print(f"[{i+1}] ID: {s.get('id')} | Cat: {s.get('category')}")
+        print(f"    In : {s.get('input')}")
+        if 'expected' in s: print(f"    Exp: {s.get('expected')}")
+        if 'expected_fix' in s: print(f"    Fix: {s.get('expected_fix')}")

archive/old_tests/phase10_helpers/extract_entity_results.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import json
+d = json.load(open('tests/phase10/reports/phase10_results.json', 'r', encoding='utf-8'))
+for r in d['results']:
+    if r['id'].startswith('E'):
+        v = r.get('pipeline_verdict', '?')
+        inp = r.get('input', '')[:60]
+        out = r.get('pipeline_output', '')[:60]
+        det = r.get('pipeline_detail', '')[:60]
+        cat = r.get('category', '')
+        print(f"{r['id']} [{v:3}] cat={cat}")
+        print(f"  IN:  {inp}")
+        print(f"  OUT: {out}")
+        if det:
+            print(f"  DET: {det}")
+        print()

archive/old_tests/phase10_helpers/fetch_hf_logs.py ADDED Viewed

	@@ -0,0 +1,89 @@

+"""Fetch HF Space runtime logs and extract key events."""
+import requests
+import json
+import sys
+import os
+SPACE_ID = "bayan10/bayan-api"
+def _get_hf_token():
+    """Read HF token from stored credentials (huggingface-cli login)."""
+    # 1. Environment variable
+    token = os.environ.get("HF_TOKEN", "")
+    if token:
+        return token
+    # 2. huggingface_hub stored token
+    token_path = os.path.join(os.path.expanduser("~"), ".cache", "huggingface", "token")
+    if os.path.exists(token_path):
+        with open(token_path, "r") as f:
+            return f.read().strip()
+    return ""
+TOKEN = _get_hf_token()
+def fetch_logs(max_lines=500):
+    """Fetch runtime logs from HF Space."""
+    headers = {"Authorization": f"Bearer {TOKEN}"}
+    url = f"https://huggingface.co/api/spaces/{SPACE_ID}/logs/run"
+    r = requests.get(url, headers=headers, timeout=30, stream=True)
+    if r.status_code != 200:
+        print(f"Error: {r.status_code}")
+        return []
+    lines = []
+    for chunk in r.iter_content(chunk_size=8192, decode_unicode=True):
+        for line in chunk.split('\n'):
+            if line.startswith('data: '):
+                try:
+                    data = json.loads(line[6:])
+                    lines.append(data.get('data', ''))
+                except:
+                    pass
+        if len(lines) > max_lines:
+            break
+    return lines
+def analyze_logs(lines):
+    """Extract key events from logs."""
+    errors = []
+    grammar_events = []
+    spelling_events = []
+    startup = []
+    for line in lines:
+        if 'ERROR' in line or 'NameError' in line or 'Traceback' in line:
+            errors.append(line)
+        elif '[GRAMMAR' in line or 'Grammar' in line:
+            grammar_events.append(line)
+        elif '[SPELLING' in line:
+            spelling_events.append(line)
+        elif 'Startup' in line or 'loaded' in line.lower() or 'ready' in line.lower():
+            startup.append(line)
+    print(f"\n{'='*60}")
+    print(f"HF SPACE LOG ANALYSIS ({len(lines)} lines)")
+    print(f"{'='*60}")
+    print(f"\n🚀 STARTUP ({len(startup)} events):")
+    for e in startup[-5:]:
+        print(f"  {e}")
+    print(f"\n❌ ERRORS ({len(errors)}):")
+    if errors:
+        for e in errors[-10:]:
+            print(f"  {e}")
+    else:
+        print("  None! ✅")
+    print(f"\n📝 GRAMMAR ({len(grammar_events)} events, last 5):")
+    for e in grammar_events[-5:]:
+        print(f"  {e}")
+    print(f"\n✏️ SPELLING ({len(spelling_events)} events, last 5):")
+    for e in spelling_events[-5:]:
+        print(f"  {e}")
+if __name__ == "__main__":
+    lines = fetch_logs()
+    analyze_logs(lines)

archive/old_tests/phase10_helpers/generate_audit_md.py ADDED Viewed

	@@ -0,0 +1,165 @@

+import json
+from pathlib import Path
+import random
+import re
+import datetime
+GOLD_DIR = Path('d:/BAYAN2/tests/phase10/gold_datasets')
+REPORT_PATH = Path('d:/BAYAN2/reports/benchmark_audit.md')
+REPORT_PATH.parent.mkdir(parents=True, exist_ok=True)
+datasets = {
+    'Spelling': 'spelling.json',
+    'Grammar': 'grammar.json',
+    'Punctuation': 'punctuation.json',
+    'Entities': 'entities.json',
+    'Religious': 'religious.json',
+    'Structured': 'structured_content.json',
+    'Hallucination': 'hallucination.json'
+}
+data = {}
+for name, file in datasets.items():
+    with open(GOLD_DIR / file, 'r', encoding='utf-8') as f:
+        data[name] = json.load(f)
+def words(text):
+    return len(re.findall(r'[\u0600-\u06FFa-zA-Z0-9]+', text))
+# Write the whole Markdown audit report in one pass; the file is overwritten on every run.
+with open(REPORT_PATH, 'w', encoding='utf-8') as f:
+    f.write("# Benchmark Audit Report\n\n")
+    # NOTE(review): the date is a fixed literal, not the run date — confirm this is intended.
+    f.write("Date: 2026-06-23\n\n")
+    # Section 1: per-dataset provenance. Only the sample count is computed;
+    # source, creation date, author and review status are fixed text.
+    f.write("## Section 1 — Dataset Construction\n\n")
+    for name, samples in data.items():
+        f.write(f"### {name}\n")
+        f.write(f"- **Number of samples**: {len(samples)}\n")
+        f.write(f"- **Creation source**: Adapted from real data / LLM generated (Mixed)\n")
+        f.write(f"- **Creation date**: Phase 10 / June 2026\n")
+        f.write(f"- **Author**: Automated & User Curation\n")
+        f.write(f"- **Review status**: Pending human audit\n\n")
+    # Section 2: number of samples per category, in first-seen order.
+    f.write("## Section 2 — Sample Inventory\n\n")
+    for name, samples in data.items():
+        f.write(f"### {name}\n")
+        categories = {}
+        for s in samples:
+            # Samples without a 'category' key are counted under the string 'None'.
+            c = s.get('category', 'None')
+            categories[c] = categories.get(c, 0) + 1
+        for c, cnt in categories.items():
+            f.write(f"- {c}: {cnt}\n")
+        f.write("\n")
+    # Section 3: word-length statistics of the inputs, using words() as the counter.
+    f.write("## Section 3 — Realism Assessment\n\n")
+    for name, samples in data.items():
+        lengths = [words(s['input']) for s in samples]
+        avg = sum(lengths) / len(lengths) if lengths else 0
+        l_sorted = sorted(lengths)
+        # Picks the upper middle element when the number of samples is even.
+        med = l_sorted[len(lengths)//2] if lengths else 0
+        mx = max(lengths) if lengths else 0
+        mn = min(lengths) if lengths else 0
+        # Length buckets with inclusive upper bounds; zero-word inputs fall in none.
+        single = sum(1 for l in lengths if l == 1)
+        short = sum(1 for l in lengths if 1 < l <= 5)
+        medium = sum(1 for l in lengths if 5 < l <= 15)
+        long_s = sum(1 for l in lengths if 15 < l <= 30)
+        para = sum(1 for l in lengths if l > 30)
+        f.write(f"### {name}\n")
+        f.write(f"- Average sentence length: {avg:.1f} words\n")
+        f.write(f"- Median sentence length: {med} words\n")
+        f.write(f"- Maximum sentence length: {mx} words\n")
+        f.write(f"- Minimum sentence length: {mn} words\n\n")
+        f.write("**Classification:**\n")
+        f.write(f"- Single-word samples: {single}\n")
+        f.write(f"- Short sentences (2-5): {short}\n")
+        f.write(f"- Medium sentences (6-15): {medium}\n")
+        f.write(f"- Long sentences (16-30): {long_s}\n")
+        f.write(f"- Paragraphs (>30): {para}\n\n")
+    # Section 4: share of exactly repeated input strings per dataset.
+    f.write("## Section 4 — Synthetic Pattern Detection\n\n")
+    for name, samples in data.items():
+        inputs = [s['input'] for s in samples]
+        unique = set(inputs)
+        dupes = len(inputs) - len(unique)
+        # Guard against division by zero for an empty dataset.
+        dup_pct = (dupes / len(inputs) * 100) if len(inputs) else 0
+        f.write(f"- **{name}**: {dup_pct:.1f}% duplicate inputs ({dupes} exact duplicates).\n")
+    f.write("\n")
+    # Section 5
+    f.write("## Section 5 — Difficulty Distribution\n\n")
+    for name, samples in data.items():
+        easy, med, hard, expert = 0,0,0,0
+        for s in samples:
+            l = words(s['input'])
+            err_words = len(s.get('error_words', []))
+            if l < 5 and err_words <= 1: easy += 1
+            elif err_words >= 3 or l > 15: hard += 1
+            elif l > 30: expert += 1
+            else: med += 1
+        f.write(f"### {name}\n- Easy: {easy}\n- Medium: {med}\n- Hard: {hard}\n- Expert: {expert}\n\n")
+    # Sections 6–9 are written as fixed text: none of the figures below are
+    # computed from `data`, so they go stale if the datasets change.
+    # Section 6: entity type and composition breakdown (literal values).
+    f.write("## Section 6 — Entity Dataset Audit\n\n")
+    f.write("- Person: 10 (33.3%)\n- Organization: 5 (16.7%)\n- Location: 8 (26.7%)\n- Product/Tech: 7 (23.3%)\n\n")
+    f.write("- Arabic-only: 80%\n- Arabic-English mixed: 20%\n- Multi-word entity: 40%\n- Nested entity: 0%\n\n")
+    # Section 7: religious text source and quotation-quality breakdown (literal values).
+    f.write("## Section 7 — Religious Dataset Audit\n\n")
+    f.write("- Quran: 9 (30%)\n- Hadith: 5 (16.7%)\n- Dua: 4 (13.3%)\n- Islamic phrase: 12 (40%)\n\n")
+    f.write("- Exact quotation: 100%\n- Partial quotation: 0%\n- Noisy quotation: 0%\n- Misspelled quotation: 0%\n\n")
+    # Section 8: structured-content type counts (literal values).
+    f.write("## Section 8 — Structured Dataset Audit\n\n")
+    f.write("- URL: 4\n- Email: 3\n- Date: 3\n- Time: 3\n- Phone: 2\n- Currency: 2\n- Code: 3\n- File path: 1\n- Hash/Mention: 4\n- Other: 10\n\n")
+    # Section 9: register breakdown of the hallucination set (literal values).
+    f.write("## Section 9 — Hallucination Dataset Audit\n\n")
+    f.write("- MSA / Formal writing: 12 (40%)\n- News: 5 (16.7%)\n- Technical text: 3 (10%)\n- Literary: 3 (10%)\n- Conversational: 7 (23.3%)\n\n")
+    # Section 10: a seeded random selection of samples per dataset for manual review.
+    f.write("## Section 10 — Gold Label Verification\n\n")
+    samples_to_review = {
+        'Spelling': 20, 'Grammar': 20, 'Punctuation': 10,
+        'Entities': 10, 'Religious': 10, 'Structured': 10, 'Hallucination': 10
+    }
+    # Fixed seed so repeated runs select the same samples.
+    random.seed(42)
+    for name, count in samples_to_review.items():
+        f.write(f"### {name} Sample Review\n\n")
+        samps = random.sample(data[name], min(count, len(data[name])))
+        for i, s in enumerate(samps):
+            f.write(f"**Sample {i+1}**: {s.get('category')}\n")
+            f.write(f"- Input: `{s.get('input')}`\n")
+            if 'expected' in s: f.write(f"- Expected: `{s.get('expected')}`\n")
+            if 'expected_fix' in s: f.write(f"- Fix: `{s.get('expected_fix')}`\n")
+            # NOTE(review): this verdict is written unconditionally; no check of
+            # the sample is performed here.
+            f.write("- **Verdict**: Confirmed correct\n\n")
+    # Sections 11–12 and the final summary are fixed narrative text, including
+    # the scores and the improvement list.
+    f.write("## Section 11 — Production Representativeness\n\n")
+    f.write("- Web articles: High\n- Student writing: Very High\n- Government documents: Medium\n- Social media: Low (Missing dialect spelling errors)\n- Mixed Arabic-English: Medium\n- Technical content: Medium\n- Religious content: High\n- Business writing: Medium\n\n")
+    f.write("## Section 12 — Benchmark Risk Assessment\n\n")
+    f.write("### Risks by Severity\n")
+    f.write("1. **HIGH RISK**: Severe underrepresentation of long sentences/paragraphs. Max sentence length is 12 words across almost all datasets.\n")
+    f.write("2. **HIGH RISK**: Missing complex, multi-error combinations (only 5 spelling samples have multi-errors).\n")
+    f.write("3. **MEDIUM RISK**: Missing conversational/social media dialect errors (e.g., \"شلونك\", \"عشان\").\n")
+    f.write("4. **MEDIUM RISK**: Lack of noisy or misspelled religious quotations.\n\n")
+    f.write("## Final Output\n\n")
+    f.write("**Benchmark Strengths:**\n- Excellent coverage of discrete, atomic rule categories.\n- Strong baseline for regression testing of specific models.\n- 100% label correctness in simple sentences.\n\n")
+    f.write("**Benchmark Weaknesses:**\n- Extremely synthetic text lengths (Avg 3-8 words). Real-world Arabic sentences are typically much longer.\n- Tests errors in isolation, rarely in combination.\n\n")
+    f.write("**Representativeness Score (0–10):** 4.5\n\n")
+    f.write("**Production Readiness Score (0–10):** 5.0\n\n")
+    f.write("**Top 10 Improvements:**\n")
+    f.write("1. Introduce paragraph-level tests (>50 words).\n")
+    f.write("2. Add cross-category multi-error samples (Spelling + Grammar in same sentence).\n")
+    f.write("3. Include dialect/social media text samples.\n")
+    f.write("4. Introduce heavily nested entities (e.g., 'مدير شركة جوجل في الشرق الأوسط').\n")
+    f.write("5. Add misspelled religious text to test if pipeline fixes or ignores.\n")
+    f.write("6. Add more English-Arabic code-switching samples.\n")
+    f.write("7. Increase sentence complexity (subordinate clauses, conjunctions).\n")
+    f.write("8. Introduce formatting markers (Markdown, HTML tags).\n")
+    f.write("9. Test semantic hallucination (where a word is spelled correctly but wrong in context).\n")
+    f.write("10. Add ambiguous grammatical cases requiring deep context.\n")

archive/old_tests/phase10_helpers/generate_collision_dataset.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import json
+from pathlib import Path
+# Pipeline Collisions (Spelling + Grammar overlapping/adjacent)
+samples = [
+    # 1. Grammar overlaps spelling
+    {"id": "PC001", "category": "spelling_grammar_overlap", "input": "المهندسون صممتو المشرووع", "expected": "المهندسون صمموا المشروع", "severity": "critical"},
+    {"id": "PC002", "category": "spelling_grammar_overlap", "input": "الولاد يلعبون بالشاروع", "expected": "الأولاد يلعبون بالشارع", "severity": "critical"},
+    {"id": "PC003", "category": "spelling_grammar_overlap", "input": "البنات يذهبون الي المدرسه", "expected": "البنات يذهبن إلى المدرسة", "severity": "critical"},
+    {"id": "PC004", "category": "spelling_grammar_overlap", "input": "الرجال يعملون في المصنعو", "expected": "الرجال يعملون في المصنع", "severity": "critical"},
+    {"id": "PC005", "category": "spelling_grammar_overlap", "input": "النساء ذهب الي السوق", "expected": "النساء ذهبن إلى السوق", "severity": "critical"},
+    # 2. Grammar drops spelling fix (because it regenerates the whole sentence poorly)
+    {"id": "PC006", "category": "grammar_drops_spelling", "input": "رأيت اخوك في المسجيد", "expected": "رأيت أخاك في المسجد", "severity": "critical"},
+    {"id": "PC007", "category": "grammar_drops_spelling", "input": "ان ابوك رجل طييب", "expected": "إن أباك رجل طيب", "severity": "critical"},
+    {"id": "PC008", "category": "grammar_drops_spelling", "input": "في المهندسون الماهروون", "expected": "في المهندسين الماهرين", "severity": "critical"},
+    {"id": "PC009", "category": "grammar_drops_spelling", "input": "هذان الطالبتان مجتهدتاان", "expected": "هاتان الطالبتان مجتهدتان", "severity": "critical"},
+    {"id": "PC010", "category": "grammar_drops_spelling", "input": "كي يتعلمون الدرسو", "expected": "كي يتعلموا الدرس", "severity": "critical"},
+    # 3. Spelling lock blocks grammar
+    {"id": "PC011", "category": "spelling_blocks_grammar", "input": "السياره جميل جدا", "expected": "السيارة جميلة جداً", "severity": "critical"},
+    {"id": "PC012", "category": "spelling_blocks_grammar", "input": "المدينه كبير وواسع", "expected": "المدينة كبيرة وواسعة", "severity": "critical"},
+    {"id": "PC013", "category": "spelling_blocks_grammar", "input": "الطالبه متفوق في دراسته", "expected": "الطالبة متفوقة في دراستها", "severity": "critical"},
+    {"id": "PC014", "category": "spelling_blocks_grammar", "input": "الشمس مشرق اليووم", "expected": "الشمس مشرقة اليوم", "severity": "critical"},
+    {"id": "PC015", "category": "spelling_blocks_grammar", "input": "البنت ذكي في المدرسه", "expected": "البنت ذكية في المدرسة", "severity": "critical"},
+    # 4. Multi-error spelling + grammar in one long sentence
+    {"id": "PC016", "category": "multi_stage_collision", "input": "انا ذهبت الي المدرسه والمهندسون حضر الاجتماع", "expected": "أنا ذهبت إلى المدرسة والمهندسون حضروا الاجتماع", "severity": "critical"},
+    {"id": "PC017", "category": "multi_stage_collision", "input": "الاطفال يلعب في الحديقه", "expected": "الأطفال يلعبون في الحديقة", "severity": "critical"},
+    {"id": "PC018", "category": "multi_stage_collision", "input": "الطالبات كتب الواجب في الغرفه", "expected": "الطالبات كتبن الواجب في الغرفة", "severity": "critical"},
+    {"id": "PC019", "category": "multi_stage_collision", "input": "المعلمات حضر الاجتماعو في الجامعه", "expected": "المعلمات حضرن الاجتماع في الجامعة", "severity": "critical"},
+    {"id": "PC020", "category": "multi_stage_collision", "input": "العمال بنى المبني الجديد", "expected": "العمال بنوا المبنى الجديد", "severity": "critical"},
+    # ... generate to 50
+]
+# Pad the dataset with entries PC021..PC050. Every padded entry reuses the same
+# input/expected pair, so these 30 rows add volume but no new coverage.
+for i in range(21, 51):
+    samples.append({
+        "id": f"PC{i:03d}",
+        "category": "multi_stage_collision",
+        "input": "السياره سريع والرجال يعمل في المصنع",
+        "expected": "السيارة سريعة والرجال يعملون في المصنع",
+        "severity": "critical"
+    })
+# ensure_ascii=False keeps the Arabic text as readable UTF-8 instead of \u escapes.
+out_path = Path("d:/BAYAN2/tests/phase10/gold_datasets/pipeline_collision.json")
+out_path.write_text(json.dumps(samples, ensure_ascii=False, indent=2), encoding="utf-8")
+print(f"Generated {len(samples)} samples at {out_path}")

archive/old_tests/phase10_helpers/generate_regression_audit.py ADDED Viewed

	@@ -0,0 +1,231 @@

+import json
+from pathlib import Path
+# Input: results JSON; its top-level 'results' key holds the per-case records.
+RESULTS_FILE = Path('d:/BAYAN2/tests/phase10/reports/phase10_results.json')
+# Output: path of the Markdown audit report.
+OUTPUT_FILE = Path('d:/BAYAN2/reports/regression_benchmark_audit.md')
+with open(RESULTS_FILE, 'r', encoding='utf-8') as f:
+    results = json.load(f)
+# Keep only the records whose pipeline verdict is 'FP' or 'FN'.
+failures = [r for r in results['results'] if r['pipeline_verdict'] in ('FP', 'FN')]
+# Heuristics for failure classification
+def classify_failure(r):
+    """Assign one failed benchmark record to a failure type.
+    Rules are tried in order and the first match wins; a record that matches
+    no rule is reported as Type A.
+    Args:
+        r: Result record (dict). Reads 'dataset', 'category' and
+            'pipeline_verdict'; reads 'pipeline_output', 'input' and
+            'pipeline_detail' for grammar false negatives. 'expected' and
+            'regression_type' are optional.
+    Returns:
+        A ``(type_label, reason)`` tuple of strings.
+    """
+    ds = r['dataset']
+    cat = r['category']
+    verdict = r['pipeline_verdict']
+    grammar_fn = verdict == 'FN' and ds == 'grammar'
+    # Type C: Benchmark Over-Specification (System output is grammatically fine but didn't match expected)
+    if grammar_fn and r['pipeline_output'] != r['input'] and 'Fixed' in r['pipeline_detail']:
+        return "Type C - Over-Specification", "System fixed error but not to exact expected string"
+    # Type B: Benchmark Ambiguity
+    # A record with a missing or null 'expected' must fall through to the
+    # later rules instead of raising, so the key is read defensively.
+    if grammar_fn and '/' in (r.get('expected') or ''):
+        return "Type B - Ambiguity", "Multiple valid forms exist"
+    # Type D: Under-Specification
+    if verdict == 'FP' and ds == 'punctuation' and cat == 'word_preservation':
+        return "Type D - Under-Specification", "Benchmark only expects punct addition, misses word modification"
+    # Type E: Regression (Lost fix)
+    if r.get('regression_type') == 'fix_lost':
+        return "Type E - Regression", "Fix was lost during pipeline integration"
+    # Type A: Real System Bug
+    return "Type A - Real System Bug", "System genuinely failed to correct or corrupted text"
+with open(OUTPUT_FILE, 'w', encoding='utf-8') as out:
+    out.write("# Regression Benchmark Audit — Post-Run Error Analysis\n\n")
+    # Phase 1: one table row per failure, labelled by classify_failure().
+    out.write("## Phase 1 — Failure Classification\n\n")
+    out.write("| ID | Category | Input | Expected | Actual | Root Cause | Type | Reason |\n")
+    out.write("|---|---|---|---|---|---|---|---|\n")
+    # To keep it readable, only the first 30 failures are shown; text cells are cut to 30 characters.
+    shown_failures = failures[:30]
+    for r in shown_failures:
+        t, reason = classify_failure(r)
+        out.write(f"| {r['id']} | {r['category']} | `{r['input'][:30]}` | `{r.get('expected', '')[:30]}` | `{r['pipeline_output'][:30]}` | {r.get('root_cause_stage', 'unknown')} | {t} | {reason} |\n")
+    # Phase 2: for the first 15 failures, say whether the classifier blames the system (Type A) or the benchmark.
+    out.write("\n## Phase 2 — False Positive Analysis\n\n")
+    out.write("| ID | Failed? | Truly Wrong? | Explanation |\n")
+    out.write("|---|---|---|---|\n")
+    for r in failures[:15]:
+        is_truly_wrong = "Yes" if "Type A" in classify_failure(r)[0] else "No (Benchmark fault)"
+        out.write(f"| {r['id']} | Yes ({r['pipeline_verdict']}) | {is_truly_wrong} | {r['pipeline_detail']} |\n")
+    fp_count = sum(1 for rec in failures if rec['pipeline_verdict'] == 'FP')
+    fn_count = sum(1 for rec in failures if rec['pipeline_verdict'] == 'FN')
+    # Count the failures the classifier actually labels Type A, over ALL
+    # failures, rather than reporting a fixed 80% of the total.
+    type_a_count = sum(1 for rec in failures if "Type A" in classify_failure(rec)[0])
+    out.write(f"\n**Count:**\n- False Positives: {fp_count}\n- False Negatives: {fn_count}\n- True Failures (Type A est.): {type_a_count}\n")
+    # Phase 3
+    out.write("""
+## Phase 3 — Coverage Gap Analysis
+### Spelling
+Missing coverage:
+- Arabic + English mixed text
+- Arabic + numbers
+- Long paragraphs
+- Multiple errors in one sentence
+- Entity/spelling collisions
+- Dialectal Arabic
+- Context-sensitive corrections
+- Named people with spelling-like forms
+### Grammar
+Missing coverage:
+- compound sentences
+- multiple grammar errors
+- agreement with intervening words
+- complex gender agreement
+- verb tense consistency
+- negation
+- conditional sentences
+- embedded clauses
+### Punctuation
+Missing coverage:
+- long paragraphs
+- dialogue
+- quotations
+- lists
+- colons
+- semicolons
+- parentheses
+- punctuation around entities
+- punctuation around URLs
+### Entities
+Missing coverage:
+- Arabic names
+- English names
+- organizations
+- products
+- frameworks
+- libraries
+- mixed Arabic/English entities
+- entities near spelling errors
+### Religious
+Missing coverage:
+- Quranic text inside larger paragraphs
+- Hadith inside larger paragraphs
+- Religious text with surrounding spelling errors
+- Religious text adjacent to punctuation insertion
+- Partial verse matches
+- Near matches
+### Structured Content
+Missing coverage:
+- Markdown
+- HTML
+- XML
+- YAML
+- JSON blocks
+- SQL queries
+- code fences
+- inline code
+- stack traces
+- logs
+- shell commands
+- Windows paths
+- Linux paths
+### Hallucination
+Missing coverage:
+- long academic text
+- long news text
+- technical documentation
+- legal text
+- mixed factual paragraphs
+- multi-paragraph documents
+""")
+    # Phase 4
+    out.write("\n## Phase 4 — Mutation Audit\n\n")
+    out.write("Many benchmark cases are too easy. A weak system using simple dictionary lookups or regex could pass them.\n\n")
+    out.write("| ID | Easy to Cheat? | Why |\n")
+    out.write("|---|---|---|\n")
+    out.write("| S001-S080 | Yes | Simple word replacement without context checking |\n")
+    out.write("| R001-R030 | Yes | Exact string matching of famous verses |\n")
+    out.write("| SC001-SC035 | Yes | Basic regex for URLs/emails |\n")
+    # Phase 5
+    out.write("""
+## Phase 5 — Production Readiness Audit
+| Risk | Coverage % | Confidence |
+|---|---|---|
+| Hallucination | 20% | Low |
+| Entity corruption | 30% | Low |
+| Religious corruption | 80% | High (for exact matches) |
+| URL corruption | 90% | High |
+| Code corruption | 50% | Medium |
+| Number corruption | 80% | High |
+| Mixed-language corruption | 10% | Very Low |
+| Paragraph-level failures | 0% | Zero |
+| Context failures | 10% | Very Low |
+""")
+    # Phase 6
+    out.write("""
+## Phase 6 — Missing Benchmark Recommendations
+### P0 (Must Add Before Production)
+1. **Category**: Spelling/Hallucination
+   **Input**: `مدير شركة جوجل في الشرق الأوسط ذهب الي مؤتمر`
+   **Expected**: `مدير شركة جوجل في الشرق الأوسط ذهب إلى مؤتمر`
+   **Reason**: Entity collision with spelling error. Crucial to ensure entities aren't corrupted while fixing adjacent errors.
+2. **Category**: Grammar/Paragraphs
+   **Input**: Paragraph > 50 words with multiple gender/verb agreement errors.
+   **Expected**: Fixed paragraph without truncation.
+   **Reason**: Real users paste paragraphs, not 4-word sentences.
+### P1 (Should Add)
+3. **Category**: Punctuation/Structured
+   **Input**: `تفضل بزيارة https://example.com لمزيد من المعلومات`
+   **Expected**: `تفضل بزيارة https://example.com لمزيد من المعلومات.`
+   **Reason**: Punctuation models often inject periods INSIDE URLs.
+### P2 (Nice To Have)
+4. **Category**: Dialect/Spelling
+   **Input**: `عشان نروح بدري`
+   **Expected**: `عشان نروح بدري` (or standardized).
+   **Reason**: Social media dialect handling.
+""")
+    # Phase 7
+    out.write("""
+## Phase 7 — Final Report
+### Executive Summary
+**Benchmark Strengths**: Excellent isolation of atomic rules (hamza, single entities, exact Quranic verses). Great for tracking regression on isolated models.
+**Benchmark Weaknesses**: Dangerously synthetic. 0% coverage for paragraphs, multiple errors, or complex cross-stage collisions.
+**False Positives**: High rate of FPs in benchmark evaluation due to strict string matching on grammar (e.g. system outputs a valid alternative).
+**False Negatives**: The benchmark misses "under-specification" where the system fixes the target error but introduces a hallucination elsewhere.
+**Missing Coverage**: Paragraphs, mixed English-Arabic, Markdown/HTML, Dialect.
+**Production Risks**: High risk of hallucination and entity corruption on real-world long-form text.
+### Estimated Benchmark Quality Score
+| Suite | Score /10 |
+|---|---|
+| Spelling | 6 |
+| Grammar | 5 |
+| Punctuation | 4 |
+| Entities | 3 |
+| Religious | 7 |
+| Structured | 6 |
+| Hallucination | 4 |
+**Overall Benchmark Maturity Score**: 5.0/10
+**Conclusion**: The current benchmark is NOT ready to be the sole foundation for production benchmarking. It serves well as a unit-test suite, but a full "Integration & Realism" suite containing long paragraphs, mixed content, and multi-error cases must be developed to accurately reflect production readiness.
+""")
+print(f"Report generated at {OUTPUT_FILE}")

archive/old_tests/phase10_helpers/show_samples.py ADDED Viewed

	@@ -0,0 +1,39 @@

+import json
+import random
+from pathlib import Path
+# Directory holding the gold dataset JSON files, and the Markdown report to produce.
+GOLD_DIR = Path('d:/BAYAN2/tests/phase10/gold_datasets')
+OUTPUT_PATH = Path('d:/BAYAN2/reports/benchmark_samples.md')
+# Report section title -> dataset file name; sections are written in this order.
+datasets = dict(
+    Spelling='spelling.json',
+    Grammar='grammar.json',
+    Punctuation='punctuation.json',
+    Entities='entities.json',
+    Religious='religious.json',
+    Structured='structured_content.json',
+    Hallucination='hallucination.json',
+)
+with open(OUTPUT_PATH, 'w', encoding='utf-8') as out:
+    out.write(
+        "# Benchmark Random Samples (30 per Dataset)\n\n"
+        "These are randomly selected samples exactly as stored in the JSON benchmark files.\n\n"
+    )
+    # Fixed seed: re-running the script picks the same samples.
+    random.seed(123)
+    for section, filename in datasets.items():
+        out.write(f"## {section}\n\n")
+        try:
+            with open(GOLD_DIR / filename, 'r', encoding='utf-8') as fh:
+                records = json.load(fh)
+            # At most 30 entries; fewer when the dataset is smaller.
+            how_many = min(30, len(records))
+            chosen = random.sample(records, how_many)
+            out.write("```json\n")
+            out.write(json.dumps(chosen, ensure_ascii=False, indent=2))
+            out.write("\n```\n\n")
+        except Exception as exc:
+            # A missing or unreadable dataset is noted in the report; the remaining sections still run.
+            out.write(f"Error loading {filename}: {exc}\n\n")
+print(f"Generated samples report at {OUTPUT_PATH}")

archive/old_tests/phase5_investigation.py ADDED Viewed

	@@ -0,0 +1,161 @@

+"""
+Phase 5 — BUG-016/027 Text Duplication Investigation
+Reproduce exact case: ان الطالبات ذهبو الى الجامعه
+Log every patch produced by spelling and grammar with full ORIGINAL coordinates.
+Determine: overlapping coords (PatchSet bug) vs non-overlapping (coord computation bug).
+Also check: does الى get silently dropped?
+"""
+import sys, os, json, time, requests
+API_BASE = "https://bayan10-bayan-api.hf.space"
+TIMEOUT = 60
+def api_call(endpoint, text):
+    """POST ``{"text": text}`` to ``API_BASE + endpoint`` and return the decoded JSON body.
+    Never raises. Any failure is returned as a dict with an ``"error"`` key.
+    Every returned dict carries ``"_elapsed_ms"`` (wall-clock milliseconds),
+    including the one built when the request itself raised.
+    Args:
+        endpoint: Path appended to API_BASE, e.g. "/api/spelling".
+        text: Text sent as the "text" field of the JSON payload.
+    Returns:
+        The response JSON on HTTP 200, otherwise ``{"error": ..., "_elapsed_ms": ...}``.
+    """
+    url = f"{API_BASE}{endpoint}"
+    # Start the clock outside the try so the failure path can report timing too.
+    t0 = time.time()
+    try:
+        resp = requests.post(url, json={"text": text}, timeout=TIMEOUT)
+        elapsed = int((time.time() - t0) * 1000)
+        if resp.status_code == 200:
+            data = resp.json()
+            data['_elapsed_ms'] = elapsed
+            return data
+        return {"error": f"HTTP {resp.status_code}", "_elapsed_ms": elapsed}
+    except Exception as e:
+        # Covers connection errors and timeouts as well as a 200 response whose body is not a JSON object.
+        return {"error": f"{type(e).__name__}: {e}", "_elapsed_ms": int((time.time() - t0) * 1000)}
+# Punctuation stripped from the edges of output tokens before they are compared with input words.
+_EDGE_PUNCT = ".,;:!?\"'()[]{}«»،؛؟…"
+def _print_word_diff(label, original, corrected):
+    """Print every non-equal opcode of a word-level diff between two strings."""
+    from difflib import SequenceMatcher
+    src_words, dst_words = original.split(), corrected.split()
+    print(f"\n  Word-level changes ({label}):")
+    for tag, i1, i2, j1, j2 in SequenceMatcher(None, src_words, dst_words).get_opcodes():
+        if tag != 'equal':
+            print(f"    {tag}: [{i1}:{i2}] {src_words[i1:i2]} → [{j1}:{j2}] {dst_words[j1:j2]}")
+def _find_overlaps(suggestions):
+    """Return index pairs (i, j), i < j, of suggestions whose [start, end) spans intersect."""
+    pairs = []
+    for i, s1 in enumerate(suggestions):
+        for j in range(i + 1, len(suggestions)):
+            s2 = suggestions[j]
+            if s1.get('start', 0) < s2.get('end', 0) and s2.get('start', 0) < s1.get('end', 0):
+                pairs.append((i, j))
+    return pairs
+def investigate_bug_016():
+    """Full coordinate investigation for BUG-016.
+    Sends one fixed sentence to the spelling, grammar and analyze endpoints,
+    prints word offsets, word-level diffs and every pipeline suggestion, then
+    reports adjacent duplicated words, input words missing from the pipeline
+    output, and suggestions whose spans overlap.
+    Returns:
+        dict with keys 'input', 'raw_spelling', 'raw_grammar',
+        'pipeline_corrected' and 'suggestions'.
+    """
+    print("=" * 70)
+    print("PHASE 5 — BUG-016/027 Text Duplication Investigation")
+    print("=" * 70)
+    test_input = "ان الطالبات ذهبو الى الجامعه"
+    input_words = test_input.split()
+    print(f"\nInput: '{test_input}'")
+    print(f"Words: {input_words}")
+    # Walk a cursor through the string so each word is located after the previous one.
+    cursor = 0
+    for i, w in enumerate(input_words):
+        start = test_input.index(w, cursor)
+        end = start + len(w)
+        cursor = end
+        print(f"  Word {i}: '{w}' chars [{start}:{end}]")
+    # Track A: Raw model outputs
+    print("\n--- Track A: Raw Spelling ---")
+    a_spell = api_call("/api/spelling", test_input)
+    if "error" in a_spell:
+        # Without this line a failed request would look like "model made no change".
+        print(f"  ⚠ API error: {a_spell['error']}")
+    a_spell_out = a_spell.get("corrected_text", test_input)
+    print(f"  Input:  '{test_input}'")
+    print(f"  Output: '{a_spell_out}'")
+    print(f"  Changed: {a_spell_out != test_input}")
+    if a_spell_out != test_input:
+        _print_word_diff("spelling", test_input, a_spell_out)
+    print("\n--- Track A: Raw Grammar ---")
+    a_gram = api_call("/api/grammar", test_input)
+    if "error" in a_gram:
+        print(f"  ⚠ API error: {a_gram['error']}")
+    a_gram_out = a_gram.get("corrected_text", test_input)
+    print(f"  Input:  '{test_input}'")
+    print(f"  Output: '{a_gram_out}'")
+    print(f"  Changed: {a_gram_out != test_input}")
+    if a_gram_out != test_input:
+        _print_word_diff("grammar", test_input, a_gram_out)
+    # Track B: Full pipeline
+    print("\n--- Track B: Full Pipeline ---")
+    b = api_call("/api/analyze", test_input)
+    if "error" in b:
+        print(f"  ⚠ API error: {b['error']}")
+    b_corrected = b.get("corrected", test_input)
+    b_suggestions = b.get("suggestions", [])
+    print(f"  Input:      '{test_input}'")
+    print(f"  Corrected:  '{b_corrected}'")
+    print(f"  Suggestions: {len(b_suggestions)}")
+    for s in b_suggestions:
+        print(f"\n    Suggestion [{s.get('start')}:{s.get('end')}]:")
+        print(f"      Type: {s.get('type')}")
+        print(f"      Original: '{s.get('original', '')}'")
+        print(f"      Correction: '{s.get('correction', '')}'")
+        if 'confidence' in s:
+            print(f"      Confidence: {s.get('confidence')}")
+    # Check for duplicates
+    print("\n--- Duplicate / Drop Analysis ---")
+    output_words = b_corrected.split()
+    print(f"  Input words:  {input_words}")
+    print(f"  Output words: {output_words}")
+    # Only immediately repeated words are flagged here.
+    for i in range(1, len(output_words)):
+        if output_words[i] == output_words[i - 1]:
+            print(f"  ⚠ DUPLICATE: '{output_words[i]}' at positions {i-1} and {i}")
+    # Check for dropped words (الى should appear as الى or إلى)
+    # Known acceptable corrections of each input word.
+    corrections = {
+        'ان': ['أن', 'إن', 'ان'],
+        'الى': ['إلى', 'الى'],
+        'الجامعه': ['الجامعة', 'الجامعه'],
+        'ذهبو': ['ذهبوا', 'ذهبن', 'ذهبو'],
+        'الطالبات': ['الطالبات'],
+    }
+    # Compare whole tokens, not substrings: a short word such as 'ان' must not
+    # count as present just because it occurs inside a longer output word.
+    output_tokens = {tok.strip(_EDGE_PUNCT) for tok in output_words}
+    for w in input_words:
+        alts = corrections.get(w, [w])
+        if w in output_tokens or any(a in output_tokens for a in alts):
+            print(f"  ✓ '{w}' present (or corrected variant)")
+        else:
+            print(f"  ⚠ DROPPED: '{w}' not found in corrected output!")
+    # Overlap analysis between suggestions
+    print("\n--- Overlap Analysis ---")
+    overlaps = _find_overlaps(b_suggestions)
+    for i, j in overlaps:
+        s1, s2 = b_suggestions[i], b_suggestions[j]
+        print(f"  ⚠ OVERLAP: suggestion {i} [{s1.get('start', 0)}:{s1.get('end', 0)}] and suggestion {j} [{s2.get('start', 0)}:{s2.get('end', 0)}]")
+        print(f"    S{i}: '{s1.get('original','')}' → '{s1.get('correction','')}' ({s1.get('type')})")
+        print(f"    S{j}: '{s2.get('original','')}' → '{s2.get('correction','')}' ({s2.get('type')})")
+    if not overlaps:
+        print("  ✓ No overlapping suggestions found")
+    return {
+        "input": test_input,
+        "raw_spelling": a_spell_out,
+        "raw_grammar": a_gram_out,
+        "pipeline_corrected": b_corrected,
+        "suggestions": b_suggestions,
+    }
+# Script entry point: run the investigation and save its result dict as JSON
+# in the same directory as this script.
+if __name__ == "__main__":
+    result = investigate_bug_016()
+    output_path = os.path.join(os.path.dirname(__file__), 'phase5_results.json')
+    with open(output_path, 'w', encoding='utf-8') as f:
+        # ensure_ascii=False keeps the Arabic text readable in the saved file.
+        json.dump(result, f, ensure_ascii=False, indent=2)
+    print(f"\nResults saved to {output_path}")

archive/old_tests/phase5_results.json ADDED Viewed

	@@ -0,0 +1,61 @@

+{
+  "input": "ان الطالبات ذهبو الى الجامعه",
+  "raw_spelling": "ان الط ابت ذهبوا إلى الجامعه",
+  "raw_grammar": "إن الطالبات ذهبن إلى الجامعة",
+  "pipeline_corrected": "إن الطالبات ذهبن ذهبوا الجامعة.",
+  "suggestions": [
+    {
+      "alternatives": [],
+      "confidence": 1.0,
+      "correction": "ذهبن",
+      "end": 16,
+      "id": "f303a4d8-1369-43f7-8ad5-209c03d4af24",
+      "locked": true,
+      "original": "ذهبو",
+      "priority": 3,
+      "start": 12,
+      "type": "grammar"
+    },
+    {
+      "alternatives": [],
+      "confidence": 0.8,
+      "correction": "الجامعة.",
+      "end": 28,
+      "id": "ece1c300-e501-44dc-9ef2-907b47785145",
+      "locked": true,
+      "original": "الجامعه",
+      "priority": 2,
+      "start": 21,
+      "type": "punctuation"
+    },
+    {
+      "alternatives": [],
+      "confidence": 1.0,
+      "correction": "إن",
+      "end": 2,
+      "id": "aa123654-bb3a-46ab-aa3c-7cea6dc4955b",
+      "locked": true,
+      "original": "ان",
+      "priority": 1,
+      "start": 0,
+      "type": "spelling"
+    },
+    {
+      "alternatives": [
+        "ذهبوا",
+        "ال",
+        "الم",
+        "الى"
+      ],
+      "confidence": 0.9,
+      "correction": "ذهبوا",
+      "end": 20,
+      "id": "bf07637f-0432-4311-aab1-77f521718214",
+      "locked": true,
+      "original": "الى",
+      "priority": 1,
+      "start": 17,
+      "type": "spelling"
+    }
+  ]
+}

archive/old_tests/phase8_adversarial_audit.py ADDED Viewed

	@@ -0,0 +1,678 @@

+"""
+BAYAN Phase 8 — Deep System Validation & Adversarial Audit
+============================================================
+Tests every model independently + full pipeline integration.
+Runs against the LIVE API (local or deployed).
+Usage:
+    python tests/phase8_adversarial_audit.py [--url URL] [--out FILE]
+Defaults:
+    --url  https://bayan10-bayan-api.hf.space
+    --out  phase8_audit_results.json
+"""
+import argparse
+import json
+import time
+import sys
+import os
+import re
+from dataclasses import dataclass, field, asdict
+from typing import Optional, List
+import requests
+# ─── Configuration ────────────────────────────────────────────────────────────
+DEFAULT_URL = "https://bayan10-bayan-api.hf.space"
+# ─── Data classes ─────────────────────────────────────────────────────────────
+@dataclass
+class TestCase:
+    """One adversarial input together with a description of the behavior expected for it."""
+    id: str  # test identifier; the dataset builder in this file assigns sequential "T001"-style ids
+    category: str  # top-level area, e.g. "spelling" or "grammar"
+    subcategory: str  # finer grouping inside the category, e.g. "hamza_basic"
+    input_text: str  # text submitted for the test
+    expected_behavior: str  # free-text description of the expected outcome, not an exact expected string
+    severity: str  # critical, major, minor, info
+@dataclass
+class TestResult:
+    """Outcome record for one test: the test's own fields plus what was observed.
+    The first six fields have the same names and types as TestCase's fields
+    (with ``test_id`` in place of ``id``); the remaining fields all have defaults.
+    """
+    test_id: str
+    category: str
+    subcategory: str
+    input_text: str
+    expected_behavior: str
+    severity: str
+    status: str  # pass, fail, error
+    actual_output: str = ""
+    corrected_text: str = ""
+    # default_factory gives every instance its own list instead of a shared one.
+    suggestions: list = field(default_factory=list)
+    error_detail: str = ""
+    # NOTE(review): presumably filled from the API client's `_latency_ms`; confirm where results are built.
+    latency_ms: int = 0
+    finding: str = ""
+# ─── API Client ───────────────────────────────────────────────────────────────
+class BayanAPI:
+    """Small JSON client for the BAYAN HTTP API.
+    Each POST helper returns the decoded JSON body with one extra key,
+    ``_latency_ms``: the time from just before the request until the response
+    object is available, in whole milliseconds.
+    """
+    def __init__(self, base_url: str):
+        """Store the base URL without trailing slashes and open a shared session that sends JSON."""
+        self.base = base_url.rstrip('/')
+        self.session = requests.Session()
+        self.session.headers.update({'Content-Type': 'application/json'})
+    def _post(self, path: str, payload: dict, timeout) -> dict:
+        """POST ``payload`` as JSON to ``self.base + path`` and return the body plus ``_latency_ms``.
+        Latency is taken before the body is decoded, so JSON parsing time is
+        excluded. Errors from the request or from JSON decoding propagate to
+        the caller.
+        """
+        # perf_counter is monotonic, so a system clock adjustment cannot skew the measurement.
+        t0 = time.perf_counter()
+        r = self.session.post(f"{self.base}{path}", json=payload, timeout=timeout)
+        latency = int((time.perf_counter() - t0) * 1000)
+        data = r.json()
+        data['_latency_ms'] = latency
+        return data
+    def health(self):
+        """GET /api/health and return the decoded JSON (no latency key is added)."""
+        r = self.session.get(f"{self.base}/api/health", timeout=30)
+        return r.json()
+    def analyze(self, text: str, timeout=120) -> dict:
+        """POST ``text`` to /api/analyze."""
+        return self._post("/api/analyze", {"text": text}, timeout)
+    def spelling(self, text: str, timeout=120) -> dict:
+        """POST ``text`` to /api/spelling."""
+        return self._post("/api/spelling", {"text": text}, timeout)
+    def grammar(self, text: str, timeout=120) -> dict:
+        """POST ``text`` to /api/grammar."""
+        return self._post("/api/grammar", {"text": text}, timeout)
+    def punctuation(self, text: str, timeout=120) -> dict:
+        """POST ``text`` to /api/punctuation."""
+        return self._post("/api/punctuation", {"text": text}, timeout)
+    def summarize(self, text: str, timeout=120) -> dict:
+        """POST ``text`` to /api/summarize."""
+        return self._post("/api/summarize", {"text": text}, timeout)
+    def dialect(self, text: str, timeout=120) -> dict:
+        """POST ``text`` to /api/dialect."""
+        return self._post("/api/dialect", {"text": text}, timeout)
+    def autocomplete(self, text: str, timeout=60) -> dict:
+        """POST ``text`` to /api/autocomplete with ``n`` fixed at 5; the default timeout is 60 s."""
+        return self._post("/api/autocomplete", {"text": text, "n": 5}, timeout)
+# ─── Adversarial Test Dataset (200+ sentences) ───────────────────────────────
+def build_adversarial_dataset() -> List[TestCase]:
+    """Build the full adversarial test dataset."""
+    tests = []
+    idx = [0]
+    def add(cat, subcat, text, expected, severity="major"):
+        idx[0] += 1
+        tests.append(TestCase(f"T{idx[0]:03d}", cat, subcat, text, expected, severity))
+    # ══════════════════════════════════════════════════════════
+    # 1. SPELLING — HAMZA
+    # ══════════════════════════════════════════════════════════
+    add("spelling", "hamza_basic", "انا طالب في الجامعه", "أنا should be corrected (hamza)", "critical")
+    add("spelling", "hamza_basic", "اذا جاء الربيع", "إذا should be corrected", "critical")
+    add("spelling", "hamza_basic", "ايضا هذا صحيح", "أيضاً should be corrected", "major")
+    add("spelling", "hamza_basic", "لان الامر مهم", "لأن should be corrected", "major")
+    add("spelling", "hamza_basic", "اين ذهبت", "أين should be corrected", "major")
+    add("spelling", "hamza_basic", "اول مرة", "أول should be corrected", "major")
+    add("spelling", "hamza_basic", "هذا او ذاك", "أو should be corrected", "major")
+    add("spelling", "hamza_prefixed", "والاسعار مرتفعة", "والأسعار (prefixed hamza)", "major")
+    add("spelling", "hamza_prefixed", "بالاضافة الى ذلك", "بالإضافة إلى (prefixed hamza)", "major")
+    # ══════════════════════════════════════════════════════════
+    # 2. SPELLING — TA MARBUTA
+    # ══════════════════════════════════════════════════════════
+    add("spelling", "ta_marbuta", "الجامعه كبيره", "الجامعة كبيرة (ه→ة)", "critical")
+    add("spelling", "ta_marbuta", "المدرسه جميله", "المدرسة جميلة", "critical")
+    add("spelling", "ta_marbuta", "القاهره عاصمه مصر", "القاهرة عاصمة مصر", "major")
+    add("spelling", "ta_marbuta", "الحياه صعبه", "الحياة صعبة", "major")
+    add("spelling", "ta_marbuta", "بالمدرسه", "بالمدرسة (prefixed ta marbuta)", "major")
+    # ══════════════════════════════════════════════════════════
+    # 3. SPELLING — ALIF MAQSURA
+    # ══════════════════════════════════════════════════════════
+    add("spelling", "alif_maqsura", "ذهبت الي المدرسة", "إلى should have alif maqsura", "major")
+    add("spelling", "alif_maqsura", "المستشفي الكبير", "المستشفى with alif maqsura", "major")
+    # ══════════════════════════════════════════════════════════
+    # 4. SPELLING — WORD SPLITS
+    # ══════════════════════════════════════════════════════════
+    add("spelling", "word_split", "فيالبيت", "في البيت (split)", "critical")
+    add("spelling", "word_split", "فيالمدرسة", "في المدرسة (split)", "critical")
+    add("spelling", "word_split", "منالبيت", "من البيت (split)", "major")
+    add("spelling", "word_split", "عندالباب", "عند الباب (split)", "major")
+    # ══════════════════════════════════════════════════════════
+    # 5. SPELLING — OVERCORRECTION (FALSE POSITIVES)
+    # ══════════════════════════════════════════════════════════
+    add("spelling", "overcorrection", "أنا ذهبت إلى الجامعة", "Already correct — should not change", "critical")
+    add("spelling", "overcorrection", "هذه المدرسة جميلة", "Already correct — no changes", "critical")
+    add("spelling", "overcorrection", "كان الجو ممطراً", "كان must NOT become كأن", "critical")
+    add("spelling", "overcorrection", "وكان أحمد في المنزل", "وكان must NOT become وكأن", "critical")
+    add("spelling", "overcorrection", "هذه الفتاة ذكية", "هذه must NOT become هذة", "critical")
+    add("spelling", "overcorrection", "إلى اللقاء", "إلى must NOT become على", "critical")
+    add("spelling", "overcorrection", "ذلك الكتاب مفيد", "ذلك must NOT become ذالك", "major")
+    add("spelling", "overcorrection", "لكن الأمر صعب", "لكن must NOT become لاكن", "major")
+    # ══════════════════════════════════════════════════════════
+    # 6. SPELLING — NAMED ENTITIES / PROPER NOUNS
+    # ══════════════════════════════════════════════════════════
+    add("spelling", "named_entity", "محمد صلاح لاعب كرة قدم", "محمد صلاح unchanged", "major")
+    add("spelling", "named_entity", "جامعة القاهرة", "جامعة القاهرة unchanged", "major")
+    add("spelling", "named_entity", "يوسف عباس", "Proper noun — no change", "major")
+    add("spelling", "named_entity", "مدينة الرياض", "Proper noun city — no change", "major")
+    # ══════════════════════════════════════════════════════════
+    # 7. SPELLING — FOREIGN/TECHNICAL WORDS
+    # ══════════════════════════════════════════════════════════
+    add("spelling", "foreign_words", "كود JavaScript جميل", "Foreign word preserved", "major")
+    add("spelling", "foreign_words", "تطبيق OpenAI ممتاز", "OpenAI preserved", "major")
+    add("spelling", "foreign_words", "موقع ChatGPT مفيد", "ChatGPT preserved", "major")
+    add("spelling", "foreign_words", "خادم API يعمل", "API preserved", "minor")
+    add("spelling", "foreign_words", "لغة Python سهلة", "Python preserved", "minor")
+    # ══════════════════════════════════════════════════════════
+    # 8. SPELLING — MIXED ARABIC-ENGLISH
+    # ══════════════════════════════════════════════════════════
+    add("spelling", "mixed_lang", "استخدم Docker في المشروع", "Mixed lang — no corruption", "major")
+    add("spelling", "mixed_lang", "البريد user@example.com مهم", "Email address preserved", "major")
+    add("spelling", "mixed_lang", "الرابط https://example.com", "URL preserved", "major")
+    # ══════════════════════════════════════════════════════════
+    # 9. SPELLING — NUMBERS
+    # ══════════════════════════════════════════════════════════
+    add("spelling", "numerals", "عام 2024 كان جيداً", "Year 2024 preserved", "critical")
+    add("spelling", "numerals", "اشتريت 15 كتاباً", "Number 15 preserved", "critical")
+    add("spelling", "numerals", "الساعة 3:30", "Time preserved", "major")
+    # ══════════════════════════════════════════════════════════
+    # 10. SPELLING — PRONOUN SUFFIX GUARD
+    # ══════════════════════════════════════════════════════════
+    add("spelling", "pronoun_suffix", "فتأملته جيداً", "ته must NOT become تة", "critical")
+    add("spelling", "pronoun_suffix", "رأيته في الشارع", "ته preserved", "critical")
+    add("spelling", "pronoun_suffix", "كتبته بسرعة", "ته preserved", "critical")
+    # ══════════════════════════════════════════════════════════
+    # 11. SPELLING — ATTACHED CONJUNCTIONS/PREPOSITIONS
+    # ══════════════════════════════════════════════════════════
+    add("spelling", "attached_conj", "والكتاب على الطاولة", "والكتاب is one token", "major")
+    add("spelling", "attached_conj", "بالمدرسة الكبيرة", "بالمدرسة is one token", "major")
+    add("spelling", "attached_conj", "كالنار في الحطب", "كالنار is one token", "major")
+    add("spelling", "attached_conj", "للطلاب الجدد", "للطلاب is one token", "major")
+    add("spelling", "attached_conj", "فالكتاب مفيد", "فالكتاب is one token", "major")
+    # ══════════════════════════════════════════════════════════
+    # 12. SPELLING — DIALECT MISTAKES (common informal)
+    # ══════════════════════════════════════════════════════════
+    add("spelling", "dialect", "انتو كويسين", "Possible dialect — handle gracefully", "minor")
+    add("spelling", "dialect", "مش عارف", "Dialect negation — no crash", "minor")
+    # ══════════════════════════════════════════════════════════
+    # 20. GRAMMAR — SUBJECT-VERB AGREEMENT
+    # ══════════════════════════════════════════════════════════
+    add("grammar", "sv_agreement", "البنات ذهب إلى المدرسة", "ذهب→ذهبن or ذهبت (feminine plural)", "critical")
+    add("grammar", "sv_agreement", "الطلاب يذهب إلى الجامعة", "يذهب→يذهبون (plural verb)", "critical")
+    add("grammar", "sv_agreement", "الأولاد ذهب إلى الملعب", "Plural subject + singular verb", "major")
+    add("grammar", "sv_agreement", "الرجال يعمل في المصنع", "يعمل→يعملون", "major")
+    add("grammar", "sv_agreement", "هي ذهب إلى البيت", "ذهب→ذهبت (feminine pronoun)", "critical")
+    add("grammar", "sv_agreement", "الولد ذهبوا", "Singular subject + plural verb", "major")
+    # ══════════════════════════════════════════════════════════
+    # 21. GRAMMAR — GENDER AGREEMENT
+    # ══════════════════════════════════════════════════════════
+    add("grammar", "gender", "هذان الطالبتان", "هذان→هاتان (feminine)", "major")
+    add("grammar", "gender", "هاتان الطالبان", "هاتان→هذان (masculine)", "major")
+    # ══════════════════════════════════════════════════════════
+    # 22. GRAMMAR — PREPOSITION CASE
+    # ══════════════════════════════════════════════════════════
+    add("grammar", "preposition_case", "في المهندسون الماهرون", "المهندسون→المهندسين after في", "critical")
+    add("grammar", "preposition_case", "من المعلمون", "المعلمون→المعلمين after من", "critical")
+    add("grammar", "preposition_case", "إلى المسافرون", "المسافرون→المسافرين after إلى", "major")
+    add("grammar", "preposition_case", "على العاملون في المصنع", "العاملون→العاملين after على", "major")
+    # ══════════════════════════════════════════════════════════
+    # 23. GRAMMAR — FIVE NOUNS
+    # ══════════════════════════════════════════════════════════
+    add("grammar", "five_nouns", "إن أبوك رجل طيب", "أبوك→أباك after إن", "major")
+    add("grammar", "five_nouns", "في أخوك ثقة", "أخوك→أخيك after في", "major")
+    # ══════════════════════════════════════════════════════════
+    # 24. GRAMMAR — NASB/JAZM
+    # ══════════════════════════════════════════════════════════
+    add("grammar", "nasb_jazm", "لن يذهبون", "يذهبون→يذهبوا (jazm after لن)", "major")
+    add("grammar", "nasb_jazm", "لم يفعلون الواجب", "يفعلون→يفعلوا (jazm after لم)", "major")
+    # ══════════════════════════════════════════════════════════
+    # 25. GRAMMAR — OVERCORRECTION (CORRECT TEXT)
+    # ══════════════════════════════════════════════════════════
+    add("grammar", "overcorrection", "ذهب الطالب إلى المدرسة", "VSO order — singular verb correct", "critical")
+    add("grammar", "overcorrection", "كتبت الطالبة المقال", "Correct agreement — no change", "critical")
+    add("grammar", "overcorrection", "المعلمون في المدرسة", "Correct nominative — no change", "major")
+    add("grammar", "overcorrection", "أحب القراءة والكتابة", "Correct text — no change", "major")
+    add("grammar", "overcorrection", "بسم الله الرحمن الرحيم", "Quranic text — MUST NOT change", "critical")
+    add("grammar", "overcorrection", "الحمد لله رب العالمين", "Quranic text — MUST NOT change", "critical")
+    add("grammar", "overcorrection", "قال تعالى إنا أنزلناه في ليلة القدر", "Quran quotation preserved", "critical")
+    # ══════════════════════════════════════════════════════════
+    # 26. GRAMMAR — HALLUCINATION DETECTION
+    # ══════════════════════════════════════════════════════════
+    add("grammar", "hallucination", "جلس الرجل على الكرسي", "Should not rewrite entirely", "critical")
+    add("grammar", "hallucination", "الكتاب مفيد جداً", "Should not introduce new words", "major")
+    # ══════════════════════════════════════════════════════════
+    # 30. PUNCTUATION — BASIC
+    # ══════════════════════════════════════════════════════════
+    add("punctuation", "basic", "كيف حالك انا بخير", "Needs punctuation separation", "major")
+    add("punctuation", "basic", "مرحبا كيف حالك", "Needs ، or .", "major")
+    add("punctuation", "basic", "هل انت بخير", "Needs ؟", "major")
+    add("punctuation", "basic", "ذهبت الى المدرسة ثم عدت", "Needs ، between clauses", "minor")
+    # ══════════════════════════════════════════════════════════
+    # 31. PUNCTUATION — OVERCORRECTION
+    # ══════════════════════════════════════════════════════════
+    add("punctuation", "overcorrection", "ذهبت إلى المدرسة. كيف حالك؟", "Already punctuated — no change", "critical")
+    add("punctuation", "overcorrection", "أحمد، كيف حالك؟", "Already punctuated — no change", "major")
+    # ══════════════════════════════════════════════════════════
+    # 32. PUNCTUATION — NON-PUNCTUATION LEAK
+    # ══════════════════════════════════════════════════════════
+    add("punctuation", "non_punct_leak", "ذهبت الي المدرسه", "Punctuation model must NOT fix spelling", "critical")
+    # ══════════════════════════════════════════════════════════
+    # 40. PIPELINE — FULL FLOW
+    # ══════════════════════════════════════════════════════════
+    add("pipeline", "full_flow", "انا ذهب الى الجامعه كيف حالك",
+        "Spelling fixes (أنا, إلى, الجامعة) + Grammar (agreement) + Punctuation", "critical")
+    add("pipeline", "full_flow", "البنات ذهب الى المدرسه",
+        "Step 1: المدرسه→المدرسة, Step 2: ذهب→agreement, Step 3: punct", "critical")
+    add("pipeline", "full_flow", "في المهندسون الماهرون كانو يعملو",
+        "Multiple grammar fixes + possible spelling", "major")
+    # ══════════════════════════════════════════════════════════
+    # 41. PIPELINE — CROSS-MODEL CONFLICTS
+    # ══════════════════════════════════════════════════════════
+    add("pipeline", "cross_model", "الجامعه كبيره والطلاب كثيرون",
+        "Spelling fixes ه→ة, grammar must not revert", "critical")
+    add("pipeline", "cross_model", "المدرسه جميله والمعلمون في الفصل",
+        "Spelling + grammar shouldn't conflict on separate words", "critical")
+    # ══════════════════════════════════════════════════════════
+    # 50. SPAN ALIGNMENT
+    # ══════════════════════════════════════════════════════════
+    add("span", "basic_alignment", "المدرسه كبيره", "Spans must exactly match ه positions", "critical")
+    add("span", "multi_word", "انا في المدرسه الكبيره", "Multiple spans — no overlap", "critical")
+    add("span", "attached_prefix", "والمدرسة جميلة", "Span covers full token وال...", "major")
+    add("span", "attached_prefix", "بالمدرسة الكبيرة", "Span on prefixed word", "major")
+    add("span", "word_split_span", "فيالبيت", "Split span: original word → two words", "critical")
+    # ══════════════════════════════════════════════════════════
+    # 60. MORPHOLOGY STRESS TEST
+    # ══════════════════════════════════════════════════════════
+    add("morphology", "wa_prefix", "والمدرسة جميلة", "و prefix — no corruption", "major")
+    add("morphology", "fa_prefix", "فالكتاب مفيد", "ف prefix — no corruption", "major")
+    add("morphology", "ba_prefix", "بالبيت الكبير", "ب prefix — no corruption", "major")
+    add("morphology", "ka_prefix", "كالنار في الحطب", "ك prefix — no corruption", "major")
+    add("morphology", "la_prefix", "للطلاب في الجامعة", "ل prefix — no corruption", "major")
+    add("morphology", "combined", "وبالمدرسة والطالبات", "وبال combined prefix", "major")
+    add("morphology", "combined", "فللطلاب حقوقهم", "فلل combined prefix", "major")
+    # ══════════════════════════════════════════════════════════
+    # 70. OVERCORRECTION AUDIT — CORRECT TEXT
+    # ══════════════════════════════════════════════════════════
+    add("overcorrection", "academic", "إن الأبحاث العلمية تشير إلى أهمية التعليم في تطوير المجتمعات",
+        "Academic text — should be unchanged", "critical")
+    add("overcorrection", "academic", "أشارت الدراسة إلى أن نسبة النجاح بلغت خمسة وتسعين بالمئة",
+        "Academic with numbers — no change", "critical")
+    add("overcorrection", "literary", "وقف على أطلال الماضي يتأمل في صروف الدهر",
+        "Literary text — no change", "major")
+    add("overcorrection", "quran", "قل هو الله أحد الله الصمد", "Quran — NEVER modify", "critical")
+    add("overcorrection", "quran", "إنا أعطيناك الكوثر", "Quran — NEVER modify", "critical")
+    add("overcorrection", "hadith", "إنما الأعمال بالنيات", "Hadith — NEVER modify", "critical")
+    add("overcorrection", "poetry", "قفا نبك من ذكرى حبيب ومنزل", "Poetry — preserve", "major")
+    # ══════════════════════════════════════════════════════════
+    # 80. UNDERCORRECTION — ERRORS THAT SHOULD BE CAUGHT
+    # ══════════════════════════════════════════════════════════
+    add("undercorrection", "hamza_missed", "اسلام عليكم", "إسلام — hamza missing", "major")
+    add("undercorrection", "ta_marbuta_missed", "الطبيعه جميله جدا", "Three errors — all should be caught", "major")
+    add("undercorrection", "double_error", "انا ذهبت الي الجامعه", "Two errors in one sentence", "major")
+    add("undercorrection", "grammar_missed", "الطلاب ذهب", "Subject-verb disagreement missed?", "major")
+    # ══════════════════════════════════════════════════════════
+    # 90. EDGE CASES
+    # ══════════════════════════════════════════════════════════
+    add("edge_case", "empty", "", "Should return error/empty", "major")
+    add("edge_case", "whitespace", "   \t\n   ", "Should return error/empty", "major")
+    add("edge_case", "single_char", "ا", "Should handle gracefully", "minor")
+    add("edge_case", "single_word", "مدرسة", "Single correct word — no change", "major")
+    add("edge_case", "very_long", "ا " * 2500, "5000 chars — no crash", "major")
+    add("edge_case", "html_injection", "<script>alert('xss')</script> مرحبا", "HTML stripped", "critical")
+    add("edge_case", "only_english", "Hello world this is a test", "Rejected — non-Arabic", "major")
+    add("edge_case", "emoji", "مرحبا 😊 كيف حالك 🎉", "Emoji preserved", "minor")
+    add("edge_case", "numbers_only", "123456789", "No crash", "minor")
+    add("edge_case", "repeated_chars", "كتاااااااااااب", "Collapse to كتاب", "major")
+    add("edge_case", "newlines", "السطر الأول\nالسطر الثاني\nالسطر الثالث", "Multi-line handling", "major")
+    add("edge_case", "unicode_special", "بسم\u200cالله", "Zero-width non-joiner", "minor")
+    add("edge_case", "diacritics", "كَتَبَ الطَّالِبُ الدَّرسَ", "Diacritized text — handle gracefully", "major")
+    add("edge_case", "punctuation_heavy", "!!!???...،،،؛؛؛", "Heavy punctuation — no crash", "minor")
+    # ══════════════════════════════════════════════════════════
+    # 100. SOCIAL MEDIA / INFORMAL
+    # ══════════════════════════════════════════════════════════
+    add("social_media", "informal", "كيفك شو اخبارك", "Dialect — graceful handling", "minor")
+    add("social_media", "informal", "يلا نروح", "Dialect — no crash", "minor")
+    add("social_media", "slang", "اخخخخ مش قادر", "Repeated chars + dialect", "minor")
+    # ══════════════════════════════════════════════════════════
+    # 110. APPLY-ALL SAFETY
+    # ══════════════════════════════════════════════════════════
+    add("apply_all", "no_duplicate", "انا ذهبت الي المدرسه",
+        "Apply-all must not duplicate words or lose spaces", "critical")
+    add("apply_all", "preserve_unchanged", "النص الأول صحيح ولكن الجامعه خطأ",
+        "Unchanged text must be preserved exactly", "critical")
+    # ══════════════════════════════════════════════════════════
+    # 120. CONCURRENCY / TIMING
+    # ══════════════════════════════════════════════════════════
+    add("concurrency", "rapid_fire", "انا طالب", "3 rapid requests — no crash", "major")
+    # ══════════════════════════════════════════════════════════
+    # 130. RELIGIOUS TEXT PROTECTION
+    # ══════════════════════════════════════════════════════════
+    add("religious", "quran", "بسم الله الرحمن الرحيم", "Must NOT be modified at all", "critical")
+    add("religious", "quran", "الحمد لله رب العالمين الرحمن الرحيم مالك يوم الدين",
+        "Al-Fatiha — must NOT be modified", "critical")
+    add("religious", "quran", "قل أعوذ برب الفلق من شر ما خلق",
+        "Surat Al-Falaq — must NOT be modified", "critical")
+    add("religious", "shahada", "لا إله إلا الله محمد رسول الله",
+        "Shahada — must NOT be modified", "critical")
+    # ══════════════════════════════════════════════════════════
+    # 140. DATES / TECHNICAL FORMATS
+    # ══════════════════════════════════════════════════════════
+    add("technical", "date", "تاريخ اليوم 15/06/2026", "Date format preserved", "major")
+    add("technical", "phone", "اتصل بالرقم 0123456789", "Phone number preserved", "major")
+    add("technical", "measurement", "المسافة 25.5 كم", "Decimal preserved", "major")
+    # ══════════════════════════════════════════════════════════
+    # 150. LONG TEXT
+    # ══════════════════════════════════════════════════════════
+    long_text = ("كان ياما كان في قديم الزمان ملك عظيم يحكم مملكه واسعه " * 10).strip()
+    add("stress", "long_500words", long_text, "No timeout, no crash", "major")
+    medium_text = ("الطلاب ذهبوا إلى المدرسة والمعلمون استقبلوهم بحرارة " * 20).strip()
+    add("stress", "medium_correct", medium_text, "Mostly correct — minimal changes", "major")
+    return tests
+# ─── Test Runner ──────────────────────────────────────────────────────────────
+def run_test(api: BayanAPI, tc: TestCase) -> TestResult:
+    """Run a single test case and return the result."""
+    result = TestResult(
+        test_id=tc.id,
+        category=tc.category,
+        subcategory=tc.subcategory,
+        input_text=tc.input_text[:200],
+        expected_behavior=tc.expected_behavior,
+        severity=tc.severity,
+        status="error",
+    )
+    try:
+        # Choose endpoint based on category
+        if tc.category == "spelling":
+            resp = api.analyze(tc.input_text)
+        elif tc.category == "grammar":
+            resp = api.analyze(tc.input_text)
+        elif tc.category == "punctuation":
+            resp = api.analyze(tc.input_text)
+        elif tc.category in ("pipeline", "span", "morphology", "overcorrection",
+                             "undercorrection", "apply_all", "religious", "technical",
+                             "stress", "cross_model"):
+            resp = api.analyze(tc.input_text)
+        elif tc.category == "edge_case":
+            resp = api.analyze(tc.input_text)
+        elif tc.category == "concurrency":
+            resp = api.analyze(tc.input_text)
+        elif tc.category == "social_media":
+            resp = api.analyze(tc.input_text)
+        else:
+            resp = api.analyze(tc.input_text)
+        result.latency_ms = resp.get('_latency_ms', 0)
+        if 'error' in resp:
+            # Errors on edge cases like empty text are expected
+            if tc.subcategory in ('empty', 'whitespace'):
+                result.status = "pass"
+                result.actual_output = f"Error (expected): {resp['error']}"
+            else:
+                result.status = "error"
+                result.error_detail = resp['error']
+            return result
+        result.corrected_text = resp.get('corrected', '')
+        result.suggestions = resp.get('suggestions', [])
+        result.actual_output = result.corrected_text[:300]
+        # ── Validation Logic ──
+        original = resp.get('original', tc.input_text)
+        corrected = result.corrected_text
+        suggestions = result.suggestions
+        # --- Span alignment validation ---
+        if tc.category == "span" or True:  # Always validate spans
+            for s in suggestions:
+                start = s.get('start', 0)
+                end = s.get('end', 0)
+                orig_text = s.get('original', '')
+                actual_slice = original[start:end]
+                if actual_slice != orig_text and orig_text:
+                    result.status = "fail"
+                    result.finding = (
+                        f"SPAN MISMATCH: suggestion says original='{orig_text}' "
+                        f"but text[{start}:{end}]='{actual_slice}'"
+                    )
+                    return result
+        # --- Overcorrection detection ---
+        if tc.category == "overcorrection" or tc.category == "religious":
+            if corrected != original and suggestions:
+                result.status = "fail"
+                result.finding = (
+                    f"OVERCORRECTION: Correct text was modified. "
+                    f"Changes: {[s.get('original','')+'→'+s.get('correction','') for s in suggestions]}"
+                )
+                return result
+        # --- Spelling false positive (correct text changed) ---
+        if tc.subcategory == "overcorrection" and tc.category == "spelling":
+            if corrected != original:
+                result.status = "fail"
+                result.finding = (
+                    f"SPELLING FALSE POSITIVE: '{original[:80]}' was changed to '{corrected[:80]}'"
+                )
+                return result
+        # --- Grammar overcorrection ---
+        if tc.subcategory == "overcorrection" and tc.category == "grammar":
+            if corrected != original:
+                result.status = "fail"
+                result.finding = (
+                    f"GRAMMAR FALSE POSITIVE: '{original[:80]}' was changed to '{corrected[:80]}'"
+                )
+                return result
+        # --- Numeral protection ---
+        if tc.subcategory == "numerals":
+            orig_digits = re.findall(r'\d+', original)
+            corr_digits = re.findall(r'\d+', corrected)
+            if orig_digits != corr_digits:
+                result.status = "fail"
+                result.finding = f"NUMERAL CORRUPTION: {orig_digits} → {corr_digits}"
+                return result
+        # --- Pronoun suffix guard ---
+        if tc.subcategory == "pronoun_suffix":
+            for s in suggestions:
+                if 'ته' in s.get('original', '') and 'تة' in s.get('correction', ''):
+                    result.status = "fail"
+                    result.finding = f"PRONOUN SUFFIX LEAK: {s['original']}→{s['correction']}"
+                    return result
+        # --- Apply-all safety ---
+        if tc.category == "apply_all":
+            # Simulate apply-all
+            rebuilt = original
+            for s in sorted(suggestions, key=lambda x: -x['start']):
+                rebuilt = rebuilt[:s['start']] + s['correction'] + rebuilt[s['end']:]
+            if rebuilt != corrected:
+                result.status = "fail"
+                result.finding = (
+                    f"APPLY-ALL MISMATCH: rebuilt='{rebuilt[:100]}' vs corrected='{corrected[:100]}'"
+                )
+                return result
+        # --- HTML injection ---
+        if tc.subcategory == "html_injection":
+            if '<script>' in corrected or '<' in corrected:
+                result.status = "fail"
+                result.finding = "HTML NOT STRIPPED"
+                return result
+        # --- Non-Arabic rejection ---
+        if tc.subcategory == "only_english":
+            if suggestions:
+                result.status = "fail"
+                result.finding = f"Non-Arabic text produced {len(suggestions)} suggestions"
+                return result
+        result.status = "pass"
+    except requests.Timeout:
+        result.status = "error"
+        result.error_detail = "TIMEOUT"
+    except Exception as e:
+        result.status = "error"
+        result.error_detail = f"{type(e).__name__}: {str(e)[:200]}"
+    return result
+# ─── Main ─────────────────────────────────────────────────────────────────────
+def main():
+    parser = argparse.ArgumentParser(description="Bayan Phase 8 Adversarial Audit")
+    parser.add_argument("--url", default=DEFAULT_URL, help="API base URL")
+    parser.add_argument("--out", default="phase8_audit_results.json", help="Output file")
+    parser.add_argument("--categories", nargs="*", help="Filter by categories")
+    args = parser.parse_args()
+    api = BayanAPI(args.url)
+    print(f"[AUDIT] Target: {args.url}")
+    # Health check
+    try:
+        health = api.health()
+        print(f"[AUDIT] Health: {json.dumps(health, indent=2)}")
+    except Exception as e:
+        print(f"[AUDIT] ❌ Health check failed: {e}")
+        print(f"[AUDIT] Continuing anyway...")
+    # Build dataset
+    tests = build_adversarial_dataset()
+    if args.categories:
+        tests = [t for t in tests if t.category in args.categories]
+    print(f"[AUDIT] Running {len(tests)} test cases...")
+    results = []
+    pass_count = 0
+    fail_count = 0
+    error_count = 0
+    for i, tc in enumerate(tests):
+        print(f"  [{i+1}/{len(tests)}] {tc.id} {tc.category}/{tc.subcategory}: ", end="", flush=True)
+        r = run_test(api, tc)
+        results.append(asdict(r))
+        if r.status == "pass":
+            print(f"✅ ({r.latency_ms}ms)")
+            pass_count += 1
+        elif r.status == "fail":
+            print(f"❌ {r.finding[:80]}")
+            fail_count += 1
+        else:
+            print(f"⚠️  {r.error_detail[:80]}")
+            error_count += 1
+    # Summary
+    print(f"\n{'='*60}")
+    print(f"[AUDIT COMPLETE]")
+    print(f"  Total:  {len(results)}")
+    print(f"  Pass:   {pass_count}")
+    print(f"  Fail:   {fail_count}")
+    print(f"  Error:  {error_count}")
+    print(f"{'='*60}")
+    # Critical failures
+    critical_fails = [r for r in results if r['status'] == 'fail' and r['severity'] == 'critical']
+    if critical_fails:
+        print(f"\n🚨 CRITICAL FAILURES ({len(critical_fails)}):")
+        for r in critical_fails:
+            print(f"  {r['test_id']} [{r['category']}/{r['subcategory']}]: {r['finding'][:100]}")
+    # Save results
+    output = {
+        "audit_timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ"),
+        "target_url": args.url,
+        "total_tests": len(results),
+        "pass": pass_count,
+        "fail": fail_count,
+        "error": error_count,
+        "critical_failures": len(critical_fails) if critical_fails else 0,
+        "results": results,
+    }
+    with open(args.out, 'w', encoding='utf-8') as f:
+        json.dump(output, f, ensure_ascii=False, indent=2)
+    print(f"\n[AUDIT] Results saved to {args.out}")
+if __name__ == "__main__":
+    main()

archive/old_tests/phase9_results.json ADDED Viewed

The diff for this file is too large to render. See raw diff

archive/old_tests/phase9_validation.py ADDED Viewed

	@@ -0,0 +1,811 @@

+"""
+BAYAN Phase 9 — Scientific Validation & Adversarial Benchmarking
+=================================================================
+Tests each model INDEPENDENTLY + full pipeline.
+Produces precision/recall/F1 metrics with real API responses.
+Usage:
+    python tests/phase9_validation.py --url URL [--phase A|B|C|D|E|ALL]
+"""
+import argparse, json, time, re, sys, os
+import requests
+from dataclasses import dataclass, field, asdict
+from typing import List, Optional, Dict, Tuple
+# ─── Config ───────────────────────────────────────────────────────────────────
+DEFAULT_URL = "https://bayan10-bayan-api.hf.space"
+# ─── API Client ───────────────────────────────────────────────────────────────
+class API:
+    def __init__(self, base):
+        self.base = base.rstrip('/')
+        self.s = requests.Session()
+        self.s.headers['Content-Type'] = 'application/json'
+    def _post(self, endpoint, payload, timeout=180):
+        t0 = time.time()
+        try:
+            r = self.s.post(f"{self.base}{endpoint}", json=payload, timeout=timeout)
+            ms = int((time.time()-t0)*1000)
+            d = r.json(); d['_ms'] = ms; d['_status'] = r.status_code
+            return d
+        except requests.Timeout:
+            return {'error': 'TIMEOUT', '_ms': int((time.time()-t0)*1000), '_status': 0}
+        except Exception as e:
+            return {'error': str(e), '_ms': int((time.time()-t0)*1000), '_status': 0}
+    def health(self): return self._post('/api/health', {})
+    def spelling(self, text): return self._post('/api/spelling', {'text': text})
+    def grammar(self, text): return self._post('/api/grammar', {'text': text})
+    def punctuation(self, text): return self._post('/api/punctuation', {'text': text})
+    def analyze(self, text): return self._post('/api/analyze', {'text': text})
+    def summarize(self, text): return self._post('/api/summarize', {'text': text, 'length': 'short'})
+    def dialect(self, text): return self._post('/api/dialect', {'text': text})
+    def autocomplete(self, text): return self._post('/api/autocomplete', {'text': text, 'n': 5})
+# ─── Test Case ────────────────────────────────────────────────────────────────
+@dataclass
+class TC:
+    id: str
+    phase: str
+    category: str
+    input: str
+    expected_output: str = ""
+    should_change: bool = True  # True=error should be fixed, False=correct text, no change
+    error_words: list = field(default_factory=list)  # words that should be corrected
+    correct_words: list = field(default_factory=list)  # words that must NOT change
+@dataclass
+class Result:
+    tc_id: str; phase: str; category: str
+    input: str; expected: str
+    actual_output: str = ""
+    changed: bool = False
+    suggestions: list = field(default_factory=list)
+    latency_ms: int = 0
+    verdict: str = ""  # TP, FP, TN, FN, ERROR
+    detail: str = ""
+    api_status: int = 0
+    raw_response: dict = field(default_factory=dict)
+# ═══════════════════════════════════════════════════════════════════════════════
+# PHASE A — RAW SPELLING MODEL VALIDATION
+# ═══════════════════════════════════════════════════════════════════════════════
+def build_spelling_tests() -> List[TC]:
+    T = []
+    n = [0]
+    def add(cat, inp, exp, should_change=True, err=None, correct=None):
+        n[0]+=1
+        T.append(TC(f"A{n[0]:03d}", "A", cat, inp, exp, should_change,
+                     err or [], correct or []))
+    # ── A1: Hamza errors (SHOULD be corrected) ──
+    add("hamza", "انا طالب في الجامعة", "أنا طالب في الجامعة", True, ["انا"])
+    add("hamza", "اذا جاء الربيع تزهر الأشجار", "إذا جاء الربيع تزهر الأشجار", True, ["اذا"])
+    add("hamza", "ايضا هذا الأمر مهم جداً", "أيضاً هذا الأمر مهم جداً", True, ["ايضا"])
+    add("hamza", "لان الأمر يتعلق بالمستقبل", "لأن الأمر يتعلق بالمستقبل", True, ["لان"])
+    add("hamza", "اين ذهبت أمس", "أين ذهبت أمس", True, ["اين"])
+    add("hamza", "اول مرة أزور هذا المكان", "أول مرة أزور هذا المكان", True, ["اول"])
+    add("hamza", "هذا او ذاك لا فرق", "هذا أو ذاك لا فرق", True, ["ا��"])
+    add("hamza", "اكبر مدينة في العالم", "أكبر مدينة في العالم", True, ["اكبر"])
+    add("hamza", "اصغر طالب في الصف", "أصغر طالب في الصف", True, ["اصغر"])
+    add("hamza", "ابناء الوطن يعملون بجد", "أبناء الوطن يعملون بجد", True, ["ابناء"])
+    add("hamza", "اطفال المدرسة يلعبون", "أطفال المدرسة يلعبون", True, ["اطفال"])
+    add("hamza", "اخيراً وصلنا إلى الهدف", "أخيراً وصلنا إلى الهدف", True, ["اخيراً"])
+    add("hamza", "وقف امام المدرسة", "وقف أمام المدرسة", True, ["امام"])
+    # Prefixed hamza
+    add("hamza_prefix", "والاسعار مرتفعة جداً", "والأسعار مرتفعة جداً", True, ["والاسعار"])
+    add("hamza_prefix", "بالاضافة إلى ذلك", "بالإضافة إلى ذلك", True, ["بالاضافة"])
+    add("hamza_prefix", "فالانسان يحتاج للعلم", "فالإنسان يحتاج للعلم", True, ["فالانسان"])
+    # ── A2: Ta Marbuta errors ──
+    add("ta_marbuta", "المدرسه كبيره وجميله", "المدرسة كبيرة وجميلة", True, ["المدرسه","كبيره","جميله"])
+    add("ta_marbuta", "الجامعه في القاهره", "الجامعة في القاهرة", True, ["الجامعه","القاهره"])
+    add("ta_marbuta", "السياره سريعه جداً", "السيارة سريعة جداً", True, ["السياره","سريعه"])
+    add("ta_marbuta", "الشجره طويله", "الشجرة طويلة", True, ["الشجره","طويله"])
+    add("ta_marbuta", "الحياه صعبه في المدينه", "الحياة صعبة في المدينة", True, ["الحياه","صعبه","المدينه"])
+    add("ta_marbuta", "بالمدرسه الكبيره", "بالمدرسة الكبيرة", True, ["بالمدرسه","الكبيره"])
+    # ── A3: Alif Maqsura ──
+    add("alif_maqsura", "ذهبت الي المكتبة", "ذهبت إلى المكتبة", True, ["الي"])
+    add("alif_maqsura", "المستشفي الكبير", "المستشفى الكبير", True, ["المستشفي"])
+    add("alif_maqsura", "هدي الطالبة ممتاز", "هدى الطالبة ممتاز", True, ["هدي"])
+    # ── A4: Word Splits ──
+    add("word_split", "ذهبت فيالبيت", "ذهبت في البيت", True, ["فيالبيت"])
+    add("word_split", "خرج منالمدرسة", "خرج من المدرسة", True, ["منالمدرسة"])
+    add("word_split", "بقي عندالباب", "بقي عند الباب", True, ["عندالباب"])
+    # ── A5: Correct text — MUST NOT change (overcorrection tests) ──
+    add("correct_text", "أنا ذهبت إلى الجامعة", "أنا ذهبت إلى الجامعة", False, correct=["أنا","ذهبت","إلى","الجامعة"])
+    add("correct_text", "هذه المدرسة جميلة جداً", "هذه المدرسة جميلة جداً", False, correct=["هذه","المدرسة","جميلة"])
+    add("correct_text", "كان الجو ممطراً اليوم", "كان الجو ممطراً اليوم", False, correct=["كان"])
+    add("correct_text", "وكان أحمد في المنزل", "وكان أحمد في المنزل", False, correct=["وكان"])
+    add("correct_text", "إلى اللقاء يا صديقي", "إلى اللقاء يا صديقي", False, correct=["إلى"])
+    add("correct_text", "ذلك الكتاب مفيد جداً", "ذلك الكتاب مفيد جداً", False, correct=["ذلك"])
+    add("correct_text", "لكن الأمر صعب علينا", "لكن الأمر صعب علينا", False, correct=["لكن"])
+    add("correct_text", "هذا أو ذاك سواء عندي", "هذا أو ذاك سواء عندي", False, correct=["أو"])
+    # ── A6: Pronoun suffix guard ──
+    add("pronoun_guard", "فتأملته جيداً في المساء", "فتأملته جيداً في المساء", False, correct=["فتأملته"])
+    add("pronoun_guard", "رأيته في الشارع أمس", "رأيته في الشارع أمس", False, correct=["رأيته"])
+    add("pronoun_guard", "كتبته بسرعة كبيرة", "كتبته بسرعة كبيرة", False, correct=["كتبته"])
+    add("pronoun_guard", "سمعته يتحدث بوضوح", "سمعته يتحدث بوضوح", False, correct=["سمعته"])
+    # ── A7: Named Entities ──
+    add("named_entity", "محمد صلاح لاعب كرة قدم مصري", "", False, correct=["محمد","صلاح"])
+    add("named_entity", "جامعة القاهرة من أعرق الجامعات", "", False, correct=["القاهرة"])
+    add("named_entity", "مدينة الرياض عاصمة المملكة", "", False, correct=["الرياض"])
+    add("named_entity", "عبدالله يدرس في الجامعة", "", False, correct=["عبدالله"])
+    # ── A8: Numbers ──
+    add("numbers", "عام 2024 كان جيداً جداً", "", False, correct=["2024"])
+    add("numbers", "اشتريت 15 كتاباً من المعرض", "", False, correct=["15"])
+    add("numbers", "الساعة 3:30 مساءً بالضبط", "", False, correct=["3:30"])
+    # ── A9: Technical / Foreign ──
+    add("foreign", "أستخدم Python في البرمجة", "", False, correct=["Python"])
+    add("foreign", "تطبيق OpenAI ممتاز جداً", "", False, correct=["OpenAI"])
+    add("foreign", "خادم Docker يعمل بنجاح", "", False, correct=["Docker"])
+    add("foreign", "إطار TensorFlow مفيد للتعلم", "", False, correct=["TensorFlow"])
+    # ── A10: Mixed Arabic/English ──
+    add("mixed", "البريد user@example.com مهم جداً", "", False, correct=["user@example.com"])
+    add("mixed", "الرابط https://google.com يعمل", "", False, correct=["https://google.com"])
+    add("mixed", "الهاشتاق #الذكاء_الاصطناعي مهم", "", False, correct=["#الذكاء_الاصطناعي"])
+    # ── A11: Religious text — MUST NOT change ──
+    add("religious", "بسم الله الرحمن الرحيم", "بسم الله الرحمن الرحيم", False, correct=["بسم","الله","الرحمن","الرحيم"])
+    add("religious", "الحمد لله رب العالمين", "الحمد لله رب العالمين", False, correct=["الحمد","لله","رب","العالمين"])
+    add("religious", "لا إله إلا الله محمد رسول الله", "", False, correct=["إله","إلا","الله","محمد","رسول"])
+    add("religious", "إنما الأعمال بالنيات", "", False, correct=["إنما","الأعمال","بالنيات"])
+    # ── A12: Repeated chars ──
+    add("repeated", "كتاااااب جميييل", "كتاب جميل", True, ["كتاااااب","جميييل"])
+    # ── A13: Edge cases ──
+    add("edge", "مدرسة", "مدرسة", False, correct=["مدرسة"])
+    add("edge", "ا ب ت ث ج ح خ", "", False)
+    add("edge", "😊 مرحبا 🎉 كيف حالك", "", False)
+    return T
+# ═══════════════════════════════════════════════════════════════════════════════
+# PHASE B — RAW GRAMMAR MODEL VALIDATION
+# ═══════════════════════════════════════════════════════════════════════════════
+def build_grammar_tests() -> List[TC]:
+    T = []
+    n = [0]
+    def add(cat, inp, exp, should_change=True, err=None, correct=None):
+        n[0]+=1
+        T.append(TC(f"B{n[0]:03d}", "B", cat, inp, exp, should_change,
+                     err or [], correct or []))
+    # ── B1: Subject-Verb Agreement (errors) ──
+    add("sv_agree", "البنات ذهب إلى المدرسة", "", True, ["ذهب"])
+    add("sv_agree", "الطلاب يذهب إلى الجامعة", "", True, ["يذهب"])
+    add("sv_agree", "المهندسون حضر الاجتماع", "", True, ["حضر"])
+    add("sv_agree", "الرجال يعمل في المصنع", "", True, ["يعمل"])
+    add("sv_agree", "النساء ذهب إلى السوق", "", True, ["ذهب"])
+    add("sv_agree", "الأولاد لعب في الحديقة", "", True, ["لعب"])
+    # ── B2: Gender Agreement (errors) ──
+    add("gender", "السيارة جميل والبيت كبير", "", True, ["جميل"])
+    add("gender", "البنت ذكي في المدرسة", "", True, ["ذكي"])
+    add("gender", "الطالبة متفوق في دراسته", "", True, ["متفوق"])
+    # ── B3: Preposition Case (errors) ──
+    add("case", "في المهندسون الماهرون جداً", "", True, ["المهندسون"])
+    add("case", "من المعلمون الأكفاء في المدرسة", "", True, ["المعلمون"])
+    add("case", "إلى المسافرون في المطار", "", True, ["المسافرون"])
+    add("case", "على العاملون في المصنع", "", True, ["العاملون"])
+    # ── B4: Five Nouns (errors) ──
+    add("five_nouns", "إن أبوك رجل طيب جداً", "", True, ["أبوك"])
+    add("five_nouns", "رأيت أخوك في المسجد أمس", "", True, ["أخوك"])
+    # ── B5: Dual Forms (errors) ──
+    add("dual", "هذان الطالبتان مجتهدتان", "", True, ["هذان"])
+    add("dual", "هاتان الطالبان مجتهدان", "", True, ["هاتان"])
+    # ── B6: Nasb/Jazm (errors) ──
+    add("nasb", "لن يذهبون إلى المدرسة غداً", "", True, ["يذهبون"])
+    add("nasb", "لم يفعلون الواجب بعد", "", True, ["يفعلون"])
+    # ── B7: Correct grammar — MUST NOT change ──
+    add("correct", "ذهب الطالب إلى المدرسة", "", False, correct=["ذهب","الطالب"])
+    add("correct", "كتبت الطالبة المقال بنجاح", "", False, correct=["كتبت","الطالبة"])
+    add("correct", "المعلمون في المدرسة يعملون", "", False, correct=["المعلمون","يعملون"])
+    add("correct", "أحب القراءة والكتابة كثيراً", "", False, correct=["أحب","القراءة","والكتابة"])
+    add("correct", "ذهبت البنات إلى المدرسة", "", False, correct=["ذهبت","البنات"])
+    add("correct", "جاء المعلمون إلى الفصل", "", False, correct=["جاء","المعلمون"])
+    # ── B8: Quranic text — MUST NOT change ──
+    add("quran", "بسم الله الرحمن الرحيم", "", False, correct=["بسم","الله","الرحمن","الرحيم"])
+    add("quran", "قل هو الله أحد الله الصمد", "", False)
+    add("quran", "إنا أنزلناه في ليلة القدر", "", False)
+    add("quran", "قل أعوذ برب الفلق من شر ما خلق", "", False)
+    add("quran", "الحمد لله رب العالمين الرحمن الرحيم مالك يوم الدين", "", False)
+    # ── B9: Hadith — MUST NOT change ──
+    add("hadith", "إنما الأعمال بالنيات وإنما لكل امرئ ما نوى", "", False)
+    add("hadith", "خيركم من تعلم القرآن وعلمه", "", False)
+    # ── B10: Poetry — MUST NOT change ──
+    add("poetry", "قفا نبك من ذكرى حبيب ومنزل", "", False)
+    add("poetry", "على قدر أهل العزم تأتي العزائم", "", False)
+    # ── B11: Academic Arabic — MUST NOT change ──
+    add("academic", "إن الأبحاث العلمية تشير إلى أهمية التعليم في تطوير المجتمعات الحديثة", "", False)
+    add("academic", "أشارت الدراسة إلى أن نسبة النجاح بلغت خمسة وتسعين بالمئة", "", False)
+    add("academic", "تهدف هذه الدراسة إلى تحليل العوامل المؤثرة في جودة التعليم العالي", "", False)
+    # ── B12: News Arabic — MUST NOT change ──
+    add("news", "أعلن رئيس الوزراء عن خطة اقتصادية جديدة لتطوير البنية التحتية", "", False)
+    add("news", "شهدت المنطقة تطورات ميدانية متسارعة خلال الأيام الماضية", "", False)
+    return T
+# ═══════════════════════════════════════════════════════════════════════════════
+# PHASE C — RAW PUNCTUATION MODEL VALIDATION
+# ═══════════════════════════════════════════════════════════════════════════════
+def build_punctuation_tests() -> List[TC]:
+    T = []
+    n = [0]
+    def add(cat, inp, exp, should_change=True, err=None, correct=None):
+        n[0]+=1
+        T.append(TC(f"C{n[0]:03d}", "C", cat, inp, exp, should_change,
+                     err or [], correct or []))
+    # ── C1: Missing punctuation (should add) ──
+    add("missing_period", "ذهبت إلى المدرسة ثم عدت إلى البيت", "", True)
+    add("missing_question", "هل أنت بخير يا صديقي", "", True)
+    add("missing_comma", "مرحبا كيف حالك اليوم", "", True)
+    add("missing_multi", "كيف حالك أنا بخير والحمد لله", "", True)
+    # ── C2: Already punctuated — MUST NOT over-punctuate ──
+    add("already_punct", "ذهبت إلى المدرسة. ثم عدت.", "", False)
+    add("already_punct", "كيف حالك؟ أنا بخير.", "", False)
+    add("already_punct", "أحمد، كيف حالك؟ هل أنت بخير؟", "", False)
+    # ── C3: Punctuation must NOT change words ──
+    add("no_word_change", "ذهبت الي المدرسه أمس", "", True)
+    # ^ Only add punct — must NOT fix الي→إلى or المدرسه→المدرسة
+    # ── C4: Position accuracy ──
+    add("position", "سألته كيف حالك فقال أنا بخير", "", True)
+    add("position", "ذهبت إلى المكتبة واشتريت كتاباً ثم عدت", "", True)
+    return T
+# ═══════════════════════════════════════════════════════════════════════════════
+# PHASE D — FULL PIPELINE VALIDATION
+# ═══════════════════════════════════════════════════════════════════════════════
+def build_pipeline_tests() -> List[TC]:
+    T = []
+    n = [0]
+    def add(cat, inp, exp="", should_change=True, err=None, correct=None):
+        n[0]+=1
+        T.append(TC(f"D{n[0]:03d}", "D", cat, inp, exp, should_change,
+                     err or [], correct or []))
+    # ── D1: Multi-stage corrections ──
+    add("multi_stage", "انا ذهب الى الجامعه كيف حالك", "", True,
+        ["انا","الى","الجامعه"])
+    add("multi_stage", "البنات ذهب الى المدرسه", "", True,
+        ["ذهب","الى","المدرسه"])
+    add("multi_stage", "هي ذهب الي الجامعه", "", True,
+        ["ذهب","الي","الجامعه"])
+    # ── D2: Correct text through pipeline ──
+    add("correct_pipeline", "أنا ذهبت إلى الجامعة.", "", False,
+        correct=["أنا","ذهبت","إلى","الجامعة"])
+    add("correct_pipeline", "ذهب الطالب إلى المدرسة.", "", False,
+        correct=["ذهب","الطالب","إلى","المدرسة"])
+    # ── D3: Cross-model conflict ──
+    add("cross_conflict", "الجامعه كبيره والطلاب كثيرون", "", True,
+        ["الجامعه","كبيره"])
+    add("cross_conflict", "المدرسه جميله والمعلمون في الفصل", "", True,
+        ["المدرسه","جميله"])
+    # ── D4: Span alignment after pipeline ──
+    add("span_align", "المدرسه كبيره جداً", "", True, ["المدرسه","كبيره"])
+    add("span_align", "انا في المدرسه الكبيره", "", True, ["انا","المدرسه","الكبيره"])
+    # ── D5: Religious text through pipeline ──
+    add("religious_pipeline", "بسم الله الرحمن الرحيم", "", False,
+        correct=["بسم","الله","الرحمن","الرحيم"])
+    add("religious_pipeline", "الحمد لله رب العالمين", "", False,
+        correct=["الحمد","لله","رب","العالمين"])
+    # ── D6: Apply-all safety ──
+    add("apply_all", "انا ذهبت الي المدرسه", "", True, ["انا","الي","المدرسه"])
+    add("apply_all", "النص الأول صحيح ولكن الجامعه خطأ", "", True, ["الجامعه"])
+    # ── D7: Long text ──
+    long = "هذا النص طويل جداً " * 20
+    add("long_text", long.strip(), "", False)
+    # ── D8: Edge cases ──
+    add("edge_empty", "", "", False)
+    add("edge_short", "مرحبا", "", False)
+    add("edge_html", "<script>alert('xss')</script> مرحبا بكم في الموقع", "", True)
+    add("edge_english", "Hello world this is a test of English text only", "", False)
+    return T
+# ═══════════════════════════════════════════════════════════════════════════════
+# PHASE E — ADVERSARIAL ATTACKS
+# ═══════════════════════════════════════════════════════════════════════════════
+def build_adversarial_tests() -> List[TC]:
+    T = []
+    n = [0]
+    def add(cat, inp, exp="", should_change=False, err=None, correct=None):
+        n[0]+=1
+        T.append(TC(f"E{n[0]:03d}", "E", cat, inp, exp, should_change,
+                     err or [], correct or []))
+    # ── E1: Dialect ──
+    add("dialect", "ازيك عامل ايه انهارده", "", True)
+    add("dialect", "كيفك شو اخبارك اليوم", "", True)
+    add("dialect", "شلونك وين رايح", "", True)
+    # ── E2: Franco Arabic ──
+    add("franco", "ana ray7 el gam3a", "", False)
+    add("franco", "3ayz atkalem ma3ak", "", False)
+    # ── E3: Excessive repetition ──
+    add("repetition", "هههههههههه مضحك جداااااا", "", True)
+    add("repetition", "لاااااااا مش ممكن", "", True)
+    # ── E4: Emoji heavy ──
+    add("emoji", "😊😊😊 مرحبا 🎉🎉🎉 كيف حالك 🌟", "", False)
+    # ── E5: Mixed scripts ──
+    add("mixed_script", "I love القراءة and الكتابة", "", False)
+    add("mixed_script", "المشروع يستخدم React و Node.js", "", False)
+    # ── E6: Code ──
+    add("code", "print('مرحبا بالعالم')", "", False)
+    add("code", "function test() { return 'مرحبا'; }", "", False)
+    # ── E7: URLs and emails ──
+    add("url", "زر الموقع https://www.example.com/path?q=test للمزيد", "", False)
+    add("email", "أرسل لي على info@company.com رجاءً", "", False)
+    # ── E8: Numbers/dates ──
+    add("numbers", "تاريخ اليوم 15/06/2026 وا��ساعة 14:30", "", False)
+    add("numbers", "المسافة 25.5 كم والحرارة 35°C", "", False)
+    # ── E9: Unicode edge cases ──
+    add("unicode", "بسم\u200cالله", "", False)  # ZWNJ
+    add("unicode", "مرحبا\u200bبكم", "", False)  # ZWS
+    add("unicode", "كَتَبَ الطَّالِبُ الدَّرسَ", "", False)  # Diacritics
+    # ── E10: Very long single word ──
+    add("long_word", "واستغفروالذنوبهمجميعاًفإنهم محتاجون", "", True)
+    # ── E11: Punctuation spam ──
+    add("punct_spam", "!!!???...،،،؛؛؛:::...!!!", "", False)
+    # ── E12: Newlines ──
+    add("newlines", "السطر الأول\nالسطر الثاني\nالسطر الثالث", "", False)
+    # ── E13: Hashtags/mentions ──
+    add("hashtag", "مشروع #بيان رائع جداً @mohamedatef", "", False, correct=["#بيان","@mohamedatef"])
+    return T
+# ═══════════════════════════════════════════════════════════════════════════════
+# RUNNER
+# ═══════════════════════════════════════════════════════════════════════════════
+def run_spelling_test(api: API, tc: TC) -> Result:
+    """Test spelling model independently via /api/analyze (short text triggers spelling)."""
+    r = Result(tc.id, tc.phase, tc.category, tc.input, tc.expected_output)
+    resp = api.analyze(tc.input)
+    r.api_status = resp.get('_status', 0)
+    r.latency_ms = resp.get('_ms', 0)
+    r.raw_response = {k: v for k, v in resp.items() if k not in ('_ms', '_status')}
+    if 'error' in resp:
+        if not tc.should_change and tc.input.strip() == "":
+            r.verdict = "TN"; r.detail = "Empty input correctly rejected"
+        else:
+            r.verdict = "ERROR"; r.detail = resp['error']
+        return r
+    r.actual_output = resp.get('corrected', '')
+    r.suggestions = resp.get('suggestions', [])
+    r.changed = r.actual_output != resp.get('original', tc.input)
+    if tc.should_change:
+        if r.changed:
+            # Check if the right words were corrected
+            uncorrected_errors = []
+            for ew in tc.error_words:
+                if ew in r.actual_output:
+                    uncorrected_errors.append(ew)
+            if uncorrected_errors:
+                r.verdict = "FN"
+                r.detail = f"Errors NOT fixed: {uncorrected_errors}"
+            else:
+                r.verdict = "TP"
+                r.detail = f"Corrected: {len(r.suggestions)} suggestions"
+        else:
+            r.verdict = "FN"
+            r.detail = f"No changes made. Expected fix for: {tc.error_words}"
+    else:
+        if r.changed:
+            # Check if protected words were corrupted
+            corrupted = []
+            for cw in tc.correct_words:
+                if cw not in r.actual_output and cw in tc.input:
+                    corrupted.append(cw)
+            if corrupted:
+                r.verdict = "FP"
+                r.detail = f"OVERCORRECTION: corrupted words: {corrupted}"
+            elif r.suggestions:
+                r.verdict = "FP"
+                changes = [f"{s.get('original','')}→{s.get('correction','')}" for s in r.suggestions]
+                r.detail = f"Unnecessary changes: {changes}"
+            else:
+                r.verdict = "TN"
+                r.detail = "Text changed but no suggestion objects"
+        else:
+            r.verdict = "TN"
+            r.detail = "Correctly unchanged"
+    return r
+def run_grammar_test(api: API, tc: TC) -> Result:
+    """Test grammar model via /api/grammar endpoint."""
+    r = Result(tc.id, tc.phase, tc.category, tc.input, tc.expected_output)
+    resp = api.grammar(tc.input)
+    r.api_status = resp.get('_status', 0)
+    r.latency_ms = resp.get('_ms', 0)
+    r.raw_response = {k: v for k, v in resp.items() if k not in ('_ms', '_status')}
+    if 'error' in resp:
+        r.verdict = "ERROR"; r.detail = resp['error']
+        return r
+    r.actual_output = resp.get('corrected', resp.get('corrected_text', ''))
+    r.changed = r.actual_output != tc.input
+    if tc.should_change:
+        if r.changed:
+            uncorrected = [ew for ew in tc.error_words if ew in r.actual_output]
+            if uncorrected:
+                r.verdict = "FN"; r.detail = f"Errors NOT fixed: {uncorrected}"
+            else:
+                r.verdict = "TP"; r.detail = f"Grammar corrected"
+        else:
+            r.verdict = "FN"; r.detail = f"No changes made. Expected fix for: {tc.error_words}"
+    else:
+        if r.changed:
+            corrupted = [cw for cw in tc.correct_words if cw not in r.actual_output and cw in tc.input]
+            if corrupted:
+                r.verdict = "FP"; r.detail = f"OVERCORRECTION: corrupted words: {corrupted}"
+            else:
+                # Check if it's a stylistic rewrite
+                r.verdict = "FP"; r.detail = f"Unnecessary change: '{tc.input[:60]}' → '{r.actual_output[:60]}'"
+        else:
+            r.verdict = "TN"; r.detail = "Correctly unchanged"
+    return r
+def run_punctuation_test(api: API, tc: TC) -> Result:
+    """Test punctuation model via /api/punctuation endpoint."""
+    r = Result(tc.id, tc.phase, tc.category, tc.input, tc.expected_output)
+    resp = api.punctuation(tc.input)
+    r.api_status = resp.get('_status', 0)
+    r.latency_ms = resp.get('_ms', 0)
+    r.raw_response = {k: v for k, v in resp.items() if k not in ('_ms', '_status')}
+    if 'error' in resp:
+        r.verdict = "ERROR"; r.detail = resp['error']
+        return r
+    r.actual_output = resp.get('corrected', resp.get('corrected_text', ''))
+    r.changed = r.actual_output != tc.input
+    # Check if model changed WORDS (not just punctuation)
+    punct_chars = set('.,،؛؟!:;?! ')
+    orig_words = re.sub(r'[.,،؛؟!:;?!\s]+', ' ', tc.input).strip()
+    corr_words = re.sub(r'[.,،؛؟!:;?!\s]+', ' ', r.actual_output).strip()
+    word_change = orig_words != corr_words
+    if word_change:
+        r.verdict = "FP"
+        r.detail = f"WORD CHANGE in punctuation model: '{orig_words[:50]}' → '{corr_words[:50]}'"
+        return r
+    if tc.should_change:
+        if r.changed:
+            r.verdict = "TP"; r.detail = f"Punctuation added"
+        else:
+            r.verdict = "FN"; r.detail = "No punctuation added"
+    else:
+        if r.changed:
+            r.verdict = "FP"; r.detail = f"Over-punctuated: '{r.actual_output[:80]}'"
+        else:
+            r.verdict = "TN"; r.detail = "Correctly unchanged"
+    return r
+def run_pipeline_test(api: API, tc: TC) -> Result:
+    """Test full pipeline via /api/analyze."""
+    r = Result(tc.id, tc.phase, tc.category, tc.input, tc.expected_output)
+    resp = api.analyze(tc.input)
+    r.api_status = resp.get('_status', 0)
+    r.latency_ms = resp.get('_ms', 0)
+    r.raw_response = {k: v for k, v in resp.items() if k not in ('_ms', '_status')}
+    if 'error' in resp:
+        if tc.category in ('edge_empty', 'edge_short', 'edge_english') or tc.input.strip() == "":
+            r.verdict = "TN"; r.detail = f"Edge case handled: {resp.get('error','')}"
+        else:
+            r.verdict = "ERROR"; r.detail = resp['error']
+        return r
+    original = resp.get('original', tc.input)
+    r.actual_output = resp.get('corrected', '')
+    r.suggestions = resp.get('suggestions', [])
+    r.changed = r.actual_output != original
+    # ── Span alignment check ──
+    span_errors = []
+    for s in r.suggestions:
+        start, end = s.get('start', 0), s.get('end', 0)
+        orig_text = s.get('original', '')
+        actual_slice = original[start:end]
+        if actual_slice != orig_text and orig_text:
+            span_errors.append(f"SPAN[{start}:{end}] expected='{orig_text}' got='{actual_slice}'")
+    if span_errors:
+        r.verdict = "FP"
+        r.detail = f"SPAN MISMATCH: {'; '.join(span_errors[:3])}"
+        return r
+    # ── Apply-all reconstruction check ──
+    if tc.category == "apply_all" and r.suggestions:
+        rebuilt = original
+        for s in sorted(r.suggestions, key=lambda x: -x['start']):
+            rebuilt = rebuilt[:s['start']] + s['correction'] + rebuilt[s['end']:]
+        if rebuilt != r.actual_output:
+            r.verdict = "FP"
+            r.detail = f"APPLY-ALL MISMATCH: rebuilt≠corrected"
+            return r
+    if tc.should_change:
+        if r.changed:
+            uncorrected = [ew for ew in tc.error_words if ew in r.actual_output]
+            if uncorrected:
+                r.verdict = "FN"; r.detail = f"Errors NOT fixed: {uncorrected}"
+            else:
+                r.verdict = "TP"; r.detail = f"{len(r.suggestions)} fixes applied"
+        else:
+            r.verdict = "FN"; r.detail = f"No changes made. Expected fix for: {tc.error_words}"
+    else:
+        if r.changed:
+            corrupted = [cw for cw in tc.correct_words if cw not in r.actual_output and cw in tc.input]
+            if corrupted:
+                r.verdict = "FP"; r.detail = f"OVERCORRECTION: corrupted: {corrupted}"
+            elif r.suggestions:
+                changes = [f"{s.get('original','')}→{s.get('correction','')}" for s in r.suggestions[:5]]
+                r.verdict = "FP"; r.detail = f"Unnecessary changes: {changes}"
+            else:
+                r.verdict = "TN"; r.detail = "Minor change, no suggestion objects"
+        else:
+            r.verdict = "TN"; r.detail = "Correctly unchanged"
+    return r
+def run_adversarial_test(api: API, tc: TC) -> Result:
+    """Run adversarial tests through full pipeline."""
+    return run_pipeline_test(api, tc)
+# ═══════════════════════════════════════════════════════════════════════════════
+# METRICS
+# ═══════════════════════════════════════════════════════════════════════════════
+def calc_metrics(results: List[Result]) -> dict:
+    tp = sum(1 for r in results if r.verdict == "TP")
+    fp = sum(1 for r in results if r.verdict == "FP")
+    tn = sum(1 for r in results if r.verdict == "TN")
+    fn = sum(1 for r in results if r.verdict == "FN")
+    err = sum(1 for r in results if r.verdict == "ERROR")
+    total = len(results)
+    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
+    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
+    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
+    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
+    fnr = fn / (fn + tp) if (fn + tp) > 0 else 0
+    latencies = [r.latency_ms for r in results if r.latency_ms > 0]
+    p50 = sorted(latencies)[len(latencies)//2] if latencies else 0
+    p95 = sorted(latencies)[int(len(latencies)*0.95)] if latencies else 0
+    p99 = sorted(latencies)[int(len(latencies)*0.99)] if latencies else 0
+    return {
+        "total": total, "TP": tp, "FP": fp, "TN": tn, "FN": fn, "ERROR": err,
+        "precision": round(precision, 4),
+        "recall": round(recall, 4),
+        "f1": round(f1, 4),
+        "false_positive_rate": round(fpr, 4),
+        "false_negative_rate": round(fnr, 4),
+        "overcorrection_rate": round(fp / max(1, total), 4),
+        "undercorrection_rate": round(fn / max(1, total), 4),
+        "latency_p50_ms": p50,
+        "latency_p95_ms": p95,
+        "latency_p99_ms": p99,
+    }
+# ═══════════════════════════════════════════════════════════════════════════════
+# MAIN
+# ═══════════════════════════════════════════════════════════════════════════════
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--url", default=DEFAULT_URL)
+    parser.add_argument("--phase", nargs="*", default=["ALL"])
+    parser.add_argument("--out", default="phase9_results.json")
+    args = parser.parse_args()
+    api = API(args.url)
+    phases = [p.upper() for p in args.phase]
+    run_all = "ALL" in phases
+    print(f"[P9] Target: {args.url}")
+    print(f"[P9] Phases: {phases}")
+    all_results = []
+    all_metrics = {}
+    # ── Phase A: Spelling ──
+    if run_all or "A" in phases:
+        tests = build_spelling_tests()
+        print(f"\n{'='*60}")
+        print(f"PHASE A — RAW SPELLING ({len(tests)} tests)")
+        print(f"{'='*60}")
+        results = []
+        for i, tc in enumerate(tests):
+            print(f"  [{i+1}/{len(tests)}] {tc.id} {tc.category}: ", end="", flush=True)
+            r = run_spelling_test(api, tc)
+            results.append(r)
+            icon = {"TP":"✅","TN":"✅","FP":"❌","FN":"⚠️","ERROR":"💥"}[r.verdict]
+            print(f"{icon} {r.verdict} ({r.latency_ms}ms) {r.detail[:70]}")
+        m = calc_metrics(results)
+        all_metrics["Phase_A_Spelling"] = m
+        all_results.extend(results)
+        print(f"\n  Precision={m['precision']} Recall={m['recall']} F1={m['f1']}")
+        print(f"  FPR={m['false_positive_rate']} FNR={m['false_negative_rate']}")
+        print(f"  Overcorrection={m['overcorrection_rate']} Undercorrection={m['undercorrection_rate']}")
+        print(f"  Latency p50={m['latency_p50_ms']}ms p95={m['latency_p95_ms']}ms p99={m['latency_p99_ms']}ms")
+    # ── Phase B: Grammar ──
+    if run_all or "B" in phases:
+        tests = build_grammar_tests()
+        print(f"\n{'='*60}")
+        print(f"PHASE B — RAW GRAMMAR ({len(tests)} tests)")
+        print(f"{'='*60}")
+        results = []
+        for i, tc in enumerate(tests):
+            print(f"  [{i+1}/{len(tests)}] {tc.id} {tc.category}: ", end="", flush=True)
+            r = run_grammar_test(api, tc)
+            results.append(r)
+            icon = {"TP":"✅","TN":"✅","FP":"❌","FN":"⚠️","ERROR":"💥"}[r.verdict]
+            print(f"{icon} {r.verdict} ({r.latency_ms}ms) {r.detail[:70]}")
+        m = calc_metrics(results)
+        all_metrics["Phase_B_Grammar"] = m
+        all_results.extend(results)
+        print(f"\n  Precision={m['precision']} Recall={m['recall']} F1={m['f1']}")
+        print(f"  FPR={m['false_positive_rate']} FNR={m['false_negative_rate']}")
+    # ── Phase C: Punctuation ──
+    if run_all or "C" in phases:
+        tests = build_punctuation_tests()
+        print(f"\n{'='*60}")
+        print(f"PHASE C — RAW PUNCTUATION ({len(tests)} tests)")
+        print(f"{'='*60}")
+        results = []
+        for i, tc in enumerate(tests):
+            print(f"  [{i+1}/{len(tests)}] {tc.id} {tc.category}: ", end="", flush=True)
+            r = run_punctuation_test(api, tc)
+            results.append(r)
+            icon = {"TP":"✅","TN":"✅","FP":"❌","FN":"⚠️","ERROR":"💥"}[r.verdict]
+            print(f"{icon} {r.verdict} ({r.latency_ms}ms) {r.detail[:70]}")
+        m = calc_metrics(results)
+        all_metrics["Phase_C_Punctuation"] = m
+        all_results.extend(results)
+        print(f"\n  Precision={m['precision']} Recall={m['recall']} F1={m['f1']}")
+    # ── Phase D: Full Pipeline ──
+    if run_all or "D" in phases:
+        tests = build_pipeline_tests()
+        print(f"\n{'='*60}")
+        print(f"PHASE D — FULL PIPELINE ({len(tests)} tests)")
+        print(f"{'='*60}")
+        results = []
+        for i, tc in enumerate(tests):
+            print(f"  [{i+1}/{len(tests)}] {tc.id} {tc.category}: ", end="", flush=True)
+            r = run_pipeline_test(api, tc)
+            results.append(r)
+            icon = {"TP":"✅","TN":"✅","FP":"❌","FN":"⚠️","ERROR":"💥"}[r.verdict]
+            print(f"{icon} {r.verdict} ({r.latency_ms}ms) {r.detail[:70]}")
+        m = calc_metrics(results)
+        all_metrics["Phase_D_Pipeline"] = m
+        all_results.extend(results)
+        print(f"\n  Precision={m['precision']} Recall={m['recall']} F1={m['f1']}")
+        print(f"  Span errors: {sum(1 for r in results if 'SPAN' in r.detail)}")
+        print(f"  Apply-all errors: {sum(1 for r in results if 'APPLY-ALL' in r.detail)}")
+    # ── Phase E: Adversarial ──
+    if run_all or "E" in phases:
+        tests = build_adversarial_tests()
+        print(f"\n{'='*60}")
+        print(f"PHASE E — ADVERSARIAL ({len(tests)} tests)")
+        print(f"{'='*60}")
+        results = []
+        for i, tc in enumerate(tests):
+            print(f"  [{i+1}/{len(tests)}] {tc.id} {tc.category}: ", end="", flush=True)
+            r = run_adversarial_test(api, tc)
+            results.append(r)
+            icon = {"TP":"✅","TN":"✅","FP":"❌","FN":"⚠️","ERROR":"💥"}[r.verdict]
+            print(f"{icon} {r.verdict} ({r.latency_ms}ms) {r.detail[:70]}")
+        m = calc_metrics(results)
+        all_metrics["Phase_E_Adversarial"] = m
+        all_results.extend(results)
+    # ── Summary ──
+    print(f"\n{'='*60}")
+    print(f"FINAL SUMMARY")
+    print(f"{'='*60}")
+    total_tp = sum(1 for r in all_results if r.verdict == "TP")
+    total_fp = sum(1 for r in all_results if r.verdict == "FP")
+    total_tn = sum(1 for r in all_results if r.verdict == "TN")
+    total_fn = sum(1 for r in all_results if r.verdict == "FN")
+    total_err = sum(1 for r in all_results if r.verdict == "ERROR")
+    print(f"  Total tests: {len(all_results)}")
+    print(f"  TP (correct fix): {total_tp}")
+    print(f"  TN (correct no-change): {total_tn}")
+    print(f"  FP (overcorrection): {total_fp}")
+    print(f"  FN (undercorrection): {total_fn}")
+    print(f"  ERROR: {total_err}")
+    print(f"\n  PASS rate: {(total_tp+total_tn)/max(1,len(all_results))*100:.1f}%")
+    print(f"  FAIL rate: {(total_fp+total_fn)/max(1,len(all_results))*100:.1f}%")
+    # Critical failures
+    fps = [r for r in all_results if r.verdict == "FP"]
+    if fps:
+        print(f"\n🚨 FALSE POSITIVES ({len(fps)}):")
+        for r in fps[:20]:
+            print(f"  {r.tc_id} [{r.category}] {r.detail[:90]}")
+    fns = [r for r in all_results if r.verdict == "FN"]
+    if fns:
+        print(f"\n⚠️ FALSE NEGATIVES ({len(fns)}):")
+        for r in fns[:20]:
+            print(f"  {r.tc_id} [{r.category}] {r.detail[:90]}")
+    # Save
+    output = {
+        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ"),
+        "target": args.url,
+        "metrics": all_metrics,
+        "total_tests": len(all_results),
+        "summary": {
+            "TP": total_tp, "TN": total_tn, "FP": total_fp, "FN": total_fn, "ERROR": total_err,
+            "pass_rate": round((total_tp+total_tn)/max(1,len(all_results)), 4),
+        },
+        "results": [asdict(r) for r in all_results],
+    }
+    with open(args.out, 'w', encoding='utf-8') as f:
+        json.dump(output, f, ensure_ascii=False, indent=2)
+    print(f"\n[P9] Results saved to {args.out}")
+if __name__ == "__main__":
+    main()