Commit ·
5dcc696
1
Parent(s): 84cce80
chore: complete 100-finding audit + deep project cleanup
Browse files
- Complete all 96 actionable audit findings across 6 categories
- Fix TD8 Grammrar typo, TD16 script bundling, TD18 CSS cleanup
- Delete dead code: punctuation/spelling dir, orphaned contextual_corrector
- Remove debug output, one-off scripts, stale docs
- Archive reports and 35 dev test scripts
- Remove unused deps: datasets, scikit-learn, pandas, rapidfuzz
- Update .gitignore for build output and debug artifacts
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This view is limited to 50 files because it contains too many changes. See raw diff
- .github/workflows/deploy.yml +3 -3
- .gitignore +12 -1
- BAYAN_COMPLETE_AUDIT.md +0 -366
- Dockerfile +41 -0
- PROJECT_DESCRIPTION.md +2 -2
- QUICKSTART.md +0 -126
- README_SETUP.md +0 -172
- analyze_failures.py +0 -67
- apply_locks.py +0 -77
- archive/BAYAN_COMPLETE_AUDIT.md +510 -0
- {reports → archive/benchmark_reports}/Phase10_Post_IVtoOOV_Audit.md +0 -0
- {reports → archive/benchmark_reports}/benchmark_audit.md +0 -0
- {reports → archive/benchmark_reports}/benchmark_samples.md +0 -0
- {reports → archive/benchmark_reports}/regression_benchmark_audit.md +0 -0
- debug_pc002.py → archive/dev_tests/debug_pc002.py +0 -0
- debug_pc023.py → archive/dev_tests/debug_pc023.py +0 -0
- debug_pipeline.py → archive/dev_tests/debug_pipeline.py +0 -0
- debug_punctuation.py → archive/dev_tests/debug_punctuation.py +0 -0
- extract_failures.py → archive/dev_tests/extract_failures.py +0 -0
- extract_grammar_fails.py → archive/dev_tests/extract_grammar_fails.py +0 -0
- extract_pc023.py → archive/dev_tests/extract_pc023.py +0 -0
- test_camel.py → archive/dev_tests/test_camel.py +0 -0
- test_colon.py → archive/dev_tests/test_colon.py +0 -0
- test_failures.py → archive/dev_tests/test_failures.py +0 -0
- test_grammar_fast.py → archive/dev_tests/test_grammar_fast.py +0 -0
- test_grammar_fixes.py → archive/dev_tests/test_grammar_fixes.py +0 -0
- test_grammar_logic.py → archive/dev_tests/test_grammar_logic.py +0 -0
- test_grammar_only.py → archive/dev_tests/test_grammar_only.py +0 -0
- test_grammar_rules.py → archive/dev_tests/test_grammar_rules.py +0 -0
- test_kana.py → archive/dev_tests/test_kana.py +0 -0
- test_local.py → archive/dev_tests/test_local.py +0 -0
- test_mapper.py → archive/dev_tests/test_mapper.py +0 -0
- test_mapper_isolated.py → archive/dev_tests/test_mapper_isolated.py +0 -0
- test_mlm.py → archive/dev_tests/test_mlm.py +0 -0
- test_models.py → archive/dev_tests/test_models.py +0 -0
- test_pc.py → archive/dev_tests/test_pc.py +0 -0
- test_pc001.py → archive/dev_tests/test_pc001.py +0 -0
- test_pc002.py → archive/dev_tests/test_pc002.py +0 -0
- test_pc002_api.py → archive/dev_tests/test_pc002_api.py +0 -0
- test_pc023.py → archive/dev_tests/test_pc023.py +0 -0
- test_pc027.py → archive/dev_tests/test_pc027.py +0 -0
- test_pc034.py → archive/dev_tests/test_pc034.py +0 -0
- test_pc044.py → archive/dev_tests/test_pc044.py +0 -0
- test_pos.py → archive/dev_tests/test_pos.py +0 -0
- test_punc.py → archive/dev_tests/test_punc.py +0 -0
- test_punc_rules.py → archive/dev_tests/test_punc_rules.py +0 -0
- test_punctuation.py → archive/dev_tests/test_punctuation.py +0 -0
- test_raw_punc.py → archive/dev_tests/test_raw_punc.py +0 -0
- test_sv.py → archive/dev_tests/test_sv.py +0 -0
- extension/IMPLEMENTATION_CHANGELOG.md → archive/phase_reports/extension_changelog.md +0 -0
.github/workflows/deploy.yml
CHANGED
|
@@ -28,7 +28,7 @@ jobs:
|
|
| 28 |
|
| 29 |
- name: Verify critical files exist
|
| 30 |
run: |
|
| 31 |
-
for f in src/app.py src/model_loader.py src/
|
| 32 |
src/nlp/__init__.py src/nlp/spelling/araspell_service.py \
|
| 33 |
src/nlp/grammar/grammar_service.py src/nlp/punctuation/punctuation_service.py \
|
| 34 |
Dockerfile Procfile requirements.txt; do
|
|
@@ -36,11 +36,11 @@ jobs:
|
|
| 36 |
done
|
| 37 |
echo "✅ All critical files present"
|
| 38 |
|
| 39 |
-
- name: Verify API routes defined
|
| 40 |
run: |
|
| 41 |
for route in "/api/health" "/api/analyze" "/api/summarize" "/api/spelling" \
|
| 42 |
"/api/grammar" "/api/punctuation" "/api/quran"; do
|
| 43 |
-
grep -
|
| 44 |
done
|
| 45 |
echo "✅ All API routes defined"
|
| 46 |
|
|
|
|
| 28 |
|
| 29 |
- name: Verify critical files exist
|
| 30 |
run: |
|
| 31 |
+
for f in src/app.py src/model_loader.py src/index.html \
|
| 32 |
src/nlp/__init__.py src/nlp/spelling/araspell_service.py \
|
| 33 |
src/nlp/grammar/grammar_service.py src/nlp/punctuation/punctuation_service.py \
|
| 34 |
Dockerfile Procfile requirements.txt; do
|
|
|
|
| 36 |
done
|
| 37 |
echo "✅ All critical files present"
|
| 38 |
|
| 39 |
+
- name: Verify API routes defined
|
| 40 |
run: |
|
| 41 |
for route in "/api/health" "/api/analyze" "/api/summarize" "/api/spelling" \
|
| 42 |
"/api/grammar" "/api/punctuation" "/api/quran"; do
|
| 43 |
+
grep -rq "$route" src/routes/ src/app.py && echo " ✅ $route" || { echo " ❌ MISSING ROUTE: $route"; exit 1; }
|
| 44 |
done
|
| 45 |
echo "✅ All API routes defined"
|
| 46 |
|
.gitignore
CHANGED
|
@@ -38,4 +38,15 @@ node_modules/
|
|
| 38 |
# Test artifacts
|
| 39 |
.pytest_cache/
|
| 40 |
test-results/
|
| 41 |
-
extension/assets/icons/*.png
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
# Test artifacts
|
| 39 |
.pytest_cache/
|
| 40 |
test-results/
|
| 41 |
+
extension/assets/icons/*.png
|
| 42 |
+
|
| 43 |
+
# Build output
|
| 44 |
+
dist/
|
| 45 |
+
src/js/bayan.bundle.js
|
| 46 |
+
|
| 47 |
+
# Debug/temp output
|
| 48 |
+
out*.txt
|
| 49 |
+
local_debug.txt
|
| 50 |
+
pc_data.txt
|
| 51 |
+
camel_test_out.json
|
| 52 |
+
grammar_fails_output.md
|
BAYAN_COMPLETE_AUDIT.md
DELETED
|
@@ -1,366 +0,0 @@
|
|
| 1 |
-
# BAYAN — Complete Product, Codebase & Extension Deep Audit
|
| 2 |
-
|
| 3 |
-
> **Audit Date:** 2026-06-26
|
| 4 |
-
> **Auditor Perspective:** Product Manager + Senior Frontend + Backend Architect + Extension Engineer + SaaS Reviewer
|
| 5 |
-
|
| 6 |
-
---
|
| 7 |
-
|
| 8 |
-
## 1. Current System Overview
|
| 9 |
-
|
| 10 |
-
### Architecture Map
|
| 11 |
-
|
| 12 |
-
```
|
| 13 |
-
┌──────────────────────────────────────────────────────┐
|
| 14 |
-
│ BAYAN ECOSYSTEM │
|
| 15 |
-
│ │
|
| 16 |
-
│ ┌─────────┐ ┌──────────┐ ┌─────────────────┐ │
|
| 17 |
-
│ │ Website │───▶│ Flask API │───▶│ NLP Pipeline │ │
|
| 18 |
-
│ │ (SPA) │ │ (app.py) │ │ Spell/Gram/Punct│ │
|
| 19 |
-
│ └─────────┘ └──────────┘ └─────────────────┘ │
|
| 20 |
-
│ │ │ │ │
|
| 21 |
-
│ │ │ ┌─────────────────┐ │
|
| 22 |
-
│ │ ├─────────▶│ HF Models │ │
|
| 23 |
-
│ │ │ │ Summarization │ │
|
| 24 |
-
│ │ │ │ Grammar (Gradio)│ │
|
| 25 |
-
│ │ │ └─────────────────┘ │
|
| 26 |
-
│ │ │ │
|
| 27 |
-
│ ┌─────────┐ ┌──────────┐ ┌─────────────────┐ │
|
| 28 |
-
│ │Supabase │◀───│ Auth │───▶│ Documents DB │ │
|
| 29 |
-
│ │ (Cloud) │ │ Module │ │ Settings Sync │ │
|
| 30 |
-
│ └─────────┘ └──────────┘ └─────────────────┘ │
|
| 31 |
-
│ │
|
| 32 |
-
│ ┌────────────────────────────────────────────────┐ │
|
| 33 |
-
│ │ Chrome Extension (MV3) │ │
|
| 34 |
-
│ │ ┌──────────┐ ┌──────────┐ ┌───────────────┐ │ │
|
| 35 |
-
│ │ │ Content │ │Background│ │ Side Panel │ │ │
|
| 36 |
-
│ │ │ Script │ │ Worker │ │ + Popup │ │ │
|
| 37 |
-
│ │ └──────────┘ └──────────┘ └───────────────┘ │ │
|
| 38 |
-
│ └────────────────────────────────────────────────┘ │
|
| 39 |
-
└──────────────────────────────────────────────────────┘
|
| 40 |
-
```
|
| 41 |
-
|
| 42 |
-
### Technology Stack
|
| 43 |
-
|
| 44 |
-
| Layer | Technology | Notes |
|
| 45 |
-
|-------|-----------|-------|
|
| 46 |
-
| **Frontend** | Vanilla JS, HTML, CSS (Tailwind CDN) | Custom `contenteditable` editor engine |
|
| 47 |
-
| **Backend** | Flask (Python) | Single monolith `app.py` — 2,844 lines |
|
| 48 |
-
| **NLP Pipeline** | Custom Python modules | Spelling, Grammar, Punctuation, Autocomplete, Dialect |
|
| 49 |
-
| **AI Models** | Transformer-based | Summarization (local), Grammar (Gradio proxy), Spelling (CAMeL + custom) |
|
| 50 |
-
| **Database** | Supabase (PostgreSQL) | Documents, profiles, user settings |
|
| 51 |
-
| **Auth** | Supabase Auth | Guest (anonymous), Google OAuth |
|
| 52 |
-
| **Deployment** | HuggingFace Spaces (Docker) | CPU-only free tier |
|
| 53 |
-
| **Extension** | Chrome MV3 | Background SW, Content Script, Side Panel, Popup |
|
| 54 |
-
|
| 55 |
-
### File Structure Summary
|
| 56 |
-
|
| 57 |
-
| Directory | Files | Purpose |
|
| 58 |
-
|-----------|-------|---------|
|
| 59 |
-
| `src/` | 6 core files | Backend + HTML + CSS |
|
| 60 |
-
| `src/js/` | 8 JS files + 7 subdirs | Frontend logic |
|
| 61 |
-
| `src/js/auth/` | 5 files | Supabase auth (client, session, UI) |
|
| 62 |
-
| `src/js/documents/` | 4 files | Local doc management + export |
|
| 63 |
-
| `src/js/documents-cloud/` | 3 files | Supabase CRUD for documents |
|
| 64 |
-
| `src/js/sync/` | 3 files | Offline queue + conflict resolution |
|
| 65 |
-
| `src/js/settings-sync/` | 2 files | User settings cloud persistence |
|
| 66 |
-
| `src/nlp/` | 6 subdirs | All NLP processing modules |
|
| 67 |
-
| `extension/` | 8 files + 4 subdirs | Chrome Extension |
|
| 68 |
-
| `extension/shared/` | 9 files | Shared utilities (api, renderer, patches) |
|
| 69 |
-
| `extension/sidepanel/` | 3 files | Side panel UI |
|
| 70 |
-
| `tests/` | 16 test files | Backend unit tests |
|
| 71 |
-
| `extension/tests/` | 8 files | Extension integration tests |
|
| 72 |
-
|
| 73 |
-
---
|
| 74 |
-
|
| 75 |
-
## 2. Feature Inventory
|
| 76 |
-
|
| 77 |
-
### Core AI Features
|
| 78 |
-
|
| 79 |
-
| Feature | Backend API | Website Frontend | Extension | Files |
|
| 80 |
-
|---------|------------|-----------------|-----------|-------|
|
| 81 |
-
| **Spelling Correction** | ✅ `/api/spelling` + `/api/analyze` | ✅ Full (highlights, suggestions, apply) | ✅ Inline overlay + Popup + SidePanel | `nlp/spelling/`, `editor.js`, `renderer.js` |
|
| 82 |
-
| **Grammar Correction** | ✅ `/api/grammar` + `/api/analyze` | ✅ Full (via Gradio proxy to HF model) | ✅ Inline overlay + Popup + SidePanel | `nlp/grammar/`, `hf_inference.py` |
|
| 83 |
-
| **Punctuation** | ✅ `/api/punctuation` + `/api/analyze` | ✅ Full (PuncAra-v1 model) | ✅ Inline overlay + Popup + SidePanel | `nlp/punctuation/` |
|
| 84 |
-
| **Summarization** | ✅ `/api/summarize` | ✅ Full (tab in editor, length control) | ✅ Popup tab + SidePanel tab | `model_loader.py`, `summaries-api.js` |
|
| 85 |
-
| **AutoComplete** | ✅ `/api/autocomplete` | ✅ Ghost text + dropdown in editor | ⚠️ SidePanel text-box only, NO inline ghost text | `autocomplete.js`, sidepanel `btnAutocomplete` |
|
| 86 |
-
| **Dialect→MSA** | ✅ `/api/dialect` | ✅ Dedicated editor tab | ✅ SidePanel tab (basic text→text) | `nlp/dialect/` |
|
| 87 |
-
| **Quran Verification** | ✅ `/api/quran` | ✅ Dedicated editor tab | ✅ SidePanel tab (basic text→text) | `quran.py`, `quran_master.db` |
|
| 88 |
-
|
| 89 |
-
### Platform Features
|
| 90 |
-
|
| 91 |
-
| Feature | Website | Extension (Popup) | Extension (SidePanel) | Extension (Content Script) |
|
| 92 |
-
|---------|---------|-------------------|----------------------|--------------------------|
|
| 93 |
-
| **Authentication** | ✅ Guest + Google | ❌ None | ⚠️ Partial (`initExtensionAuth()` exists but requires web page auth sync) | ⚠️ Listens for `BAYAN_AUTH_SYNC` message from web |
|
| 94 |
-
| **Document Save** | ✅ Supabase CRUD | ❌ None | ⚠️ UI exists (`btnNewDocument`, `btnSaveSelection`) but depends on auth | ❌ None |
|
| 95 |
-
| **Document Load/History** | ✅ Full panel | ❌ None | ⚠️ UI exists (`documentsList`, `historyList`) but depends on auth | ❌ None |
|
| 96 |
-
| **Export (PDF/DOCX/TXT)** | ✅ Full (mammoth.js, docx.js) | ❌ None | ❌ None | ❌ None |
|
| 97 |
-
| **Import (TXT/DOCX)** | ✅ Full | ❌ None | ❌ None | ❌ None |
|
| 98 |
-
| **Settings Sync** | ✅ Supabase | ❌ None | ⚠️ Placeholder (`syncExtensionSettings()`) | ❌ None |
|
| 99 |
-
| **Theme Toggle** | ✅ Full dark/light | ❌ Hardcoded dark | ✅ Dark only | N/A |
|
| 100 |
-
| **Focus Mode** | ✅ Full | N/A | ❌ None | N/A |
|
| 101 |
-
| **Score Ring** | ✅ Animated SVG | ✅ Simplified | ✅ Simplified | ❌ None |
|
| 102 |
-
| **Writing Score History** | ✅ Sparkline chart | ❌ None | ❌ None | ❌ None |
|
| 103 |
-
| **Error Donut Chart** | ✅ SVG donut | ❌ None | ❌ None | ❌ None |
|
| 104 |
-
| **Offline Mode** | ✅ Graceful degradation | ❌ No offline handling | ❌ No offline handling | ❌ No offline handling |
|
| 105 |
-
| **Keyboard Shortcuts** | ✅ Extensive (Alt+1-3, Ctrl+S, etc.) | ❌ None | ❌ None | ❌ None |
|
| 106 |
-
|
| 107 |
-
---
|
| 108 |
-
|
| 109 |
-
## 3. Website vs Extension Comparison
|
| 110 |
-
|
| 111 |
-
### Authentication Flow
|
| 112 |
-
|
| 113 |
-
| Aspect | Website | Extension | Gap |
|
| 114 |
-
|--------|---------|-----------|-----|
|
| 115 |
-
| Guest login | ✅ `signInAnonymously()` | ❌ | **Critical** — extension users can't persist anything |
|
| 116 |
-
| Google OAuth | ✅ `signInWithOAuth()` | ❌ | **High** |
|
| 117 |
-
| Session restore | ✅ `restoreSession()` via Supabase | ❌ | **High** |
|
| 118 |
-
| Auth state sync | ✅ `onAuthStateChange()` | ⚠️ Listens for `BAYAN_AUTH_SYNC` postMessage but only works when user visits Bayan website with extension installed | **High** — unreliable |
|
| 119 |
-
| Auth-gated features | ✅ Documents, sync, settings | ⚠️ UI elements exist but non-functional without auth | **High** |
|
| 120 |
-
|
| 121 |
-
### AI Feature Comparison
|
| 122 |
-
|
| 123 |
-
| Feature | Website UX | Extension UX | Parity? |
|
| 124 |
-
|---------|-----------|-------------|---------|
|
| 125 |
-
| Analyze (S+G+P) | Rich editor with inline highlights, suggestion sidebar, popover tooltip, apply/dismiss per-suggestion | **Content Script:** Overlay marks + tooltip. **Popup/SidePanel:** Textarea + suggestion cards | ⚠️ Functional but UX gap |
|
| 126 |
-
| Summarize | Editor tab with radio buttons (short/medium/long) | Popup/SidePanel textarea with radio buttons | ✅ Near parity |
|
| 127 |
-
| AutoComplete | **Ghost text** inside editor (Tab to accept) | SidePanel has a text box with "إكمال" button but NO inline ghost text on 3rd party sites | **Medium** — missing the core UX |
|
| 128 |
-
| Dialect | Dedicated editor tab with "Convert" button | SidePanel tab with text box and "Convert" button | ✅ Near parity |
|
| 129 |
-
| Quran | Dedicated editor tab with search | SidePanel tab with text box and search | ✅ Near parity |
|
| 130 |
-
|
| 131 |
-
### Documents
|
| 132 |
-
|
| 133 |
-
| Aspect | Website | Extension | Gap |
|
| 134 |
-
|--------|---------|-----------|-----|
|
| 135 |
-
| Create document | ✅ `createDocument()` | ⚠️ Button exists in SidePanel but blocked by no auth | **High** |
|
| 136 |
-
| List documents | ✅ Desktop sidebar panel | ⚠️ `documentsList` in SidePanel workspace tab, blocked by no auth | **High** |
|
| 137 |
-
| Save/auto-save | ✅ Debounced sync via `SyncManager` | ❌ | **High** |
|
| 138 |
-
| Export PDF/DOCX | ✅ `export.js` | ❌ | **Medium** |
|
| 139 |
-
| Import | ✅ `import.js` (TXT, DOCX) | ❌ | **Low** |
|
| 140 |
-
|
| 141 |
-
---
|
| 142 |
-
|
| 143 |
-
## 4. Missing Features
|
| 144 |
-
|
| 145 |
-
### Critical (Blocks Production)
|
| 146 |
-
|
| 147 |
-
| # | Issue | Impact | Solution |
|
| 148 |
-
|---|-------|--------|----------|
|
| 149 |
-
| C1 | **`.env` file committed to Git** | Supabase URL and anon key are in the repo. While the anon key is safe for client use, this is a security anti-pattern and may expose the project URL. | Remove `.env` from Git history, use HF Spaces secrets exclusively. `.gitignore` has `.env` but it was committed before the rule was added. |
|
| 150 |
-
| C2 | **CORS wildcard `origins: "*"`** | Any website can call `/api/analyze`, `/api/summarize`, etc. directly. Abusers can drain compute. | Restrict CORS to `bayan10-bayan-api.hf.space` + extension origin `chrome-extension://<id>`. |
|
| 151 |
-
| C3 | **No rate limiting on API** | No throttle on any endpoint. A single user can overwhelm the free-tier HF Space. | Add Flask-Limiter or simple in-memory token bucket. |
|
| 152 |
-
|
| 153 |
-
### High (Important Feature Gap)
|
| 154 |
-
|
| 155 |
-
| # | Issue | Impact | Solution |
|
| 156 |
-
|---|-------|--------|----------|
|
| 157 |
-
| H1 | Extension has no auth | Users cannot access cloud docs, settings, or history from extension | Implement Supabase auth in extension via `chrome.identity` or shared session from Bayan website |
|
| 158 |
-
| H2 | Extension content script lacks AutoComplete ghost text | The flagship "ghost text" feature doesn't work on 3rd-party sites | Port `autocomplete.js` logic into `content-inline.js` with `/api/autocomplete` calls |
|
| 159 |
-
| H3 | Extension popup/sidepanel have no export | Users cannot export corrected text as PDF/DOCX | Add "Copy as formatted text" or lightweight export |
|
| 160 |
-
| H4 | No `documents` table migration | `supabase/migrations/001_profiles.sql` exists but no migration creates the `documents` table that `documents-api.js` uses | Create `002_documents.sql` migration |
|
| 161 |
-
| H5 | Backend monolith: `app.py` is 2,844 lines | Extremely difficult to maintain, test, or extend | Split into `routes/`, `services/`, `middleware/` modules |
|
| 162 |
-
|
| 163 |
-
### Medium (Improvement Needed)
|
| 164 |
-
|
| 165 |
-
| # | Issue | Impact | Solution |
|
| 166 |
-
|---|-------|--------|----------|
|
| 167 |
-
| M1 | `src/js/api.js` uses ES module `export` syntax but is loaded via `<script>` tag (not `type="module"`) | The `api.js` exports are **never importable** — the website uses inline `fetch()` calls instead | Either convert to `type="module"` or remove the dead `export` statements |
|
| 168 |
-
| M2 | Extension content script overlay doesn't handle `<iframe>` editors | Rich text editors in iframes (e.g., WordPress Gutenberg, TinyMCE) are invisible to the content script | Use `all_frames: true` in manifest or detect iframe editors |
|
| 169 |
-
| M3 | Duplicated suggestion rendering logic | `ui.js` (website) and `bayan-ui.js` (extension) implement the same card HTML generation | Extract to shared package |
|
| 170 |
-
| M4 | Extension `popup.js` (498 lines) and `sidepanel.js` (702 lines) share ~60% identical code | Maintenance nightmare — fixing a bug requires changes in 2+ files | Refactor into shared modules with UI-specific wrappers |
|
| 171 |
-
| M5 | Grammar model uses Gradio proxy with SSE streaming | Creates a hard dependency on external `mohammedahmedezz2004-bayan-arabic-grammarly-correction.hf.space`. If that Space goes down, grammar breaks. | Host the grammar model directly on the Bayan Space, or add fallback |
|
| 172 |
-
| M6 | No i18n framework on website | All strings are hardcoded in Arabic HTML. Adding English support requires rewriting HTML | Add simple i18n JSON loader (extension already has `_locales/ar/`) |
|
| 173 |
-
|
| 174 |
-
### Low (Nice to Have)
|
| 175 |
-
|
| 176 |
-
| # | Issue | Impact | Solution |
|
| 177 |
-
|---|-------|--------|----------|
|
| 178 |
-
| L1 | Extension only has Arabic locale | Cannot be published on Chrome Web Store for non-Arabic users | Add `_locales/en/messages.json` |
|
| 179 |
-
| L2 | No analytics or telemetry | No visibility into usage patterns, error rates, or feature adoption | Add lightweight event tracking (privacy-respecting) |
|
| 180 |
-
| L3 | Heavy vendor libraries loaded synchronously | `mammoth.browser.min.js`, `docx.umd.js`, `html2canvas.min.js` block initial render | Lazy-load on first export action |
|
| 181 |
-
| L4 | No service worker for website | No offline caching for the web app | Add basic SW for static assets |
|
| 182 |
-
|
| 183 |
-
---
|
| 184 |
-
|
| 185 |
-
## 5. Bugs Found
|
| 186 |
-
|
| 187 |
-
| # | Bug | Severity | Location | Status |
|
| 188 |
-
|---|-----|----------|----------|--------|
|
| 189 |
-
| B1 | `ENABLE_AUTOCOMPLETE_MODEL = False` in `app.py:62` | Medium | `app.py` line 62 | AutoComplete model disabled by default — `/api/autocomplete` still works via lazy-loading, but the flag is misleading |
|
| 190 |
-
| B2 | `src/js/api.js` uses `export` keyword but is not loaded as ES module | Low | `api.js` | Dead code — never actually imported anywhere |
|
| 191 |
-
| B3 | Extension `bayan-api.js` missing functions `bayanAutocomplete`, `bayanDialect`, `bayanQuran` | High | `bayan-api.js` only defines `bayanAnalyze`, `bayanSummarize`, `bayanHealthCheck` | SidePanel calls these undefined functions — will throw `ReferenceError` |
|
| 192 |
-
| B4 | Extension content script overlay position breaks on page scroll (absolute vs fixed positioning) | Medium | `content-inline.js:191` | Overlay uses `window.scrollY` but doesn't update on window resize |
|
| 193 |
-
| B5 | Score sparkline renders with only 2 data points creating a meaningless line | Low | `format.js` | ✅ Fixed (raised minimum to 3 points) |
|
| 194 |
-
| B6 | `dismissAllFiltered()` only removed DOM elements without updating `window.currentSuggestions` | Medium | `format.js` | ✅ Fixed |
|
| 195 |
-
|
| 196 |
-
---
|
| 197 |
-
|
| 198 |
-
## 6. Security Issues
|
| 199 |
-
|
| 200 |
-
| # | Issue | Severity | Details |
|
| 201 |
-
|---|-------|----------|---------|
|
| 202 |
-
| S1 | **`.env` committed to repo** | **Critical** | Supabase URL + anon key visible in Git history. While anon keys are designed for client-side use, the URL+key combo allows anyone to make Supabase API calls. |
|
| 203 |
-
| S2 | **CORS `origins: "*"`** | **Critical** | `app.py:94` — allows any origin to call all API endpoints. Enables: (a) compute theft, (b) DDoS via free proxy, (c) third-party scraping. |
|
| 204 |
-
| S3 | **No API authentication** | **High** | No JWT, API key, or session check on any endpoint. Extension uses only `host_permissions` scoping. |
|
| 205 |
-
| S4 | **XSS risk in editor** | **Medium** | `setEditorHTML()` injects HTML directly into contenteditable. While `renderer.js` escapes text, any upstream bug in suggestion rendering could inject arbitrary HTML. |
|
| 206 |
-
| S5 | **Supabase RLS incomplete** | **Medium** | Only `profiles` has RLS policies. The `documents` table (if exists) needs RLS to prevent cross-user data access. |
|
| 207 |
-
| S6 | **Extension Trusted Types partial** | **Low** | `content-inline.js` implements `trustedTypes.createPolicy()` with identity transform (`input => input`), which passes the CSP check but provides no actual sanitization. |
|
| 208 |
-
| S7 | **Debug endpoint exposed** | **Low** | `/api/debug-models` is accessible in production and leaks internal model status, memory usage, and startup errors. |
|
| 209 |
-
|
| 210 |
-
---
|
| 211 |
-
|
| 212 |
-
## 7. Performance Issues
|
| 213 |
-
|
| 214 |
-
| # | Issue | Severity | Details |
|
| 215 |
-
|---|-------|----------|---------|
|
| 216 |
-
| P1 | **`app.py` is 2,844 lines** | High | Single-file monolith. Every request loads all imports. Cold start on HF Spaces free tier takes ~60s. |
|
| 217 |
-
| P2 | **Vendor JS loaded synchronously** | Medium | `mammoth.browser.min.js` (340KB), `docx.umd.js` (1.2MB), `html2canvas.min.js` (210KB) all load on page start even if never used. |
|
| 218 |
-
| P3 | **Extension content script injected on ALL sites** | Medium | `matches: ["https://*/*", "http://*/*"]` — runs on every page. The `BayanController` module loads even on sites where user never types Arabic. |
|
| 219 |
-
| P4 | **No API response caching on website** | Medium | Every keystroke after debounce triggers a full `/api/analyze` call. Extension has background worker caching, but website doesn't. |
|
| 220 |
-
| P5 | **Grammar Gradio SSE dependency** | Medium | Grammar correction requires streaming from external HF Space. Average latency: 3-8 seconds. Adds significant delay to the analysis pipeline. |
|
| 221 |
-
| P6 | **Quran DB is 23MB** | Low | `quran_master.db` (SQLite, 23MB) is loaded into the Docker container. Fine for now, but limits scaling. |
|
| 222 |
-
| P7 | **No CSS/JS minification** | Low | All assets served unminified. `components.css` alone is 4,125+ lines (~90KB). |
|
| 223 |
-
|
| 224 |
-
---
|
| 225 |
-
|
| 226 |
-
## 8. UX Problems
|
| 227 |
-
|
| 228 |
-
| # | Issue | Severity | Details |
|
| 229 |
-
|---|-------|----------|---------|
|
| 230 |
-
| U1 | **Extension content script tooltip clips at viewport edge** | Medium | Tooltip for highlighted errors can overflow off-screen on narrow viewports. No boundary detection. |
|
| 231 |
-
| U2 | **No loading skeleton on website** | Medium | Editor page shows blank white space during model initialization. No skeleton/shimmer to indicate loading. |
|
| 232 |
-
| U3 | **Extension popup has no dialect/quran/autocomplete** | Medium | Only "تصحيح" and "تلخيص" tabs. SidePanel has all features, but popup is the first surface users see. |
|
| 233 |
-
| U4 | **Inconsistent branding between popup and sidepanel** | Low | Popup uses `.bayan-*` class prefix, SidePanel uses `.sp-*` prefix. Different color palettes. |
|
| 234 |
-
| U5 | **No onboarding flow** | Low | First-time users see an empty editor with no guidance. No tooltips, walkthrough, or sample text. |
|
| 235 |
-
| U6 | **Mobile responsiveness incomplete** | Low | Website has responsive breakpoints but bottom-sheet for suggestions lacks smooth gestures. |
|
| 236 |
-
|
| 237 |
-
---
|
| 238 |
-
|
| 239 |
-
## 9. Technical Debt
|
| 240 |
-
|
| 241 |
-
### Backend
|
| 242 |
-
|
| 243 |
-
| Item | Severity | Details |
|
| 244 |
-
|------|----------|---------|
|
| 245 |
-
| **Monolith `app.py`** | High | 2,844 lines. Contains routes, NLP logic, model loading, diffing algorithms, offset mapping, pipeline orchestration, Quran search integration, and CORS — all in one file. |
|
| 246 |
-
| **Duplicated directional blocks** | Medium | `_DIRECTIONAL_BLOCKS` in `app.py` duplicates logic that also exists in `araspell_rules.py`. |
|
| 247 |
-
| **12+ test files at project root** | Low | `test_proof.py`, `test_sv.py`, `test_pc.py`, etc. scattered in root instead of `tests/`. |
|
| 248 |
-
| **Dead code** | Low | `ENABLE_DIALECT_MODEL = False`, `ENABLE_AUTOCOMPLETE_MODEL = False` flags in `app.py` — no code path checks them for these features since they use lazy-loading. |
|
| 249 |
-
| **Archive directory** | Low | `archive/legacy_scripts/` contains old code that shouldn't ship in Docker image. |
|
| 250 |
-
|
| 251 |
-
### Frontend (Website)
|
| 252 |
-
|
| 253 |
-
| Item | Severity | Details |
|
| 254 |
-
|------|----------|---------|
|
| 255 |
-
| **`api.js` dead exports** | Medium | `export async function analyzeText()` — never imported. Website uses inline `fetch()` in `editor.js`. |
|
| 256 |
-
| **Tight coupling in `editor.js`** | Medium | DOM manipulation, API calls, suggestion management, and UI updates all in one 29KB file. |
|
| 257 |
-
| **No build system** | Low | No bundler, no tree-shaking, no code-splitting. All JS loaded via `<script>` tags. |
|
| 258 |
-
| **CSS structure** | Low | Single `components.css` at 4,125+ lines. No CSS modules, no scoping. |
|
| 259 |
-
|
| 260 |
-
### Extension
|
| 261 |
-
|
| 262 |
-
| Item | Severity | Details |
|
| 263 |
-
|------|----------|---------|
|
| 264 |
-
| **`popup.js` and `sidepanel.js` code duplication** | High | ~60% identical code: `updateCounts()`, `markStale()`, `setLoading()`, `updateScore()`, `renderSuggestions()`, `showToast()`. |
|
| 265 |
-
| **Missing API functions in `bayan-api.js`** | High | SidePanel calls `bayanAutocomplete()`, `bayanDialect()`, `bayanQuran()` which are not defined in `bayan-api.js`. These must be defined elsewhere or will throw. |
|
| 266 |
-
| **No TypeScript / JSDoc validation** | Low | All extension code is plain JS with no compile-time checking. |
|
| 267 |
-
|
| 268 |
-
---
|
| 269 |
-
|
| 270 |
-
## 10. Recommended Roadmap
|
| 271 |
-
|
| 272 |
-
### Phase 1: Security Hardening ⚡ (Critical — Before Any Growth)
|
| 273 |
-
|
| 274 |
-
**Timeline: 1-2 days**
|
| 275 |
-
|
| 276 |
-
1. **Remove `.env` from Git history** — `git filter-repo` (which the Git documentation recommends over `git filter-branch`) or BFG Repo-Cleaner
|
| 277 |
-
2. **Restrict CORS** — Change `origins: "*"` to allowlist `["https://bayan10-bayan-api.hf.space", "chrome-extension://<ext-id>"]`
|
| 278 |
-
3. **Add rate limiting** — Flask-Limiter: 30 req/min per IP for `/api/analyze`, 10 req/min for `/api/summarize`
|
| 279 |
-
4. **Disable debug endpoint in production** — Guard `/api/debug-models` behind `app.debug` flag
|
| 280 |
-
5. **Add Supabase RLS for `documents` table** — `CREATE POLICY ... USING (auth.uid() = user_id)`
|
| 281 |
-
|
| 282 |
-
### Phase 2: Extension Auth Unification 🔐 (High)
|
| 283 |
-
|
| 284 |
-
**Timeline: 3-5 days**
|
| 285 |
-
|
| 286 |
-
1. **Implement Supabase client in extension** — Add `@supabase/supabase-js` as UMD bundle in `shared/`
|
| 287 |
-
2. **Auth flow**: Use `chrome.identity.launchWebAuthFlow()` for Google OAuth → receive tokens → init Supabase session
|
| 288 |
-
3. **Session persistence**: Store refresh token in `chrome.storage.local`
|
| 289 |
-
4. **Auth sync**: When user logs in on website, broadcast via `postMessage` → content script → `chrome.storage`
|
| 290 |
-
5. **Result**: Extension users can access their documents, settings, and history
|
| 291 |
-
|
| 292 |
-
### Phase 3: Extension Feature Parity 🔧 (High)
|
| 293 |
-
|
| 294 |
-
**Timeline: 3-5 days**
|
| 295 |
-
|
| 296 |
-
1. **Add missing API functions** to `bayan-api.js`: `bayanAutocomplete()`, `bayanDialect()`, `bayanQuran()`
|
| 297 |
-
2. **Add autocomplete/dialect/quran tabs to popup** (currently SidePanel-only)
|
| 298 |
-
3. **Inline ghost text for content script** — Port `autocomplete.js` logic for textareas on 3rd-party sites
|
| 299 |
-
4. **Add basic export** — "Copy corrected text" button already exists; add "Download as TXT"
|
| 300 |
-
|
| 301 |
-
### Phase 4: Backend Refactoring 🏗️ (Medium)
|
| 302 |
-
|
| 303 |
-
**Timeline: 5-7 days**
|
| 304 |
-
|
| 305 |
-
1. **Split `app.py`** into:
|
| 306 |
-
- `routes/analyze.py`, `routes/summarize.py`, `routes/dialect.py`, `routes/quran.py`
|
| 307 |
-
- `services/pipeline.py` (orchestration)
|
| 308 |
-
- `middleware/cors.py`, `middleware/rate_limit.py`
|
| 309 |
-
2. **Create `002_documents.sql` migration** with proper RLS
|
| 310 |
-
3. **Move root-level test files** into `tests/`
|
| 311 |
-
4. **Remove `archive/` from Docker build** (add to `.dockerignore`)
|
| 312 |
-
|
| 313 |
-
### Phase 5: Extension Code Quality 🧹 (Medium)
|
| 314 |
-
|
| 315 |
-
**Timeline: 3-4 days**
|
| 316 |
-
|
| 317 |
-
1. **Extract shared logic** from `popup.js` and `sidepanel.js` into `shared/bayan-core.js`
|
| 318 |
-
2. **Add English locale** `_locales/en/messages.json`
|
| 319 |
-
3. **Add `all_frames: true`** to manifest for iframe editor support
|
| 320 |
-
4. **Add theme toggle** to popup and sidepanel
|
| 321 |
-
|
| 322 |
-
### Phase 6: Performance & Polish ✨ (Low)
|
| 323 |
-
|
| 324 |
-
**Timeline: 2-3 days**
|
| 325 |
-
|
| 326 |
-
1. **Lazy-load vendor libs** (mammoth, docx, html2canvas) on first use
|
| 327 |
-
2. **Add website-side API caching** (localStorage TTL cache like extension has)
|
| 328 |
-
3. **Add CSS/JS minification** to Docker build
|
| 329 |
-
4. **Add loading skeletons** for editor page
|
| 330 |
-
5. **Add onboarding flow** — sample text + guided tooltips
|
| 331 |
-
|
| 332 |
-
---
|
| 333 |
-
|
| 334 |
-
## Summary Matrix
|
| 335 |
-
|
| 336 |
-
| Category | Critical | High | Medium | Low | Total |
|
| 337 |
-
|----------|---------|------|--------|-----|-------|
|
| 338 |
-
| **Security** | 2 (S1, S2) | 1 (S3) | 2 (S4, S5) | 2 (S6, S7) | 7 |
|
| 339 |
-
| **Missing Features** | 0 | 5 (H1-H5) | 6 (M1-M6) | 4 (L1-L4) | 15 |
|
| 340 |
-
| **Bugs** | 0 | 1 (B3) | 2 (B1, B4) | 1 (B2) | 4 (+2 fixed) |
|
| 341 |
-
| **Performance** | 0 | 1 (P1) | 4 (P2-P5) | 2 (P6, P7) | 7 |
|
| 342 |
-
| **UX** | 0 | 0 | 3 (U1-U3) | 3 (U4-U6) | 6 |
|
| 343 |
-
| **Tech Debt** | 0 | 3 | 5 | 5 | 13 |
|
| 344 |
-
| **TOTAL** | **2** | **11** | **22** | **17** | **52** |
|
| 345 |
-
|
| 346 |
-
---
|
| 347 |
-
|
| 348 |
-
## Final Verdict
|
| 349 |
-
|
| 350 |
-
Bayan is a technically impressive product with a solid NLP pipeline, a mature editor engine, and a well-architected extension. The core correction features (Spelling → Grammar → Punctuation) work end-to-end on both the website and the Chrome extension.
|
| 351 |
-
|
| 352 |
-
**What Bayan does well:**
|
| 353 |
-
- ✅ Custom contenteditable editor with proper cursor handling
|
| 354 |
-
- ✅ Multi-stage NLP pipeline with offset mapping
|
| 355 |
-
- ✅ Extension uses overlay-only rendering (never modifies user DOM)
|
| 356 |
-
- ✅ Supabase integration for cloud persistence
|
| 357 |
-
- ✅ Comprehensive test coverage (16 backend test files)
|
| 358 |
-
- ✅ Extension follows MV3 best practices (service worker, side panel)
|
| 359 |
-
|
| 360 |
-
**What must be fixed before growth:**
|
| 361 |
-
1. 🔴 **Security**: CORS wildcard + no rate limiting = anyone can abuse the API
|
| 362 |
-
2. 🔴 **Auth gap**: Extension users can't persist anything — breaks the SaaS value proposition
|
| 363 |
-
3. 🟡 **Extension missing API functions**: `bayanAutocomplete/Dialect/Quran` will throw `ReferenceError`
|
| 364 |
-
4. 🟡 **Backend monolith**: 2,844-line `app.py` is a maintenance bottleneck
|
| 365 |
-
|
| 366 |
-
**Bottom line:** Bayan is 80% of the way to a production-grade SaaS product. The remaining 20% is security hardening, extension auth, and code architecture — all achievable in 2-3 focused weeks.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Dockerfile
CHANGED
|
@@ -77,6 +77,47 @@ COPY quran.py ./
|
|
| 77 |
COPY quran_master.db ./
|
| 78 |
COPY .env* ./
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
# Set environment variables
|
| 81 |
ENV PORT=7860
|
| 82 |
ENV DEBUG=False
|
|
|
|
| 77 |
COPY quran_master.db ./
|
| 78 |
COPY .env* ./
|
| 79 |
|
| 80 |
+
# Minify JS/CSS for production.
# NOTE: Docker joins backslash-continued lines into ONE physical line before
# the shell runs, so the program handed to `python -c` may only contain simple
# statements separated by `;`. A `for`/`if`/`with` placed after a `;` is a
# SyntaxError and would fail the build, hence the comprehension-based form.
RUN pip install --no-cache-dir rjsmin rcssmin && \
    python -c "\
import pathlib, rjsmin, rcssmin; \
minify = {'.js': rjsmin.jsmin, '.css': rcssmin.cssmin}; \
targets = [p for p in pathlib.Path('src').rglob('*') if p.is_file() and p.suffix in minify]; \
[p.write_text(minify[p.suffix](p.read_text(encoding='utf-8')), encoding='utf-8') for p in targets]; \
print(f'Minified {len(targets)} JS/CSS files'); \
"
|
| 94 |
+
|
| 95 |
+
# Bundle JS files in dependency order (replaces 33 script tags).
# NOTE: Docker joins backslash-continued lines into ONE physical line before
# the shell runs, so the program handed to `python -c` may only contain simple
# statements separated by `;` (no `for`/`if`/`with` blocks after a `;`).
# Files listed in js_order but absent from src/ are skipped and reported, so
# the log shows the real number of bundled files.
RUN python -c "\
import pathlib; \
js_order = [ \
'js/vendor/supabase.min.js', 'js/auth/config.js', 'js/vendor-loader.js', \
'js/auth/client.js', 'js/auth/session.js', 'js/auth/auth.js', 'js/auth/auth-ui.js', \
'js/theme.js', 'js/vendor/FileSaver.min.js', 'js/dialogs.js', 'js/i18n.js', \
'js/analytics.js', 'js/onboarding.js', 'js/renderer.js', 'js/selection.js', \
'js/ui.js', 'js/documents/doc-utils.js', 'js/editor.js', 'js/autocomplete.js', \
'js/format.js', 'js/documents/import.js', 'js/documents/export.js', \
'js/documents/documents.js', 'js/sync/sync-queue.js', 'js/sync/sync-resolver.js', \
'js/sync/sync-manager.js', 'js/documents-cloud/documents-api.js', \
'js/documents-cloud/documents-state.js', 'js/documents-cloud/documents-ui.js', \
'js/summaries/summaries-api.js', 'js/summaries/summaries-ui.js', \
'js/settings-sync/settings-api.js', 'js/settings-sync/settings-sync.js', \
'js/app.js', \
]; \
src = pathlib.Path('src'); \
found = [src / f for f in js_order if (src / f).exists()]; \
missing = [f for f in js_order if not (src / f).exists()]; \
(src / 'js' / 'bayan.bundle.js').write_text(''.join(p.read_text(encoding='utf-8') + '\n' for p in found), encoding='utf-8'); \
print(f'Bundled {len(found)} of {len(js_order)} JS files; missing: {missing}'); \
"
|
| 120 |
+
|
| 121 |
# Set environment variables
|
| 122 |
ENV PORT=7860
|
| 123 |
ENV DEBUG=False
|
PROJECT_DESCRIPTION.md
CHANGED
|
@@ -11,7 +11,7 @@ Bayan/
|
|
| 11 |
├── data/ # Directory for raw and processed datasets (empty by default)
|
| 12 |
├── models/ # Deep learning models directory (organized by task)
|
| 13 |
│ ├── Autocomplete/ # GPT-2 autocomplete model
|
| 14 |
-
│ ├──
|
| 15 |
│ ├── Punctuation/ # Seq2Seq punctuation correction model
|
| 16 |
│ ├── Spelling/ # BERT-based spelling corrector checkpoint
|
| 17 |
│ └── Summarization/ # mBART summarization model checkpoint
|
|
@@ -199,7 +199,7 @@ Verify that you have placed the model files under the `models/` directory:
|
|
| 199 |
- Summarization: `models/Summarization/Model/`
|
| 200 |
- Spelling: `models/Spelling/Model/`
|
| 201 |
- Autocomplete: `models/Autocomplete/Model/`
|
| 202 |
-
- Grammar: `models/Grammrar/Model/`
|
| 203 |
- Punctuation: `models/Punctuation/Model/`
|
| 204 |
|
| 205 |
### 3. Run the Server
|
|
|
|
| 11 |
├── data/ # Directory for raw and processed datasets (empty by default)
|
| 12 |
├── models/ # Deep learning models directory (organized by task)
|
| 13 |
│ ├── Autocomplete/ # GPT-2 autocomplete model
|
| 14 |
+
│ ├── Grammar/ # Gemma-based grammar correction model
|
| 15 |
│ ├── Punctuation/ # Seq2Seq punctuation correction model
|
| 16 |
│ ├── Spelling/ # BERT-based spelling corrector checkpoint
|
| 17 |
│ └── Summarization/ # mBART summarization model checkpoint
|
|
|
|
| 199 |
- Summarization: `models/Summarization/Model/`
|
| 200 |
- Spelling: `models/Spelling/Model/`
|
| 201 |
- Autocomplete: `models/Autocomplete/Model/`
|
| 202 |
+
- Grammar: `models/Grammar/Model/`
|
| 203 |
- Punctuation: `models/Punctuation/Model/`
|
| 204 |
|
| 205 |
### 3. Run the Server
|
QUICKSTART.md
DELETED
|
@@ -1,126 +0,0 @@
|
|
| 1 |
-
# Bayan - Quick Start Guide
|
| 2 |
-
|
| 3 |
-
## 🚀 Quick Start
|
| 4 |
-
|
| 5 |
-
### 1. Install Dependencies
|
| 6 |
-
```bash
|
| 7 |
-
pip install -r requirements.txt
|
| 8 |
-
```
|
| 9 |
-
|
| 10 |
-
**Note:** If you have issues, install PyTorch separately:
|
| 11 |
-
- CPU: `pip install torch --index-url https://download.pytorch.org/whl/cpu`
|
| 12 |
-
- GPU: Visit https://pytorch.org/get-started/locally/
|
| 13 |
-
|
| 14 |
-
### 2. Run the Application
|
| 15 |
-
```bash
|
| 16 |
-
python run_app.py
|
| 17 |
-
```
|
| 18 |
-
|
| 19 |
-
### 3. Open in Browser
|
| 20 |
-
Navigate to: **http://localhost:5000**
|
| 21 |
-
|
| 22 |
-
## 📁 Project Structure
|
| 23 |
-
|
| 24 |
-
```
|
| 25 |
-
Bayan/
|
| 26 |
-
├── src/
|
| 27 |
-
│ ├── app.py # Flask backend server
|
| 28 |
-
│ ├── model_loader.py # Model loading and inference
|
| 29 |
-
│ └── index.html # Web interface
|
| 30 |
-
├── models/
|
| 31 |
-
│ └── arabic_summarization_model/
|
| 32 |
-
│ └── content/drive/MyDrive/arabic_summarization_model/
|
| 33 |
-
│ ├── config.json
|
| 34 |
-
│ ├── model.safetensors
|
| 35 |
-
│ └── ... (other model files)
|
| 36 |
-
├── run_app.py # Application launcher
|
| 37 |
-
├── requirements.txt # Python dependencies
|
| 38 |
-
└── README_SETUP.md # Detailed setup guide
|
| 39 |
-
```
|
| 40 |
-
|
| 41 |
-
## 🔧 Features
|
| 42 |
-
|
| 43 |
-
✅ **Robust Error Handling**
|
| 44 |
-
- Path validation for model files
|
| 45 |
-
- Graceful fallbacks if model loading fails
|
| 46 |
-
- Input validation and sanitization
|
| 47 |
-
- Clear error messages
|
| 48 |
-
|
| 49 |
-
✅ **Security**
|
| 50 |
-
- Input length limits (max 5000 characters)
|
| 51 |
-
- CORS enabled for web interface
|
| 52 |
-
- Safe model loading
|
| 53 |
-
- Error logging
|
| 54 |
-
|
| 55 |
-
✅ **User Experience**
|
| 56 |
-
- Loading indicators
|
| 57 |
-
- Real-time feedback
|
| 58 |
-
- Arabic language support
|
| 59 |
-
- Responsive design
|
| 60 |
-
|
| 61 |
-
## 🧪 Testing
|
| 62 |
-
|
| 63 |
-
### Test API Health
|
| 64 |
-
```bash
|
| 65 |
-
curl http://localhost:5000/api/health
|
| 66 |
-
```
|
| 67 |
-
|
| 68 |
-
### Test Summarization
|
| 69 |
-
```bash
|
| 70 |
-
curl -X POST http://localhost:5000/api/summarize \
|
| 71 |
-
-H "Content-Type: application/json" \
|
| 72 |
-
-d '{"text": "نص تجريبي للاختبار", "length": 2, "full_text": true}'
|
| 73 |
-
```
|
| 74 |
-
|
| 75 |
-
## 🐛 Troubleshooting
|
| 76 |
-
|
| 77 |
-
### Model Not Found
|
| 78 |
-
- Verify model path: `models/arabic_summarization_model/content/drive/MyDrive/arabic_summarization_model/`
|
| 79 |
-
- Check that `config.json` exists
|
| 80 |
-
- The app will search multiple possible locations automatically
|
| 81 |
-
|
| 82 |
-
### Dependencies Missing
|
| 83 |
-
```bash
|
| 84 |
-
python check_dependencies.py
|
| 85 |
-
pip install -r requirements.txt
|
| 86 |
-
```
|
| 87 |
-
|
| 88 |
-
### Port Already in Use
|
| 89 |
-
```bash
|
| 90 |
-
set PORT=5001
|
| 91 |
-
python run_app.py
|
| 92 |
-
```
|
| 93 |
-
|
| 94 |
-
## 📝 API Documentation
|
| 95 |
-
|
| 96 |
-
### POST /api/summarize
|
| 97 |
-
Summarize Arabic text.
|
| 98 |
-
|
| 99 |
-
**Request:**
|
| 100 |
-
```json
|
| 101 |
-
{
|
| 102 |
-
"text": "النص العربي...",
|
| 103 |
-
"length": 2, // 1=short, 2=medium, 3=long
|
| 104 |
-
"full_text": true
|
| 105 |
-
}
|
| 106 |
-
```
|
| 107 |
-
|
| 108 |
-
**Response:**
|
| 109 |
-
```json
|
| 110 |
-
{
|
| 111 |
-
"status": "success",
|
| 112 |
-
"summary": "الملخص...",
|
| 113 |
-
"original_length": 500,
|
| 114 |
-
"summary_length": 150
|
| 115 |
-
}
|
| 116 |
-
```
|
| 117 |
-
|
| 118 |
-
## 🎯 Next Steps
|
| 119 |
-
|
| 120 |
-
1. Install dependencies: `pip install -r requirements.txt`
|
| 121 |
-
2. Run the app: `python run_app.py`
|
| 122 |
-
3. Open browser: http://localhost:5000
|
| 123 |
-
4. Write Arabic text and click "توليد الملخص"
|
| 124 |
-
|
| 125 |
-
For detailed information, see `README_SETUP.md`.
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README_SETUP.md
DELETED
|
@@ -1,172 +0,0 @@
|
|
| 1 |
-
# Bayan - Arabic Text Summarization Setup Guide
|
| 2 |
-
|
| 3 |
-
## Overview
|
| 4 |
-
Bayan is an Arabic text summarization application with a web interface. This guide will help you set up and run the application.
|
| 5 |
-
|
| 6 |
-
## Prerequisites
|
| 7 |
-
- Python 3.8 or higher
|
| 8 |
-
- pip (Python package manager)
|
| 9 |
-
- At least 4GB RAM (8GB+ recommended for better performance)
|
| 10 |
-
- Model files in the correct location (see below)
|
| 11 |
-
|
| 12 |
-
## Installation Steps
|
| 13 |
-
|
| 14 |
-
### 1. Install Dependencies
|
| 15 |
-
```bash
|
| 16 |
-
pip install -r requirements.txt
|
| 17 |
-
```
|
| 18 |
-
|
| 19 |
-
**Note:** If you encounter issues installing PyTorch, you may need to install it separately:
|
| 20 |
-
- For CPU: `pip install torch --index-url https://download.pytorch.org/whl/cpu`
|
| 21 |
-
- For CUDA: Visit https://pytorch.org/get-started/locally/ for the appropriate command
|
| 22 |
-
|
| 23 |
-
### 2. Verify Model Location
|
| 24 |
-
The model should be located at:
|
| 25 |
-
```
|
| 26 |
-
models/arabic_summarization_model/content/drive/MyDrive/arabic_summarization_model/
|
| 27 |
-
```
|
| 28 |
-
|
| 29 |
-
Required files:
|
| 30 |
-
- `config.json`
|
| 31 |
-
- `tokenizer.json`
|
| 32 |
-
- `model.safetensors`
|
| 33 |
-
- `sentencepiece.bpe.model`
|
| 34 |
-
- Other tokenizer/model files
|
| 35 |
-
|
| 36 |
-
### 3. Run the Application
|
| 37 |
-
|
| 38 |
-
#### Option A: Using the run script (Recommended)
|
| 39 |
-
```bash
|
| 40 |
-
python run_app.py
|
| 41 |
-
```
|
| 42 |
-
|
| 43 |
-
#### Option B: Direct Flask run
|
| 44 |
-
```bash
|
| 45 |
-
cd src
|
| 46 |
-
python app.py
|
| 47 |
-
```
|
| 48 |
-
|
| 49 |
-
#### Option C: Using Flask CLI
|
| 50 |
-
```bash
|
| 51 |
-
cd src
|
| 52 |
-
export FLASK_APP=app.py
|
| 53 |
-
flask run
|
| 54 |
-
```
|
| 55 |
-
|
| 56 |
-
### 4. Access the Application
|
| 57 |
-
Open your browser and navigate to:
|
| 58 |
-
```
|
| 59 |
-
http://localhost:5000
|
| 60 |
-
```
|
| 61 |
-
|
| 62 |
-
## Configuration
|
| 63 |
-
|
| 64 |
-
### Environment Variables
|
| 65 |
-
- `PORT`: Server port (default: 5000)
|
| 66 |
-
- `DEBUG`: Enable debug mode (default: False)
|
| 67 |
-
```bash
|
| 68 |
-
export DEBUG=True
|
| 69 |
-
export PORT=8080
|
| 70 |
-
```
|
| 71 |
-
|
| 72 |
-
### Supabase Authentication (Phase 5)
|
| 73 |
-
|
| 74 |
-
See `.env.example` and `PHASE_5_IMPLEMENTATION_PLAN.md`.
|
| 75 |
-
|
| 76 |
-
1. Create a Supabase project and enable **Anonymous** + **Google** auth.
|
| 77 |
-
2. Run `supabase/migrations/001_profiles.sql` in the SQL Editor.
|
| 78 |
-
3. Set meta tags in `src/index.html`:
|
| 79 |
-
```html
|
| 80 |
-
<meta name="supabase-url" content="https://YOUR_PROJECT.supabase.co">
|
| 81 |
-
<meta name="supabase-anon-key" content="YOUR_ANON_KEY">
|
| 82 |
-
```
|
| 83 |
-
4. Add redirect URL: `http://localhost:5000/**`
|
| 84 |
-
|
| 85 |
-
If Supabase is not configured, the editor still works in offline auth mode.
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
### Model Not Found Error
|
| 89 |
-
If you see "Model not found" error:
|
| 90 |
-
1. Verify the model path exists
|
| 91 |
-
2. Check that all required files are present
|
| 92 |
-
3. The application will search multiple possible paths automatically
|
| 93 |
-
|
| 94 |
-
### Out of Memory Error
|
| 95 |
-
If you encounter memory issues:
|
| 96 |
-
1. Close other applications
|
| 97 |
-
2. Use CPU mode (it will automatically use CPU if CUDA is not available)
|
| 98 |
-
3. Reduce the `MAX_TEXT_LENGTH` in `src/app.py` if needed
|
| 99 |
-
|
| 100 |
-
### Port Already in Use
|
| 101 |
-
If port 5000 is already in use:
|
| 102 |
-
```bash
|
| 103 |
-
export PORT=5001
|
| 104 |
-
python run_app.py
|
| 105 |
-
```
|
| 106 |
-
|
| 107 |
-
### Slow Performance
|
| 108 |
-
- First run will be slower as the model loads
|
| 109 |
-
- Subsequent requests will be faster
|
| 110 |
-
- Using GPU (CUDA) significantly improves performance
|
| 111 |
-
|
| 112 |
-
## API Endpoints
|
| 113 |
-
|
| 114 |
-
### Health Check
|
| 115 |
-
```
|
| 116 |
-
GET /api/health
|
| 117 |
-
```
|
| 118 |
-
Returns server status and model loading state.
|
| 119 |
-
|
| 120 |
-
### Summarize Text
|
| 121 |
-
```
|
| 122 |
-
POST /api/summarize
|
| 123 |
-
Content-Type: application/json
|
| 124 |
-
|
| 125 |
-
{
|
| 126 |
-
"text": "النص العربي المراد تلخيصه...",
|
| 127 |
-
"length": 2, // 1=short, 2=medium, 3=long
|
| 128 |
-
"full_text": true
|
| 129 |
-
}
|
| 130 |
-
```
|
| 131 |
-
|
| 132 |
-
Response:
|
| 133 |
-
```json
|
| 134 |
-
{
|
| 135 |
-
"status": "success",
|
| 136 |
-
"summary": "الملخص المولد...",
|
| 137 |
-
"original_length": 500,
|
| 138 |
-
"summary_length": 150
|
| 139 |
-
}
|
| 140 |
-
```
|
| 141 |
-
|
| 142 |
-
## Security Features
|
| 143 |
-
|
| 144 |
-
- Input validation (text length limits)
|
| 145 |
-
- CORS enabled for web interface
|
| 146 |
-
- Error handling and logging
|
| 147 |
-
- Path validation for model files
|
| 148 |
-
- Safe model loading with fallbacks
|
| 149 |
-
|
| 150 |
-
## Development
|
| 151 |
-
|
| 152 |
-
### Running in Debug Mode
|
| 153 |
-
```bash
|
| 154 |
-
export DEBUG=True
|
| 155 |
-
python run_app.py
|
| 156 |
-
```
|
| 157 |
-
|
| 158 |
-
### Testing the API
|
| 159 |
-
```bash
|
| 160 |
-
curl -X POST http://localhost:5000/api/summarize \
|
| 161 |
-
-H "Content-Type: application/json" \
|
| 162 |
-
-d '{"text": "نص تجريبي للاختبار", "length": 2, "full_text": true}'
|
| 163 |
-
```
|
| 164 |
-
|
| 165 |
-
## Support
|
| 166 |
-
|
| 167 |
-
For issues or questions:
|
| 168 |
-
1. Check the logs in the terminal
|
| 169 |
-
2. Verify model files are correct
|
| 170 |
-
3. Ensure all dependencies are installed
|
| 171 |
-
4. Check Python version compatibility
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
analyze_failures.py
DELETED
|
@@ -1,67 +0,0 @@
|
|
| 1 |
-
"""Analyze remaining 24 failures after Layer 1/2/3 fixes."""
|
| 2 |
-
import json, re
|
| 3 |
-
|
| 4 |
-
with open('tests/phase10/reports/collision_benchmark_results.json', 'r', encoding='utf-8') as f:
|
| 5 |
-
data = json.load(f)
|
| 6 |
-
|
| 7 |
-
def norm(t):
|
| 8 |
-
t = re.sub(r'[\u064B-\u065F\u0670]', '', t)
|
| 9 |
-
t = t.rstrip('.،؛؟!?!')
|
| 10 |
-
return re.sub(r'\s+', ' ', t).strip()
|
| 11 |
-
|
| 12 |
-
categories = {}
|
| 13 |
-
for r in data['results']:
|
| 14 |
-
if r['pipeline_verdict'] != 'FN':
|
| 15 |
-
continue
|
| 16 |
-
rid = r['id']
|
| 17 |
-
exp = r['expected'].strip()
|
| 18 |
-
act = r['pipeline_output'].strip()
|
| 19 |
-
inp = r['input'].strip()
|
| 20 |
-
|
| 21 |
-
inp_w = inp.split()
|
| 22 |
-
exp_w = exp.split()
|
| 23 |
-
act_w = act.split()
|
| 24 |
-
|
| 25 |
-
issues = []
|
| 26 |
-
for i in range(min(len(exp_w), len(act_w))):
|
| 27 |
-
aw = act_w[i].rstrip('.،؛؟!?!')
|
| 28 |
-
ew = exp_w[i].rstrip('.،؛؟!?!')
|
| 29 |
-
iw = inp_w[i] if i < len(inp_w) else '—'
|
| 30 |
-
aw_n = re.sub(r'[\u064B-\u065F]', '', aw)
|
| 31 |
-
ew_n = re.sub(r'[\u064B-\u065F]', '', ew)
|
| 32 |
-
|
| 33 |
-
if aw_n == ew_n:
|
| 34 |
-
continue # tanween/diacritic only diff
|
| 35 |
-
if aw != ew:
|
| 36 |
-
if iw == aw:
|
| 37 |
-
cause = "MODEL_MISS"
|
| 38 |
-
elif iw == ew:
|
| 39 |
-
cause = "CORRUPTED"
|
| 40 |
-
else:
|
| 41 |
-
cause = "WRONG_FIX"
|
| 42 |
-
issues.append(f" [{i}] '{iw}'→'{aw}' (exp:'{ew}') {cause}")
|
| 43 |
-
|
| 44 |
-
if len(exp_w) != len(act_w):
|
| 45 |
-
issues.append(f" word count: {len(act_w)} vs {len(exp_w)}")
|
| 46 |
-
|
| 47 |
-
# Classify
|
| 48 |
-
has_junk = any('وومن' in a or '.و' in a or 'ةل' in a for a in act_w)
|
| 49 |
-
has_trailing_و = any(a.endswith('و') and not e.endswith('و') and not e.endswith('وا')
|
| 50 |
-
for a, e in zip(act_w, exp_w) if a != e)
|
| 51 |
-
|
| 52 |
-
cat = r['category']
|
| 53 |
-
print(f"\n{rid} [{cat}]")
|
| 54 |
-
print(f" IN: {inp[:60]}")
|
| 55 |
-
print(f" EXP: {exp[:60]}")
|
| 56 |
-
print(f" ACT: {act[:60]}")
|
| 57 |
-
for iss in issues:
|
| 58 |
-
print(iss)
|
| 59 |
-
if has_junk:
|
| 60 |
-
print(" >>> TRAILING JUNK")
|
| 61 |
-
|
| 62 |
-
# Summary of what each failure needs
|
| 63 |
-
print("\n" + "="*60)
|
| 64 |
-
print("FIXABILITY ANALYSIS")
|
| 65 |
-
print("="*60)
|
| 66 |
-
print(f"\nTotal failures: 24")
|
| 67 |
-
print(f"Need: 17 more passes to reach 85% (43/50)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
apply_locks.py
DELETED
|
@@ -1,77 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
|
| 3 |
-
def apply_lock_to_file(filepath, var_name, engine_name, func_name):
|
| 4 |
-
with open(filepath, 'r', encoding='utf-8') as f:
|
| 5 |
-
lines = f.readlines()
|
| 6 |
-
|
| 7 |
-
out_lines = []
|
| 8 |
-
in_imports = False
|
| 9 |
-
added_threading = False
|
| 10 |
-
in_globals = False
|
| 11 |
-
added_lock_var = False
|
| 12 |
-
in_func = False
|
| 13 |
-
|
| 14 |
-
for line in lines:
|
| 15 |
-
if line.startswith('import ') and not added_threading:
|
| 16 |
-
out_lines.append(line)
|
| 17 |
-
out_lines.append("import threading\n")
|
| 18 |
-
added_threading = True
|
| 19 |
-
continue
|
| 20 |
-
|
| 21 |
-
if line.startswith(f'_{var_name} = None') and not added_lock_var:
|
| 22 |
-
out_lines.append(line)
|
| 23 |
-
out_lines.append(f"_load_lock = threading.Lock()\n")
|
| 24 |
-
added_lock_var = True
|
| 25 |
-
continue
|
| 26 |
-
|
| 27 |
-
if line.startswith(f'def {func_name}('):
|
| 28 |
-
in_func = True
|
| 29 |
-
out_lines.append(line)
|
| 30 |
-
continue
|
| 31 |
-
|
| 32 |
-
if in_func:
|
| 33 |
-
if line.startswith(f' global '):
|
| 34 |
-
out_lines.append(line.replace('\n', f', _load_lock\n'))
|
| 35 |
-
continue
|
| 36 |
-
|
| 37 |
-
if line.startswith(f' try:'):
|
| 38 |
-
# The start of the old try block. We wrap everything from here.
|
| 39 |
-
out_lines.append(f' with _load_lock:\n')
|
| 40 |
-
out_lines.append(f' if _{var_name} is not None:\n')
|
| 41 |
-
out_lines.append(f' return _{var_name}\n\n')
|
| 42 |
-
out_lines.append(f' try:\n')
|
| 43 |
-
continue
|
| 44 |
-
|
| 45 |
-
# If we are inside the function and past the global declaration,
|
| 46 |
-
# and it's indented with at least 4 spaces, we need to add 4 more spaces
|
| 47 |
-
# for the lines that were inside the old `try:` and `except:`
|
| 48 |
-
# EXCEPT for `if _xxx is not None: return _xxx` which comes before the try
|
| 49 |
-
if line.startswith(' if _') or line.startswith(' return _'):
|
| 50 |
-
# This is the old `if checker is not None:` logic before try. Leave it alone.
|
| 51 |
-
out_lines.append(line)
|
| 52 |
-
continue
|
| 53 |
-
|
| 54 |
-
if line.startswith(' '):
|
| 55 |
-
# Shift everything that was inside try/except right by 4 spaces
|
| 56 |
-
if line.strip() == '':
|
| 57 |
-
out_lines.append('\n')
|
| 58 |
-
else:
|
| 59 |
-
out_lines.append(' ' + line)
|
| 60 |
-
|
| 61 |
-
if line.startswith(' return _') or line.startswith(' raise RuntimeError'):
|
| 62 |
-
# End of function
|
| 63 |
-
in_func = False
|
| 64 |
-
continue
|
| 65 |
-
|
| 66 |
-
out_lines.append(line)
|
| 67 |
-
|
| 68 |
-
with open(filepath, 'w', encoding='utf-8') as f:
|
| 69 |
-
f.writelines(out_lines)
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
apply_lock_to_file(r'src/nlp/spelling/araspell_service.py', 'spell_checker', 'AraSpell', 'get_spelling_model')
|
| 73 |
-
apply_lock_to_file(r'src/nlp/punctuation/punctuation_service.py', 'punctuation_checker', 'PuncAra', 'get_punctuation_model')
|
| 74 |
-
apply_lock_to_file(r'src/nlp/grammar/grammar_service.py', 'grammar_checker', 'Grammar', 'get_grammar_model')
|
| 75 |
-
apply_lock_to_file(r'src/nlp/autocomplete/autocomplete_service.py', 'autocomplete_engine', 'Autocomplete', 'get_autocomplete_model')
|
| 76 |
-
|
| 77 |
-
print("Locks applied perfectly with correct indentation!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
archive/BAYAN_COMPLETE_AUDIT.md
ADDED
|
@@ -0,0 +1,510 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# BAYAN — Complete Product, Codebase & Extension Deep Audit
|
| 2 |
+
|
| 3 |
+
> **Audit Date:** 2026-06-27
|
| 4 |
+
> **Auditor Perspective:** Product Manager + Senior Frontend + Backend Architect + Extension Engineer + SaaS Reviewer
|
| 5 |
+
> **Scope:** Website, Backend API, Chrome Extension, Auth/Database, AI Models, UX, Security, Performance, Code Quality
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
## 1. Current System Overview
|
| 10 |
+
|
| 11 |
+
### Architecture Map
|
| 12 |
+
|
| 13 |
+
```
|
| 14 |
+
┌────────────────────────────────────────────────────────────────┐
|
| 15 |
+
│ BAYAN ECOSYSTEM │
|
| 16 |
+
│ │
|
| 17 |
+
│ ┌──────────────┐ ┌──────────────┐ ┌─────────────────┐ │
|
| 18 |
+
│ │ Website SPA │───▶│ Flask API │───▶│ NLP Pipeline │ │
|
| 19 |
+
│ │ (index.html) │ │ (app.py) │ │ Spell→Gram→Punct│ │
|
| 20 |
+
│ │ 33 JS files │ │ 2,844 lines │ │ PipelineContext │ │
|
| 21 |
+
│ └──────┬───────┘ └──────┬───────┘ │ PatchSet/Locker │ │
|
| 22 |
+
│ │ │ └─────────────────┘ │
|
| 23 |
+
│ │ │ │
|
| 24 |
+
│ │ ┌──────┴───────┐ ┌─────────────────┐ │
|
| 25 |
+
│ │ │ Local Models │ │ Remote Grammar │ │
|
| 26 |
+
│ │ │ Spelling │ │ (Gradio Space) │ │
|
| 27 |
+
│ │ │ Punctuation │ │ Latency: 3-8s │ │
|
| 28 |
+
│ │ │ Summarization │ └─────────────────┘ │
|
| 29 |
+
│ │ │ Dialect (mT5) │ │
|
| 30 |
+
│ │ │ Autocomplete │ │
|
| 31 |
+
│ │ └──────────────┘ │
|
| 32 |
+
│ │ │
|
| 33 |
+
│ ┌──────┴───────┐ ┌──────────────┐ ┌─────────────────┐ │
|
| 34 |
+
│ │ Supabase │◀──│ Auth Module │──▶│ Documents DB │ │
|
| 35 |
+
│ │ (Cloud) │ │ Guest+Google │ │ Settings Sync │ │
|
| 36 |
+
│ │ Client-side │ │ PKCE OAuth │ │ Summaries │ │
|
| 37 |
+
│ └──────────────┘ └──────────────┘ └─────────────────┘ │
|
| 38 |
+
│ │
|
| 39 |
+
│ ┌────────────────────────────────────────────────────────┐ │
|
| 40 |
+
│ │ Chrome Extension (MV3 v2.1.0) │ │
|
| 41 |
+
│ │ ┌───────────┐ ┌────────────┐ ┌─────────────────────┐ │ │
|
| 42 |
+
│ │ │ Content │ │ Background │ │ Side Panel + Popup │ │ │
|
| 43 |
+
│ │ │ Script │ │ Worker │ │ 5 tabs each │ │ │
|
| 44 |
+
│ │ │ Overlay+ │ │ Cache+ │ │ Correct/Summarize/ │ │ │
|
| 45 |
+
│ │ │ Ghost txt │ │ Retry │ │ Dialect/Quran/Auto │ │ │
|
| 46 |
+
│ │ └───────────┘ └────────────┘ └─────────────────────┘ │ │
|
| 47 |
+
│ │ NO AUTH │ NO DOCUMENTS │ NO SYNC │ NO EXPORT │ │
|
| 48 |
+
│ └────────────────────────────────────────────────────────┘ │
|
| 49 |
+
└────────────────────────────────────────────────────────────────┘
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
### Technology Stack
|
| 53 |
+
|
| 54 |
+
| Layer | Technology | Notes |
|
| 55 |
+
|-------|-----------|-------|
|
| 56 |
+
| **Frontend** | Vanilla JS, HTML, CSS (Tailwind CDN dev mode) | Custom `contenteditable` editor, 33 script tags, no bundler |
|
| 57 |
+
| **Backend** | Flask (Python) | Single monolith `app.py` — 2,844 lines |
|
| 58 |
+
| **NLP Pipeline** | Custom Python modules | 3-stage: Spelling → Grammar → Punctuation with PipelineContext/PatchSet/StageLocker |
|
| 59 |
+
| **AI Models** | 6 transformer-based models | Spelling (AraSpell), Grammar (remote Gradio), Punctuation (PuncAra-v1), Summarization (mBART), Dialect (mT5-300M), Autocomplete (AraBERT + AraGPT2) |
|
| 60 |
+
| **Database** | Supabase (PostgreSQL) | Documents, profiles, settings, summaries — all client-side only |
|
| 61 |
+
| **Auth** | Supabase Auth (PKCE) | Guest (anonymous) + Google OAuth, 8s timeout + offline fallback |
|
| 62 |
+
| **Deployment** | HuggingFace Spaces (Docker) | CPU-only free tier, ~60s cold start |
|
| 63 |
+
| **Extension** | Chrome MV3 | Background SW, Content Script (all sites), Side Panel, Popup |
|
| 64 |
+
|
| 65 |
+
### File Structure Summary
|
| 66 |
+
|
| 67 |
+
| Directory | Files | Purpose |
|
| 68 |
+
|-----------|-------|---------|
|
| 69 |
+
| `src/` | `app.py`, `hf_inference.py`, `model_loader.py` + HTML/CSS | Backend + serving |
|
| 70 |
+
| `src/js/` | 8 core JS files | Editor, renderer, selection, UI, theme, format, autocomplete, api |
|
| 71 |
+
| `src/js/auth/` | 5 files | Supabase auth (config, client, session, auth, auth-ui) |
|
| 72 |
+
| `src/js/documents/` | 4 files | Local doc management (documents, doc-utils, export, import) |
|
| 73 |
+
| `src/js/documents-cloud/` | 3 files | Supabase CRUD (api, state, ui) |
|
| 74 |
+
| `src/js/sync/` | 3 files | Offline queue (manager, queue, resolver) |
|
| 75 |
+
| `src/js/settings-sync/` | 2 files | User settings cloud persistence |
|
| 76 |
+
| `src/js/summaries/` | 2 files | Cloud summaries (api, ui) |
|
| 77 |
+
| `src/nlp/` | 6 subdirs | All NLP processing modules |
|
| 78 |
+
| `extension/` | 8 files + 4 subdirs | Chrome Extension |
|
| 79 |
+
| `extension/shared/` | 9 files | Shared utilities (api, renderer, patches, state, hash, ui, config, constants, analysis-controller) |
|
| 80 |
+
| `extension/sidepanel/` | 3 files | Side panel (HTML, JS, CSS) |
|
| 81 |
+
| `tests/` | 16+ test files | Backend unit/integration tests |
|
| 82 |
+
| `extension/tests/` | 8 files | Extension integration tests |
|
| 83 |
+
|
| 84 |
+
### NLP Pipeline Architecture
|
| 85 |
+
|
| 86 |
+
```
|
| 87 |
+
User Input → PipelineContext(text)
|
| 88 |
+
│
|
| 89 |
+
├─[1] SPELLING (if text ≤ 1000 chars && not religious && not URLs/hashtags)
|
| 90 |
+
│ AraSpell seq2seq + beam search (5 beams)
|
| 91 |
+
│ 10-step postprocessing: hybrid alignment, MLM validation, bidirectional check
|
| 92 |
+
│ 20+ safety guards (edit distance, length ratio, first-letter, numeral, pronoun suffix...)
|
| 93 |
+
│ ctx.mutate_text() → OffsetMapper chain
|
| 94 |
+
│
|
| 95 |
+
├─[2] GRAMMAR (if not religious text)
|
| 96 |
+
│ Remote Gradio API → mohammedahmedezz2004/bayan_arabic_grammarly_correction
|
| 97 |
+
│ ArabicGrammarGuard: 14 rule-based post-passes (camel-tools MLE disambiguator)
|
| 98 |
+
│ Jaccard hallucination filter, directional blocks, 10+ safety guards
|
| 99 |
+
│ StageLocker hierarchy: grammar(3) > spelling(2) > punctuation(1)
|
| 100 |
+
│ ctx.mutate_text() → OffsetMapper chain
|
| 101 |
+
│
|
| 102 |
+
├─[3] PUNCTUATION (if not religious && spelling+grammar made corrections)
|
| 103 |
+
│ PuncAra-v1 local model (50-word chunks, beam=3)
|
| 104 |
+
│ validate_punctuation_diff() safety layer
|
| 105 |
+
│ Max 3 punctuation patches cap
|
| 106 |
+
│ ctx.mutate_text() → OffsetMapper chain
|
| 107 |
+
│
|
| 108 |
+
└─ PatchSet.resolve_overlaps() → API Response
|
| 109 |
+
Deterministic greedy: priority DESC, confidence DESC, start ASC
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
---
|
| 113 |
+
|
| 114 |
+
## 2. Feature Inventory
|
| 115 |
+
|
| 116 |
+
### Core AI Features
|
| 117 |
+
|
| 118 |
+
| Feature | Backend API | Website | Extension | Key Files |
|
| 119 |
+
|---------|------------|---------|-----------|-----------|
|
| 120 |
+
| **Spelling** | ✅ `/api/spelling` + `/api/analyze` | ✅ Inline highlights, suggestions, apply/dismiss | ✅ Content script overlay + Popup + SidePanel | `nlp/spelling/araspell_service.py`, `araspell_rules.py` |
|
| 121 |
+
| **Grammar** | ✅ `/api/grammar` + `/api/analyze` | ✅ Via remote Gradio proxy + 14 rule-based postprocessors | ✅ Content script overlay + Popup + SidePanel | `nlp/grammar/grammar_service.py`, `grammar_rules.py` |
|
| 122 |
+
| **Punctuation** | ✅ `/api/punctuation` + `/api/analyze` | ✅ PuncAra-v1 local model | ✅ Content script overlay + Popup + SidePanel | `nlp/punctuation/punctuation_service.py` |
|
| 123 |
+
| **Summarization** | ✅ `/api/summarize` | ✅ Editor tab with length slider + paragraph/bullets mode | ✅ Popup tab + SidePanel tab | `model_loader.py`, `summaries-api.js` |
|
| 124 |
+
| **Autocomplete** | ✅ `/api/autocomplete` | ✅ Ghost text + dropdown, word-boundary triggered | ⚠️ Ghost text for textarea/input only, button-click in popup/sidepanel | `autocomplete.js`, `content-inline.js` |
|
| 125 |
+
| **Dialect→MSA** | ✅ `/api/dialect` | ✅ Dedicated editor tab | ✅ Popup + SidePanel tabs | `nlp/dialect/dialect_service.py` |
|
| 126 |
+
| **Quran Verification** | ✅ `/api/quran` | ✅ Dedicated editor tab + 13-language translation | ✅ SidePanel (with translation), Popup | `quran.py`, `quran_master.db` |
|
| 127 |
+
|
| 128 |
+
### Platform Features
|
| 129 |
+
|
| 130 |
+
| Feature | Website | Extension Popup | Extension SidePanel | Extension Content Script |
|
| 131 |
+
|---------|---------|----------------|---------------------|-------------------------|
|
| 132 |
+
| **Authentication** | ✅ Guest + Google OAuth + linking | ❌ None | ❌ None | ❌ None |
|
| 133 |
+
| **Cloud Documents** | ✅ Full CRUD (create/load/save/rename/delete) | ❌ None | ❌ None | ❌ None |
|
| 134 |
+
| **Cloud Summaries** | ✅ Save/load/delete (Supabase) | ❌ None | ❌ None | ❌ None |
|
| 135 |
+
| **Offline Sync** | ✅ LocalStorage queue + auto-flush | ❌ None | ❌ None | ❌ None |
|
| 136 |
+
| **Settings Sync** | ✅ Theme synced to cloud | ❌ None | ❌ None | ❌ None |
|
| 137 |
+
| **Export** | ✅ TXT + DOCX + PDF | ✅ TXT only | ✅ TXT only | ❌ None |
|
| 138 |
+
| **Import** | ✅ TXT + DOCX (mammoth.js) | ❌ None | ❌ None | ❌ None |
|
| 139 |
+
| **Undo/Redo** | ✅ Custom 50-level stack | ❌ Browser default only | ❌ Browser default only | ❌ N/A |
|
| 140 |
+
| **Word Count Goal** | ✅ Configurable progress indicator | ❌ None | ❌ None | ❌ N/A |
|
| 141 |
+
| **Score Ring** | ✅ Animated SVG | ✅ Simplified SVG | ✅ Simplified SVG | ❌ None |
|
| 142 |
+
| **Dismissed Words** | ✅ Persisted in localStorage | ❌ None | ❌ None | ❌ None |
|
| 143 |
+
| **Theme Toggle** | ✅ Dark/Light + sync | ❌ Dark only | ❌ Dark only | N/A |
|
| 144 |
+
| **Keyboard Shortcuts** | ✅ Extensive (Alt+1-3, Ctrl+S, Ctrl+Q) | ❌ None | ❌ None | Tab for autocomplete only |
|
| 145 |
+
| **Rich Text Formatting** | ✅ Full toolbar (bold, italic, lists, links, etc.) | ❌ None | ❌ None | ❌ N/A |
|
| 146 |
+
| **Suggestion Feedback** | ✅ Thumbs up/down | ❌ None | ❌ None | ❌ None |
|
| 147 |
+
| **Draft Auto-save** | ✅ localStorage on every keystroke | ❌ Lost on close | ✅ chrome.storage.session | ❌ N/A |
|
| 148 |
+
| **Write-back to Page** | N/A | ❌ None | ✅ Selection-aware splice | ✅ Via background relay |
|
| 149 |
+
| **Quran Translation** | ✅ 13 languages | ❌ None | ✅ 13 languages | ❌ None |
|
| 150 |
+
|
| 151 |
+
---
|
| 152 |
+
|
| 153 |
+
## 3. Website vs Extension Comparison
|
| 154 |
+
|
| 155 |
+
### Authentication
|
| 156 |
+
|
| 157 |
+
| Aspect | Website | Extension | Gap |
|
| 158 |
+
|--------|---------|-----------|-----|
|
| 159 |
+
| Guest login | ✅ `signInAnonymously()` with 8s timeout | ❌ Zero auth code | **Critical** |
|
| 160 |
+
| Google OAuth | ✅ PKCE flow via Supabase | ❌ | **Critical** |
|
| 161 |
+
| Session restore | ✅ `getSession()` from localStorage | ❌ | **Critical** |
|
| 162 |
+
| Identity linking | ✅ Guest → Google upgrade | ❌ | **High** |
|
| 163 |
+
| Offline fallback | ✅ `enableOfflineAuthMode()` | ❌ | **High** |
|
| 164 |
+
| Auth-gated features | ✅ Documents, sync, settings | ❌ All features work without auth | **Critical** |
|
| 165 |
+
|
| 166 |
+
### AI Feature UX Comparison
|
| 167 |
+
|
| 168 |
+
| Feature | Website UX | Extension UX | Parity |
|
| 169 |
+
|---------|-----------|-------------|--------|
|
| 170 |
+
| Analyze (S+G+P) | Rich editor with inline colored highlights, suggestion sidebar with cards, popover tooltips, apply/dismiss per-suggestion, apply-all, score ring, error donut | **Content Script:** Transparent overlay with colored marks + tooltip on hover. **Popup/SidePanel:** Textarea input + suggestion cards + score ring | ⚠️ Functional but significant UX gap |
|
| 171 |
+
| Summarize | Editor tab with length slider, paragraph/bullets toggle, copy/export/save-to-cloud | Popup/SidePanel: textarea + radio buttons (short/medium/long) + copy + TXT download | ✅ Near parity |
|
| 172 |
+
| Autocomplete | Ghost text inside editor + dropdown, word-boundary triggered, 400ms debounce, Tab to accept | **Content Script:** Ghost text for textarea/input only (NOT contenteditable). **Popup/SidePanel:** Button-click only | ⚠️ Missing core inline UX on most web editors |
|
| 173 |
+
| Dialect | Dedicated tab, convert + copy + apply-to-editor | Popup/SidePanel: textarea + convert + copy | ✅ Near parity |
|
| 174 |
+
| Quran | Dedicated tab, verify + 13-language translation + modal + apply-to-editor (protected spans) | Popup: basic verify. SidePanel: full verify + 13-language translation + apply-to-page | ✅ SidePanel has full parity |
|
| 175 |
+
|
| 176 |
+
### Documents & Data
|
| 177 |
+
|
| 178 |
+
| Aspect | Website | Extension | Gap |
|
| 179 |
+
|--------|---------|-----------|-----|
|
| 180 |
+
| Create document | ✅ `createDocument()` via Supabase | ❌ No Supabase integration | **Critical** |
|
| 181 |
+
| List/search documents | ✅ Sidebar panel with search | ❌ | **Critical** |
|
| 182 |
+
| Auto-save + sync | ✅ 2.5s debounced via SyncManager | ❌ | **Critical** |
|
| 183 |
+
| Offline queue | ✅ LocalStorage persistence, auto-flush on reconnect | ❌ | **High** |
|
| 184 |
+
| Export PDF/DOCX | ✅ docx.js + html2pdf | ❌ TXT download only | **Medium** |
|
| 185 |
+
| Import TXT/DOCX | ✅ FileReader + mammoth.js | ❌ | **Low** |
|
| 186 |
+
| Conflict resolution | ✅ Last-write-wins timestamp comparison | ❌ N/A | N/A |
|
| 187 |
+
|
| 188 |
+
---
|
| 189 |
+
|
| 190 |
+
## 4. Missing Features
|
| 191 |
+
|
| 192 |
+
### Critical (Blocks Production Use)
|
| 193 |
+
|
| 194 |
+
| # | Feature | Impact | Recommended Solution |
|
| 195 |
+
|---|---------|--------|---------------------|
|
| 196 |
+
| C1 | **No API rate limiting** | Any client can overwhelm the free-tier HF Space with unlimited requests to compute-intensive NLP endpoints | Add Flask-Limiter: 30 req/min/IP for `/api/analyze`, 10/min for `/api/summarize` |
|
| 197 |
+
| C2 | **CORS wildcard `origins: "*"`** (`app.py:94`) | Any website can proxy through Bayan's API, enabling compute theft and abuse | Restrict to `["https://bayan10-bayan-api.hf.space", "chrome-extension://<ext-id>"]` |
|
| 198 |
+
| C3 | **Extension has zero authentication** | Extension users cannot access cloud documents, settings, or history — breaks SaaS value proposition | Implement Supabase auth via `chrome.identity.launchWebAuthFlow()` for Google OAuth |
|
| 199 |
+
|
| 200 |
+
### High (Important Feature Gap)
|
| 201 |
+
|
| 202 |
+
| # | Feature | Impact | Recommended Solution |
|
| 203 |
+
|---|---------|--------|---------------------|
|
| 204 |
+
| H1 | **Missing Supabase migration files** for `documents`, `summaries`, `settings` tables | Only `001_profiles.sql` exists. RLS policies are documented but not version-controlled. Database cannot be recreated from migrations. | Create `002_documents.sql`, `003_summaries.sql`, `004_settings.sql` with RLS |
|
| 205 |
+
| H2 | **Extension content script lacks autocomplete ghost text on contenteditable** | The flagship ghost-text feature only works on `<textarea>`/`<input>`, not on contenteditable elements (which most web editors use) | Port autocomplete logic to work with contenteditable in `content-inline.js` |
|
| 206 |
+
| H3 | **No document versioning or history** | Each cloud save overwrites previous content. Hard delete with no recovery. No revision history. | Add `document_versions` table or soft-delete with `deleted_at` column |
|
| 207 |
+
| H4 | **Backend monolith: `app.py` is 2,844 lines** | `analyze_text()` alone is 1,224 lines. Extremely difficult to maintain, test, or extend. | Split into `routes/`, `services/`, `middleware/` modules |
|
| 208 |
+
| H5 | **Extension popup/sidepanel have no DOCX/PDF export** | Users can only download as TXT from extension | Add at minimum "Copy as formatted text"; ideally add DOCX export |
|
| 209 |
+
|
| 210 |
+
### Medium (Improvement Needed)
|
| 211 |
+
|
| 212 |
+
| # | Feature | Impact | Recommended Solution |
|
| 213 |
+
|---|---------|--------|---------------------|
|
| 214 |
+
| M1 | **Grammar model depends on external Gradio Space** | Hard dependency on `mohammedahmedezz2004/bayan_arabic_grammarly_correction`. If Space sleeps (HF free tier), first request has 10-30s cold start. If down, grammar breaks entirely. | Host grammar model directly on Bayan Space, or add rule-only fallback |
|
| 215 |
+
| M2 | **No Content Security Policy** | Neither the website nor extension manifest declares a CSP. Website serves no CSP headers from Flask. | Add CSP headers in Flask and explicit CSP in extension manifest |
|
| 216 |
+
| M3 | **Extension dismissed-words whitelist missing** | Users must dismiss the same false-positive words repeatedly across sessions | Persist dismissed words in `chrome.storage.local` |
|
| 217 |
+
| M4 | **No i18n framework on website** | All strings hardcoded in Arabic HTML. Adding English support requires rewriting HTML. | Add simple i18n JSON loader (extension already has `_locales/ar/`) |
|
| 218 |
+
| M5 | **Sync conflict resolution is lossy** | Last-write-wins silently discards the losing version with no user notification, no merge attempt. Clock skew between client `Date.now()` and server `updated_at` can cause wrong winner. | Show conflict notification to user, or implement operational transform |
|
| 219 |
+
| M6 | **Only theme is synced in settings** | `settings-sync.js` only syncs `theme`. Other potential settings (font size, word goal, autocomplete toggle) are not synced. | Extend `preferences` JSONB column to include all user settings |
|
| 220 |
+
|
| 221 |
+
### Low (Nice to Have)
|
| 222 |
+
|
| 223 |
+
| # | Feature | Impact | Recommended Solution |
|
| 224 |
+
|---|---------|--------|---------------------|
|
| 225 |
+
| L1 | Extension only has Arabic locale | Cannot target non-Arabic Chrome Web Store users | Add `_locales/en/messages.json` |
|
| 226 |
+
| L2 | No analytics or telemetry | No visibility into usage patterns, error rates, or feature adoption | Add lightweight privacy-respecting event tracking |
|
| 227 |
+
| L3 | Vendor libraries loaded synchronously | `mammoth.browser.min.js` (340KB), `docx.umd.js` (1.2MB), `html2canvas.min.js` (210KB) block initial render even if never used | Lazy-load on first export action |
|
| 228 |
+
| L4 | No service worker for website | No offline caching for static assets | Add basic SW for asset caching |
|
| 229 |
+
| L5 | No onboarding flow | First-time users see empty editor with no guidance | Add sample text + guided tooltips |
|
| 230 |
+
|
| 231 |
+
---
|
| 232 |
+
|
| 233 |
+
## 5. Bugs Found
|
| 234 |
+
|
| 235 |
+
### Active Bugs
|
| 236 |
+
|
| 237 |
+
| # | Bug | Severity | Location | Details |
|
| 238 |
+
|---|-----|----------|----------|---------|
|
| 239 |
+
| B1 | **`/api/punctuation` has no `MAX_TEXT_LENGTH` check** | **High** | `app.py:596-647` | All other text endpoints enforce `MAX_TEXT_LENGTH = 5000`. Punctuation endpoint accepts unlimited input, allowing resource exhaustion via a single large request. |
|
| 240 |
+
| B2 | **Race condition in `_isApplyingSuggestion` timing** | **High** | `editor.js` | Guard resets after 400ms but `analyzeText()` is called after 300ms. 100ms window where a suggestion application triggers recursive analysis, corrupting state. |
|
| 241 |
+
| B3 | **Undo stack captures error overlay HTML** | **Medium** | `editor.js` | `pushUndoState()` saves `editor.innerHTML` including colored suggestion `<span>` elements. Undoing restores stale suggestion markup that doesn't correspond to current analysis. |
|
| 242 |
+
| B4 | **`getEditorText()` clones entire DOM on every keystroke** | **Medium** | `selection.js` | `editor.cloneNode(true)` called on every `input` event via `updateEditorStats()`. For large documents, this is a significant performance hit. |
|
| 243 |
+
| B5 | **Zero-width space from `formatFontSize` causes offset errors** | **Medium** | `format.js:126` | Inserts `\u200B` (U+200B, zero-width space) when selection is collapsed. This invisible character is counted in text offsets, causing off-by-one errors in suggestion positions. |
|
| 244 |
+
| B6 | **`restoreSelection` broken for non-collapsed selections** | **Medium** | `selection.js` | For range selections, the start Range is created but never added to the Selection object. `getRangeAt(0)` then operates on the browser's stale selection state. |
|
| 245 |
+
| B7 | **Color picker reset removes ALL formatting** | **Medium** | `format.js:335` | Reset button calls `removeFormat` which strips ALL formatting (bold, italic, etc.), not just the color. |
|
| 246 |
+
| B8 | **`overlaySuggestions` skips `.quran-applied` check on rebuilds** | **Medium** | `renderer.js:349-351` | Initial text node walk (lines 253-256) skips `.quran-applied` nodes, but the per-suggestion rebuild at line 349 does NOT, causing protected Quran text to be modified. |
|
| 247 |
+
| B9 | **`/api/quran` bypasses Content-Type check** | **Low** | `app.py` | Uses `request.get_json(force=True)` which accepts any Content-Type. All other endpoints properly check `request.is_json` first. |
|
| 248 |
+
| B10 | **`/api/quran` inconsistent response format** | **Low** | `app.py` | Returns bare `jsonify(result)` without wrapping in `{'status': 'success', ...}` format used by all other endpoints. |
|
| 249 |
+
| B11 | **`/api/autocomplete` `n` parameter unbounded** | **Low** | `app.py` | `n` is cast to int without bounds checking. `n=1000000` would attempt to generate a million suggestions. |
|
| 250 |
+
| B12 | **`updateSummaryLength()` is a no-op** | **Low** | `index.html:~1920` | Empty function body — the summary length slider label never updates to reflect the selected value. |
|
| 251 |
+
| B13 | **Extension overlay position breaks in scrollable containers** | **Medium** | `content-inline.js` | Overlay positioned with `getBoundingClientRect() + window.scrollY` (absolute). Breaks when text field is inside a scrollable `<div>` rather than the window. Tracks window scroll but not ancestor scroll. |
|
| 252 |
+
| B14 | **Infinite retry loop in autocomplete init** | **Low** | `autocomplete.js:31` | `setTimeout(init, 500)` with no retry limit if `#editor-container` is not found. |
|
| 253 |
+
| B15 | **Settings sync circular write** | **Low** | `settings-sync.js` | When cloud settings are loaded, `setTheme()` dispatches `bayan:themechange`, which triggers `onSettingsChanged()`, which saves the same theme back to cloud — wasteful round-trip. |
|
| 254 |
+
| B16 | **Sync queue not cleared on logout** | **Low** | `auth.js:128-156` | `signOut()` does not call `SyncQueue.clear()`. Pending queue entries (containing document content) persist for the next user. |
|
| 255 |
+
| B17 | **`_escapeSummaryAttr()` incomplete HTML escaping** | **Medium** | `summaries-ui.js` | Only escapes `"`, not `&`, `<`, `>`. Potential stored XSS vector if summary text contains HTML characters. |
|
| 256 |
+
| B18 | **`summaries-ui.js` null crash risk** | **Low** | `summaries-ui.js:87` | `item.summary_text.length` will throw TypeError if `summary_text` is null/undefined. |
|
| 257 |
+
|
| 258 |
+
### Previously Fixed Bugs
|
| 259 |
+
|
| 260 |
+
| # | Bug | Status |
|
| 261 |
+
|---|-----|--------|
|
| 262 |
+
| B-F1 | Score sparkline renders with only 2 data points | ✅ Fixed |
|
| 263 |
+
| B-F2 | `dismissAllFiltered()` only removed DOM without updating `window.currentSuggestions` | ✅ Fixed |
|
| 264 |
+
|
| 265 |
+
---
|
| 266 |
+
|
| 267 |
+
## 6. Security Issues
|
| 268 |
+
|
| 269 |
+
| # | Issue | Severity | Location | Details |
|
| 270 |
+
|---|-------|----------|----------|---------|
|
| 271 |
+
| S1 | **CORS wildcard `origins: "*"`** | **Critical** | `app.py:94` | `CORS(app, resources={r"/api/*": {"origins": "*"}})` allows any origin to call all API endpoints. Enables compute theft, DDoS via free proxy, third-party scraping of NLP capabilities. |
|
| 272 |
+
| S2 | **No API authentication on any endpoint** | **Critical** | `app.py`, all `/api/*` routes | No JWT, API key, session check, or rate limiting on any endpoint. Combined with wildcard CORS, any HTTP client can consume compute resources without limits. |
|
| 273 |
+
| S3 | **Debug endpoint publicly accessible** | **High** | `app.py:243-277` | `/api/debug-models` requires no authentication. Exposes: model load status, startup error messages, system memory usage (`/proc/meminfo` contents), HF_API_TOKEN existence. |
|
| 274 |
+
| S4 | **`trust_remote_code=True` for grammar model** | **High** | `model_loader.py:706` | Grammar model loaded with `trust_remote_code=True`, allowing arbitrary code execution from the HF model repository. All other models correctly use `False`. |
|
| 275 |
+
| S5 | **Unsafe pickle deserialization** | **High** | `autocomplete_service.py:100` | `pickle.load(f)` on a file downloaded from HuggingFace Hub. Pickle can execute arbitrary code during deserialization. |
|
| 276 |
+
| S6 | **Unsafe torch checkpoint loading** | **High** | `araspell_service.py:72` | `torch.load(model_path, weights_only=False)` disables PyTorch's safe loading, allowing arbitrary code execution via crafted checkpoint files. |
|
| 277 |
+
| S7 | **Missing RLS migration files for core tables** | **High** | `supabase/migrations/` | Only `001_profiles.sql` exists. `documents`, `summaries`, `settings` tables have RLS documented but not version-controlled. Cannot verify RLS is enabled in production from codebase. |
|
| 278 |
+
| S8 | **XSS risk in document content** | **Medium** | `documents-ui.js:196` | Document content stored as HTML and loaded into the editor. If `loadDocumentText()` uses `innerHTML` without sanitization, stored XSS is possible. `_escapeHtml()` helper exists but is only used for document list rendering, not content loading. |
|
| 279 |
+
| S9 | **Document CRUD relies solely on RLS** | **Medium** | `documents-api.js:68-148` | `loadDocument()`, `saveDocument()`, `renameDocument()`, `deleteDocument()` filter only by document `id`, not by `user_id`. If RLS were misconfigured, any authenticated user could access any user's documents. |
|
| 280 |
+
| S10 | **HTML injection risk in meta tag injection** | **Medium** | `app.py:189` | `f'<meta name="supabase-url" content="{SUPABASE_URL}">'` — if `SUPABASE_URL` contains `">`, it could break the HTML structure. No HTML escaping applied. |
|
| 281 |
+
| S11 | **Telemetry data leaked to clients** | **Medium** | `app.py:~2745` | `_tel_events` list containing internal pipeline diagnostics (filter rejections, grammar diffs, Jaccard scores) is returned in the API response. Exposes internal processing details. |
|
| 282 |
+
| S12 | **Extension Trusted Types passthrough** | **Low** | `content-inline.js:32-39` | `trustedTypes.createPolicy()` uses identity transform `(input) => input` — passes CSP enforcement but provides zero sanitization. All callers must ensure safety independently. |
|
| 283 |
+
| S13 | **Auth tokens in localStorage (no CSP)** | **Low** | `auth/client.js:27` | Supabase tokens stored in localStorage, vulnerable to XSS. No Content Security Policy configured in Flask to mitigate XSS risks. Standard Supabase pattern, but defense-in-depth gap. |
|
| 284 |
+
| S14 | **`DEBUG_TRACE = True` hardcoded** | **Low** | `app.py:90` | Verbose trace logging enabled unconditionally in production. May expose sensitive processing details in log aggregators. |
|
| 285 |
+
|
| 286 |
+
---
|
| 287 |
+
|
| 288 |
+
## 7. Performance Issues
|
| 289 |
+
|
| 290 |
+
| # | Issue | Severity | Location | Details |
|
| 291 |
+
|---|-------|----------|----------|---------|
|
| 292 |
+
| P1 | **Grammar model is a remote API call** | **High** | `grammar_service.py:97-100` | Every grammar correction requires a round-trip to an external Gradio Space. If the Space sleeps (HF free tier), first request has 10-30s cold start. 3 retries with exponential backoff, but latency is fundamentally unpredictable (3-8s typical). |
|
| 293 |
+
| P2 | **Duplicate morphological analysis in grammar rules** | **High** | `grammar_rules.py` | 7 separate grammar rule functions each call `self.mle.disambiguate(tokens)` independently: `fix_number_and_gender_agreement`, `fix_verbs_nasb_and_jazm`, `fix_subject_verb_agreement`, `fix_conditional_sentences`, `fix_demonstrative_agreement`, `fix_noun_adjective_agreement_advanced`, `fix_kana_and_inna`. For a 50-word sentence, this is 7 full morphological analysis passes that could be done once. |
|
| 294 |
+
| P3 | **MLM scoring per word in spelling** | **High** | `araspell_rules.py`, ContextualCorrector | `score_with_mlm` runs a full AraBERT forward pass for each OOV word. `refine_sentence_with_mask` calls `score_with_mlm` twice + `predict_masked_token` per OOV word. For a 20-word sentence with 5 OOV words, this is ~15 BERT forward passes. |
|
| 295 |
+
| P4 | **Tailwind CDN dev mode in production** | **Medium** | `index.html` | Full Tailwind CSS (~3MB uncompressed) downloaded via CDN development script on every page load. Should use a production build with purged CSS. |
|
| 296 |
+
| P5 | **`analyze_text()` is a 1,224-line function** | **Medium** | `app.py:1534-2758` | Contains entire 3-stage pipeline with all guards, filters, and telemetry inline. Cold start loads all imports. `_is_small_spelling_change()` is 513 lines. |
|
| 297 |
+
| P6 | **12+ `import re as _re_*` statements inside function body** | **Medium** | `app.py` | 12 separate `import re as _re_spell_guard`, `import re as _re_strip`, `import re as _re_emoji`, etc. inside `analyze_text()`. While Python caches modules, these import statements are still re-executed (a `sys.modules` lookup plus a local rebind) on every request. Should be module-level. |
|
| 298 |
+
| P7 | **`getEditorText()` clones entire DOM per keystroke** | **Medium** | `selection.js` | Called on every `input` event via `updateEditorStats()`. `editor.cloneNode(true)` for large documents is expensive. |
|
| 299 |
+
| P8 | **Vendor JS loaded synchronously** | **Medium** | `index.html` | mammoth (340KB), docx.js (1.2MB), html2canvas (210KB) all block initial render even if never used. |
|
| 300 |
+
| P9 | **`overlaySuggestions` is O(N×M)** | **Medium** | `renderer.js:349` | Rebuilds text node map after EVERY suggestion application, where N = suggestions, M = text nodes. |
|
| 301 |
+
| P10 | **No API response caching on website** | **Medium** | `editor.js` | Every keystroke after 1s debounce triggers a full `/api/analyze` call. Extension background worker has LRU cache (20 entries, 5min TTL), but website doesn't cache at all. |
|
| 302 |
+
| P11 | **Extension content script injected on ALL sites** | **Medium** | `manifest.json:43-55` | `matches: ["https://*/*", "http://*/*"]` — content script loads on every page, even non-Arabic sites. |
|
| 303 |
+
| P12 | **Undo stack stores 50 full innerHTML snapshots** | **Low** | `editor.js` | For large documents with formatting, each snapshot can be 100KB+. 50 snapshots = 5MB+ of memory. |
|
| 304 |
+
| P13 | **CSS not minified** | **Low** | `components.css` | Single file at 3,639+ lines (~90KB). No CSS modules, no scoping, no minification. |
|
| 305 |
+
| P14 | **Draft auto-save serializes full editor HTML per keystroke** | **Low** | `editor.js` | `localStorage.setItem('bayan_editor_draft', editor.innerHTML)` on every input event. |
|
| 306 |
+
|
| 307 |
+
---
|
| 308 |
+
|
| 309 |
+
## 8. UX Problems
|
| 310 |
+
|
| 311 |
+
| # | Issue | Severity | Details |
|
| 312 |
+
|---|-------|----------|---------|
|
| 313 |
+
| U1 | **Native `prompt()`/`confirm()` dialogs mixed with custom UI** | **Medium** | `insertLink()` uses `prompt()`, `clearEditor()` uses `confirm()`, `_createNewDocument()`/`_startRename()` use `prompt()`, `setWordGoalUI()` uses `prompt()`. These break visual consistency and cannot be styled. `_confirmDelete()` correctly uses custom `showConfirmDialog`. |
|
| 314 |
+
| U2 | **Extension content script tooltip clips at viewport edge** | **Medium** | Tooltip for highlighted errors can overflow off-screen on narrow viewports. No boundary detection or repositioning logic. |
|
| 315 |
+
| U3 | **No loading skeleton on initial editor page** | **Medium** | Editor page shows blank white space during model initialization (~60s cold start on HF Spaces). No skeleton/shimmer to indicate loading state. |
|
| 316 |
+
| U4 | **Extension popup loses all state on close** | **Medium** | Popup has no state persistence. Clicking away destroys all analysis results. SidePanel correctly persists via `chrome.storage.session`. |
|
| 317 |
+
| U5 | **Extension ghost-text autocomplete only works on textarea/input** | **Medium** | Most web editors (Gmail compose, WordPress, Medium, Discourse, Slack) use contenteditable. Ghost text autocomplete is disabled on all of these. |
|
| 318 |
+
| U6 | **Inconsistent branding between popup and sidepanel** | **Low** | Popup uses `.bayan-*` class prefix, SidePanel uses `.sp-*`. Different color palettes and CSS variable naming (`--bayan-*` vs `--sp-*`). |
|
| 319 |
+
| U7 | **Mobile bottom-sheet for suggestions lacks smooth gestures** | **Low** | Website has responsive breakpoints but the suggestion panel bottom-sheet on mobile has no drag-to-dismiss or smooth gesture handling. |
|
| 320 |
+
| U8 | **Summary length slider label never updates** | **Low** | `updateSummaryLength()` is an empty function. Slider works but the label always shows "medium" regardless of position. |
|
| 321 |
+
| U9 | **Missing accessibility features** | **Low** | No skip navigation link, no focus trap in Quran modal, no keyboard navigation for suggestion cards (only Enter key), no `aria-live` regions for dynamic score updates. |
|
| 322 |
+
| U10 | **Protected sites disable contenteditable analysis entirely** | **Low** | Gmail, Google Docs, Notion, Sheets, Slides — contenteditable is disabled by protection list. Only `<textarea>`/`<input>` elements work on these sites. Expected but not communicated to users. |
|
| 323 |
+
|
| 324 |
+
---
|
| 325 |
+
|
| 326 |
+
## 9. Technical Debt
|
| 327 |
+
|
| 328 |
+
### Backend
|
| 329 |
+
|
| 330 |
+
| # | Item | Severity | Details |
|
| 331 |
+
|---|------|----------|---------|
|
| 332 |
+
| TD1 | **`analyze_text()`: 1,224-line function** | **High** | Contains entire 3-stage pipeline with all guards, filters, offset mapping, telemetry, and error handling. Should be decomposed into per-stage functions. |
|
| 333 |
+
| TD2 | **`_is_small_spelling_change()`: 513-line function** | **High** | Single function with deeply nested conditionals implementing 20+ safety guards. |
|
| 334 |
+
| TD3 | **Dead code: `SpellingModel`/`AutocompleteModel`/`GrammarModel`/`PunctuationModel` classes** | **Medium** | `model_loader.py:385-903`: Imported in `app.py:45-56` but NEVER instantiated. All models loaded through their respective service modules. The globals `spelling_model`, `autocomplete_model`, `grammar_model`, `punctuation_model` (lines 102-106) are always `None`. |
|
| 335 |
+
| TD4 | **Dead code: `hf_inference.py`** | **Medium** | All functions are stubs that return input unchanged or empty lists. Imported in `app.py:65` but functions are never called in the pipeline. |
|
| 336 |
+
| TD5 | **Two `RulesBasedCorrector` class definitions** | **Medium** | `araspell_rules.py`: First class at line ~38 with `KEYBOARD_NEIGHBORS`, second class at line ~540 with identical `KEYBOARD_NEIGHBORS`. Second class overwrites the first. |
|
| 337 |
+
| TD6 | **Question mark cue words defined 5 times** | **Medium** | `_EXCL_CUES = {'هل', 'أين', ...}` defined at 5 separate locations in `punctuation_service.py` and `punctuation_rules.py`. |
|
| 338 |
+
| TD7 | **12+ aliased `import re` statements inside function body** | **Medium** | `import re as _re_spell_guard`, `import re as _re_strip`, `import re as _re_emoji`, etc. — 12+ aliased `re` imports inside `analyze_text()` instead of one module-level import. |
|
| 339 |
+
| TD8 | **`Grammrar` typo in path** | **Low** | `model_loader.py:36`: `GRAMMAR_PATH = MODEL_BASE_PATH / "Grammrar" / "Model"` — misspelled directory name. Works only because the actual directory has the same typo. |
|
| 340 |
+
| TD9 | **`ENABLE_*_MODEL` flags never checked** | **Low** | `app.py:59-63`: `ENABLE_DIALECT_MODEL`, `ENABLE_PUNCTUATION_MODEL`, etc. declared but never referenced. Features use lazy-loading regardless. |
|
| 341 |
+
| TD10 | **12+ test files at project root** | **Low** | `test_camel.py`, `test_colon.py`, `test_grammar_fast.py`, `test_mapper.py`, `debug_pc002.py`, etc. scattered in root instead of `tests/`. |
|
| 342 |
+
| TD11 | **`import json as _tel_json` and `import re as _re_struct` inside function** | **Low** | `app.py:2209, 2186`: Imports inside `analyze_text()` function body instead of module level. |
|
| 343 |
+
|
| 344 |
+
### Frontend (Website)
|
| 345 |
+
|
| 346 |
+
| # | Item | Severity | Details |
|
| 347 |
+
|---|------|----------|---------|
|
| 348 |
+
| TD12 | **`src/js/api.js` is dead code** | **Medium** | Uses ES6 `export` syntax but loaded via `<script>` tag (not `type="module"`). Exports are never imported. Website uses inline `fetch()` calls in `editor.js`. |
|
| 349 |
+
| TD13 | **`applySuggestionAtOffsets` and `applyAlternativeCorrection` ~90% identical** | **Medium** | `editor.js`: Nearly identical DOM manipulation, filtering, and count-updating code. Should be a single function with a correction text parameter. |
|
| 350 |
+
| TD14 | **`_sendFeedback()` defined but never called** | **Low** | `editor.js`: Feedback function exists but no UI element invokes it. |
|
| 351 |
+
| TD15 | **`renderer.js` `createSegments()` first pass unused** | **Low** | Lines 42-93: Event timeline with `events`/`activeSuggestions` produces `segments` that are never used. Only `finalSegments` from the second pass (lines 96-131) is returned. |
|
| 352 |
+
| TD16 | **33 script tags with implicit load-order dependency** | **Medium** | No module system, no dependency declaration. Mixed patterns: `api.js` uses ES6 `export`, `renderer.js`/`selection.js` use CommonJS guards, everything else is plain globals. |
|
| 353 |
+
| TD17 | **~1,124 lines of inline JavaScript in `index.html`** | **Medium** | Page navigation, tab switching, Quran/dialect/summarization logic, Element SDK integration, DOMContentLoaded init — all inline instead of in separate files. |
|
| 354 |
+
| TD18 | **CSS duplication and inconsistency** | **Low** | Multiple duplicate declarations in `components.css`: `.skeleton`, `input[type="range"]`, `.empty-state`, `.editor-stats`, `.footer-bar`, `.card-hover:hover`, `@keyframes fadeIn`. Legacy `--primary-color` coexists with canonical `--color-primary`. Undefined variables `--font-size-sm` and `--font-size-base` referenced. |
|
| 355 |
+
| TD19 | **No build system** | **Low** | No bundler, no tree-shaking, no code-splitting. All JS loaded via `<script>` tags. No asset hashing for cache busting. |
|
| 356 |
+
|
| 357 |
+
### Extension
|
| 358 |
+
|
| 359 |
+
| # | Item | Severity | Details |
|
| 360 |
+
|---|------|----------|---------|
|
| 361 |
+
| TD20 | **60-70% code duplication between `popup.js` and `sidepanel.js`** | **High** | `updateCounts()`, `showToast()`, `setLoading()`, `downloadTxt()`, tab switching, `renderSuggestions()`, summarize/dialect/quran/autocomplete handlers — all nearly identical in both files. Any bug fix must be applied in both places. |
|
| 362 |
+
| TD21 | **Dead code: `content.js`** | **Low** | 12-line stub file, not loaded by manifest. |
|
| 363 |
+
| TD22 | **Dead code: `bayan-state.js`** | **Low** | 127-line WeakRef-based field tracking module, not loaded by manifest or any HTML file. Content script uses local variables instead. |
|
| 364 |
+
| TD23 | **Dual API paths: background.js vs direct fetch** | **Medium** | Content script inline analysis goes through `background.js` (with caching, retry, timeout). Popup/SidePanel call `bayan-api.js` directly via `fetch()` (no caching, no retry, no timeout). Ghost-text autocomplete in content script also calls `fetch()` directly, bypassing background. |
|
| 365 |
+
| TD24 | **No timeouts on popup/sidepanel API calls** | **Medium** | `bayan-api.js` functions accept an optional `AbortSignal` but no caller passes one. If the API hangs, the loading overlay blocks indefinitely. |
|
| 366 |
+
| TD25 | **CSS variable duplication** | **Low** | Popup uses `--bayan-*` variables, sidepanel uses `--sp-*` variables, both defining the same color values. |
|
| 367 |
+
|
| 368 |
+
---
|
| 369 |
+
|
| 370 |
+
## 10. Recommended Roadmap
|
| 371 |
+
|
| 372 |
+
### Phase 1: Security Hardening (Critical — Before Any Growth)
|
| 373 |
+
|
| 374 |
+
**Timeline: 1-2 days** | **Priority: CRITICAL**
|
| 375 |
+
|
| 376 |
+
| # | Task | Effort |
|
| 377 |
+
|---|------|--------|
|
| 378 |
+
| 1 | **Restrict CORS** — Change `origins: "*"` to allowlist `["https://bayan10-bayan-api.hf.space", "chrome-extension://<ext-id>"]` | 30 min |
|
| 379 |
+
| 2 | **Add rate limiting** — Flask-Limiter: 30 req/min/IP for `/api/analyze`, 10/min for others | 1 hour |
|
| 380 |
+
| 3 | **Disable debug endpoint** — Guard `/api/debug-models` behind `app.debug` flag or remove | 15 min |
|
| 381 |
+
| 4 | **Fix `trust_remote_code`** — Change to `False` at `model_loader.py:706` | 5 min |
|
| 382 |
+
| 5 | **Add `MAX_TEXT_LENGTH` check to `/api/punctuation` and `/api/analyze`** | 15 min |
|
| 383 |
+
| 6 | **Bound `/api/autocomplete` `n` parameter** — Cap at `n=10` | 5 min |
|
| 384 |
+
| 7 | **Set `DEBUG_TRACE = False`** in production, or gate behind env var | 5 min |
|
| 385 |
+
| 8 | **Stop leaking telemetry** — Remove `_tel_events` from API response (or gate behind debug flag) | 15 min |
|
| 386 |
+
| 9 | **Escape HTML in meta tag injection** — Use `html.escape()` for Supabase URL/key injection | 10 min |
|
| 387 |
+
| 10 | **Clear sync queue on logout** — Add `SyncQueue.clear()` to `signOut()` | 10 min |
|
| 388 |
+
|
| 389 |
+
### Phase 2: Database & Migration Integrity (High)
|
| 390 |
+
|
| 391 |
+
**Timeline: 1-2 days** | **Priority: HIGH**
|
| 392 |
+
|
| 393 |
+
| # | Task | Effort |
|
| 394 |
+
|---|------|--------|
|
| 395 |
+
| 1 | **Create `002_documents.sql`** with proper schema + RLS policies | 2 hours |
|
| 396 |
+
| 2 | **Create `003_summaries.sql` and `004_settings.sql`** with RLS | 1 hour |
|
| 397 |
+
| 3 | **Add `user_id` filter to single-document operations** — defense-in-depth alongside RLS | 30 min |
|
| 398 |
+
| 4 | **Add soft-delete to documents** — `deleted_at` column instead of hard delete | 1 hour |
|
| 399 |
+
|
| 400 |
+
### Phase 3: Extension Auth Unification (High)
|
| 401 |
+
|
| 402 |
+
**Timeline: 3-5 days** | **Priority: HIGH**
|
| 403 |
+
|
| 404 |
+
| # | Task | Effort |
|
| 405 |
+
|---|------|--------|
|
| 406 |
+
| 1 | **Add Supabase client to extension** — UMD bundle in `shared/` | 1 day |
|
| 407 |
+
| 2 | **Implement auth flow** — `chrome.identity.launchWebAuthFlow()` for Google OAuth | 1 day |
|
| 408 |
+
| 3 | **Session persistence** — Store refresh token in `chrome.storage.local` | 4 hours |
|
| 409 |
+
| 4 | **Enable cloud documents in extension** — Wire up existing SidePanel document UI | 1 day |
|
| 410 |
+
| 5 | **Sync dismissed words** — Persist to `chrome.storage.local` and optionally to cloud | 2 hours |
|
| 411 |
+
|
| 412 |
+
### Phase 4: Backend Refactoring (High)
|
| 413 |
+
|
| 414 |
+
**Timeline: 5-7 days** | **Priority: HIGH**
|
| 415 |
+
|
| 416 |
+
| # | Task | Effort |
|
| 417 |
+
|---|------|--------|
|
| 418 |
+
| 1 | **Decompose `analyze_text()`** into `spelling_stage()`, `grammar_stage()`, `punctuation_stage()` | 2 days |
|
| 419 |
+
| 2 | **Cache morphological analysis** — Run `mle.disambiguate()` once, pass result to all 7 grammar rules | 4 hours |
|
| 420 |
+
| 3 | **Move 12+ `import re` to module level** — Single `import re` at top of file | 30 min |
|
| 421 |
+
| 4 | **Delete dead code** — `hf_inference.py` stubs, unused `model_loader.py` classes, `ENABLE_*` flags | 1 hour |
|
| 422 |
+
| 5 | **Split `app.py`** into `routes/`, `services/`, `middleware/` | 2 days |
|
| 423 |
+
| 6 | **Move root-level test files** into `tests/` | 30 min |
|
| 424 |
+
|
| 425 |
+
### Phase 5: Extension Code Quality (Medium)
|
| 426 |
+
|
| 427 |
+
**Timeline: 3-4 days** | **Priority: MEDIUM**
|
| 428 |
+
|
| 429 |
+
| # | Task | Effort |
|
| 430 |
+
|---|------|--------|
|
| 431 |
+
| 1 | **Extract shared logic** from `popup.js` and `sidepanel.js` into `shared/bayan-core.js` | 1.5 days |
|
| 432 |
+
| 2 | **Unify API path** — Route popup/sidepanel API calls through background.js for consistent caching/retry/timeout | 1 day |
|
| 433 |
+
| 3 | **Delete dead files** — `content.js`, `bayan-state.js` | 15 min |
|
| 434 |
+
| 4 | **Add AbortController timeouts** to `bayan-api.js` functions (60s default) | 2 hours |
|
| 435 |
+
| 5 | **Add English locale** — `_locales/en/messages.json` | 2 hours |
|
| 436 |
+
|
| 437 |
+
### Phase 6: Frontend Fixes & Polish (Medium)
|
| 438 |
+
|
| 439 |
+
**Timeline: 3-4 days** | **Priority: MEDIUM**
|
| 440 |
+
|
| 441 |
+
| # | Task | Effort |
|
| 442 |
+
|---|------|--------|
|
| 443 |
+
| 1 | **Fix `_isApplyingSuggestion` race condition** — Increase guard timeout from 400ms to 600ms, or use a completion callback instead of timer | 30 min |
|
| 444 |
+
| 2 | **Fix `restoreSelection` for range selections** — Add range to selection after creation | 30 min |
|
| 445 |
+
| 3 | **Fix undo stack** — Strip suggestion overlay spans before saving innerHTML snapshot | 1 hour |
|
| 446 |
+
| 4 | **Replace native `prompt()`/`confirm()` with custom dialogs** | 4 hours |
|
| 447 |
+
| 5 | **Fix color picker reset** — Only remove color/highlight, not all formatting | 30 min |
|
| 448 |
+
| 6 | **Switch Tailwind to production build** — Purge unused CSS, save ~3MB per page load | 2 hours |
|
| 449 |
+
| 7 | **Lazy-load vendor libs** — mammoth, docx, html2canvas on first use | 2 hours |
|
| 450 |
+
| 8 | **Delete dead `api.js`** and unused `createSegments()` first pass | 30 min |
|
| 451 |
+
|
| 452 |
+
### Phase 7: Performance Optimization (Low)
|
| 453 |
+
|
| 454 |
+
**Timeline: 2-3 days** | **Priority: LOW**
|
| 455 |
+
|
| 456 |
+
| # | Task | Effort |
|
| 457 |
+
|---|------|--------|
|
| 458 |
+
| 1 | **Add website-side API caching** — localStorage TTL cache like extension background worker | 4 hours |
|
| 459 |
+
| 2 | **Optimize `getEditorText()`** — Extract text without full DOM clone | 2 hours |
|
| 460 |
+
| 3 | **Fix `overlaySuggestions` O(N×M)** — Build text node map once, update incrementally | 4 hours |
|
| 461 |
+
| 4 | **Add CSS/JS minification** to Docker build | 2 hours |
|
| 462 |
+
| 5 | **Add loading skeletons** for editor page cold start | 2 hours |
|
| 463 |
+
| 6 | **Add `content_security_policy`** to extension manifest | 30 min |
|
| 464 |
+
|
| 465 |
+
---
|
| 466 |
+
|
| 467 |
+
## Summary Matrix
|
| 468 |
+
|
| 469 |
+
| Category | Critical | High | Medium | Low | Total |
|
| 470 |
+
|----------|---------|------|--------|-----|-------|
|
| 471 |
+
| **Missing Features** | 3 (C1-C3) | 5 (H1-H5) | 6 (M1-M6) | 5 (L1-L5) | **19** |
|
| 472 |
+
| **Bugs** | 0 | 2 (B1-B2) | 8 (B3-B8, B13, B17) | 8 (B9-B12, B14-B16, B18) | **18** |
|
| 473 |
+
| **Security** | 2 (S1-S2) | 5 (S3-S7) | 4 (S8-S11) | 3 (S12-S14) | **14** |
|
| 474 |
+
| **Performance** | 0 | 3 (P1-P3) | 8 (P4-P11) | 3 (P12-P14) | **14** |
|
| 475 |
+
| **UX** | 0 | 0 | 5 (U1-U5) | 5 (U6-U10) | **10** |
|
| 476 |
+
| **Tech Debt** | 0 | 3 (TD1-TD2, TD20) | 11 | 11 | **25** |
|
| 477 |
+
| **TOTAL** | **5** | **18** | **42** | **35** | **100** |
|
| 478 |
+
|
| 479 |
+
---
|
| 480 |
+
|
| 481 |
+
## Final Verdict
|
| 482 |
+
|
| 483 |
+
Bayan is a technically impressive Arabic NLP platform with a well-designed multi-stage correction pipeline (Spelling → Grammar → Punctuation), sophisticated offset mapping via PipelineContext/OffsetMapper/StageLocker, a mature contenteditable editor engine, and a Chrome extension that correctly follows Manifest V3 best practices.
|
| 484 |
+
|
| 485 |
+
### What Bayan Does Well
|
| 486 |
+
|
| 487 |
+
- **NLP Pipeline Architecture**: PipelineContext + PatchSet + StageLocker provide deterministic multi-stage coordination with overlap resolution and hierarchical locking. 20+ safety guards prevent hallucinations.
|
| 488 |
+
- **Editor Engine**: Custom contenteditable with character-offset-based selection save/restore, reverse-order suggestion processing to avoid offset invalidation, and overlay-only rendering that never modifies user DOM.
|
| 489 |
+
- **Extension Design**: Minimal permissions, proper HTML escaping throughout, thoughtful protected-site handling, LRU cache with collision-safe hashing, overlay-only rendering on 3rd-party sites.
|
| 490 |
+
- **Auth Architecture**: Clean layered design (config → client → session → auth → UI) with PKCE flow, guest-to-Google upgrade path, `window.__bayanAuth` facade for decoupled downstream consumption, and graceful offline degradation.
|
| 491 |
+
- **Sync System**: Offline-first with persistent localStorage queue, debounced flush, mutex-guarded sync, and automatic reconnection.
|
| 492 |
+
- **Benchmark Coverage**: 320 tests across 8 datasets (spelling, grammar, punctuation, entities, religious, structured, hallucination, collision) at 94.37% pass rate.
|
| 493 |
+
|
| 494 |
+
### What Must Be Fixed Before Growth
|
| 495 |
+
|
| 496 |
+
1. **Security** (5 critical/high items): Wildcard CORS + zero rate limiting + zero API auth = anyone can abuse compute. Debug endpoint leaks internals. `trust_remote_code=True` and `weights_only=False` allow arbitrary code execution from model repos.
|
| 497 |
+
2. **Extension Auth Gap**: Extension users cannot access cloud documents, settings, or history — breaks the SaaS value proposition entirely.
|
| 498 |
+
3. **Database Integrity**: No migration files for 3 of 4 core tables. RLS policies documented but unverifiable from codebase.
|
| 499 |
+
4. **Performance Bottleneck**: Grammar stage does 7 redundant morphological analysis passes. Spelling stage runs O(N) BERT forward passes for MLM scoring. Grammar depends on an external Gradio Space with unpredictable latency.
|
| 500 |
+
5. **Code Architecture**: `analyze_text()` at 1,224 lines and `_is_small_spelling_change()` at 513 lines are unmaintainable. 60-70% popup/sidepanel duplication means every bug fix must be applied twice.
|
| 501 |
+
|
| 502 |
+
### Bottom Line
|
| 503 |
+
|
| 504 |
+
Bayan is **80% of the way to a production-grade SaaS product**. The NLP pipeline, editor engine, and extension architecture are solid foundations. The remaining 20% is:
|
| 505 |
+
|
| 506 |
+
- **Week 1**: Security hardening (CORS, rate limiting, debug endpoint, model loading) + database migrations with RLS
|
| 507 |
+
- **Week 2**: Extension authentication + cloud document access
|
| 508 |
+
- **Week 3**: Backend decomposition + grammar performance optimization + extension code deduplication
|
| 509 |
+
|
| 510 |
+
Total estimated effort: **3-4 focused weeks** to reach production readiness.
|
{reports → archive/benchmark_reports}/Phase10_Post_IVtoOOV_Audit.md
RENAMED
|
File without changes
|
{reports → archive/benchmark_reports}/benchmark_audit.md
RENAMED
|
File without changes
|
{reports → archive/benchmark_reports}/benchmark_samples.md
RENAMED
|
File without changes
|
{reports → archive/benchmark_reports}/regression_benchmark_audit.md
RENAMED
|
File without changes
|
debug_pc002.py → archive/dev_tests/debug_pc002.py
RENAMED
|
File without changes
|
debug_pc023.py → archive/dev_tests/debug_pc023.py
RENAMED
|
File without changes
|
debug_pipeline.py → archive/dev_tests/debug_pipeline.py
RENAMED
|
File without changes
|
debug_punctuation.py → archive/dev_tests/debug_punctuation.py
RENAMED
|
File without changes
|
extract_failures.py → archive/dev_tests/extract_failures.py
RENAMED
|
File without changes
|
extract_grammar_fails.py → archive/dev_tests/extract_grammar_fails.py
RENAMED
|
File without changes
|
extract_pc023.py → archive/dev_tests/extract_pc023.py
RENAMED
|
File without changes
|
test_camel.py → archive/dev_tests/test_camel.py
RENAMED
|
File without changes
|
test_colon.py → archive/dev_tests/test_colon.py
RENAMED
|
File without changes
|
test_failures.py → archive/dev_tests/test_failures.py
RENAMED
|
File without changes
|
test_grammar_fast.py → archive/dev_tests/test_grammar_fast.py
RENAMED
|
File without changes
|
test_grammar_fixes.py → archive/dev_tests/test_grammar_fixes.py
RENAMED
|
File without changes
|
test_grammar_logic.py → archive/dev_tests/test_grammar_logic.py
RENAMED
|
File without changes
|
test_grammar_only.py → archive/dev_tests/test_grammar_only.py
RENAMED
|
File without changes
|
test_grammar_rules.py → archive/dev_tests/test_grammar_rules.py
RENAMED
|
File without changes
|
test_kana.py → archive/dev_tests/test_kana.py
RENAMED
|
File without changes
|
test_local.py → archive/dev_tests/test_local.py
RENAMED
|
File without changes
|
test_mapper.py → archive/dev_tests/test_mapper.py
RENAMED
|
File without changes
|
test_mapper_isolated.py → archive/dev_tests/test_mapper_isolated.py
RENAMED
|
File without changes
|
test_mlm.py → archive/dev_tests/test_mlm.py
RENAMED
|
File without changes
|
test_models.py → archive/dev_tests/test_models.py
RENAMED
|
File without changes
|
test_pc.py → archive/dev_tests/test_pc.py
RENAMED
|
File without changes
|
test_pc001.py → archive/dev_tests/test_pc001.py
RENAMED
|
File without changes
|
test_pc002.py → archive/dev_tests/test_pc002.py
RENAMED
|
File without changes
|
test_pc002_api.py → archive/dev_tests/test_pc002_api.py
RENAMED
|
File without changes
|
test_pc023.py → archive/dev_tests/test_pc023.py
RENAMED
|
File without changes
|
test_pc027.py → archive/dev_tests/test_pc027.py
RENAMED
|
File without changes
|
test_pc034.py → archive/dev_tests/test_pc034.py
RENAMED
|
File without changes
|
test_pc044.py → archive/dev_tests/test_pc044.py
RENAMED
|
File without changes
|
test_pos.py → archive/dev_tests/test_pos.py
RENAMED
|
File without changes
|
test_punc.py → archive/dev_tests/test_punc.py
RENAMED
|
File without changes
|
test_punc_rules.py → archive/dev_tests/test_punc_rules.py
RENAMED
|
File without changes
|
test_punctuation.py → archive/dev_tests/test_punctuation.py
RENAMED
|
File without changes
|
test_raw_punc.py → archive/dev_tests/test_raw_punc.py
RENAMED
|
File without changes
|
test_sv.py → archive/dev_tests/test_sv.py
RENAMED
|
File without changes
|
extension/IMPLEMENTATION_CHANGELOG.md → archive/phase_reports/extension_changelog.md
RENAMED
|
File without changes
|