diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 8de5b6fbd7a6083745df5db310f10b1445313f1a..def9cfb7aedef219212ab18438eeb68564ee1643 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -28,7 +28,7 @@ jobs: - name: Verify critical files exist run: | - for f in src/app.py src/model_loader.py src/hf_inference.py src/index.html \ + for f in src/app.py src/model_loader.py src/index.html \ src/nlp/__init__.py src/nlp/spelling/araspell_service.py \ src/nlp/grammar/grammar_service.py src/nlp/punctuation/punctuation_service.py \ Dockerfile Procfile requirements.txt; do @@ -36,11 +36,11 @@ jobs: done echo "✅ All critical files present" - - name: Verify API routes defined in app.py + - name: Verify API routes defined run: | for route in "/api/health" "/api/analyze" "/api/summarize" "/api/spelling" \ "/api/grammar" "/api/punctuation" "/api/quran"; do - grep -q "$route" src/app.py && echo " ✅ $route" || { echo " ❌ MISSING ROUTE: $route"; exit 1; } + grep -rq "$route" src/routes/ src/app.py && echo " ✅ $route" || { echo " ❌ MISSING ROUTE: $route"; exit 1; } done echo "✅ All API routes defined" diff --git a/.gitignore b/.gitignore index d0405c7c744d40b5d280061f5e47658ad159f9b9..3929af4605f7fcbb542584a5f9e88813792710a0 100644 --- a/.gitignore +++ b/.gitignore @@ -38,4 +38,15 @@ node_modules/ # Test artifacts .pytest_cache/ test-results/ -extension/assets/icons/*.png \ No newline at end of file +extension/assets/icons/*.png + +# Build output +dist/ +src/js/bayan.bundle.js + +# Debug/temp output +out*.txt +local_debug.txt +pc_data.txt +camel_test_out.json +grammar_fails_output.md \ No newline at end of file diff --git a/BAYAN_COMPLETE_AUDIT.md b/BAYAN_COMPLETE_AUDIT.md deleted file mode 100644 index 7a19b94a606da9f4a87e4611621258a4b0d76e3f..0000000000000000000000000000000000000000 --- a/BAYAN_COMPLETE_AUDIT.md +++ /dev/null @@ -1,366 +0,0 @@ -# BAYAN — Complete Product, Codebase & Extension Deep Audit - -> **Audit Date:** 2026-06-26 -> **Auditor Perspective:** Product Manager + Senior Frontend + Backend Architect + Extension Engineer + SaaS Reviewer - ---- - -## 1. Current System Overview - -### Architecture Map - -``` -┌──────────────────────────────────────────────────────┐ -│ BAYAN ECOSYSTEM │ -│ │ -│ ┌─────────┐ ┌──────────┐ ┌─────────────────┐ │ -│ │ Website │───▶│ Flask API │───▶│ NLP Pipeline │ │ -│ │ (SPA) │ │ (app.py) │ │ Spell/Gram/Punct│ │ -│ └─────────┘ └──────────┘ └─────────────────┘ │ -│ │ │ │ │ -│ │ │ ┌─────────────────┐ │ -│ │ ├─────────▶│ HF Models │ │ -│ │ │ │ Summarization │ │ -│ │ │ │ Grammar (Gradio)│ │ -│ │ │ └─────────────────┘ │ -│ │ │ │ -│ ┌─────────┐ ┌──────────┐ ┌─────────────────┐ │ -│ │Supabase │◀───│ Auth │───▶│ Documents DB │ │ -│ │ (Cloud) │ │ Module │ │ Settings Sync │ │ -│ └─────────┘ └──────────┘ └─────────────────┘ │ -│ │ -│ ┌────────────────────────────────────────────────┐ │ -│ │ Chrome Extension (MV3) │ │ -│ │ ┌──────────┐ ┌──────────┐ ┌───────────────┐ │ │ -│ │ │ Content │ │Background│ │ Side Panel │ │ │ -│ │ │ Script │ │ Worker │ │ + Popup │ │ │ -│ │ └──────────┘ └──────────┘ └───────────────┘ │ │ -│ └────────────────────────────────────────────────┘ │ -└──────────────────────────────────────────────────────┘ -``` - -### Technology Stack - -| Layer | Technology | Notes | -|-------|-----------|-------| -| **Frontend** | Vanilla JS, HTML, CSS (Tailwind CDN) | Custom `contenteditable` editor engine | -| **Backend** | Flask (Python) | Single monolith `app.py` — 2,844 lines | -| **NLP Pipeline** | Custom Python modules | Spelling, Grammar, Punctuation, Autocomplete, Dialect | -| **AI Models** | Transformer-based | Summarization (local), Grammar (Gradio proxy), Spelling (CAMeL + custom) | -| **Database** | Supabase (PostgreSQL) | Documents, profiles, user settings | -| **Auth** | Supabase Auth | Guest (anonymous), Google OAuth | -| **Deployment** | HuggingFace Spaces (Docker) | CPU-only free tier | -| **Extension** | Chrome MV3 | Background SW, Content Script, Side Panel, Popup | - -### File Structure Summary - -| Directory | Files | Purpose | -|-----------|-------|---------| -| `src/` | 6 core files | Backend + HTML + CSS | -| `src/js/` | 8 JS files + 7 subdirs | Frontend logic | -| `src/js/auth/` | 5 files | Supabase auth (client, session, UI) | -| `src/js/documents/` | 4 files | Local doc management + export | -| `src/js/documents-cloud/` | 3 files | Supabase CRUD for documents | -| `src/js/sync/` | 3 files | Offline queue + conflict resolution | -| `src/js/settings-sync/` | 2 files | User settings cloud persistence | -| `src/nlp/` | 6 subdirs | All NLP processing modules | -| `extension/` | 8 files + 4 subdirs | Chrome Extension | -| `extension/shared/` | 9 files | Shared utilities (api, renderer, patches) | -| `extension/sidepanel/` | 3 files | Side panel UI | -| `tests/` | 16 test files | Backend unit tests | -| `extension/tests/` | 8 files | Extension integration tests | - ---- - -## 2. Feature Inventory - -### Core AI Features - -| Feature | Backend API | Website Frontend | Extension | Files | -|---------|------------|-----------------|-----------|-------| -| **Spelling Correction** | ✅ `/api/spelling` + `/api/analyze` | ✅ Full (highlights, suggestions, apply) | ✅ Inline overlay + Popup + SidePanel | `nlp/spelling/`, `editor.js`, `renderer.js` | -| **Grammar Correction** | ✅ `/api/grammar` + `/api/analyze` | ✅ Full (via Gradio proxy to HF model) | ✅ Inline overlay + Popup + SidePanel | `nlp/grammar/`, `hf_inference.py` | -| **Punctuation** | ✅ `/api/punctuation` + `/api/analyze` | ✅ Full (PuncAra-v1 model) | ✅ Inline overlay + Popup + SidePanel | `nlp/punctuation/` | -| **Summarization** | ✅ `/api/summarize` | ✅ Full (tab in editor, length control) | ✅ Popup tab + SidePanel tab | `model_loader.py`, `summaries-api.js` | -| **AutoComplete** | ✅ `/api/autocomplete` | ✅ Ghost text + dropdown in editor | ⚠️ SidePanel text-box only, NO inline ghost text | `autocomplete.js`, sidepanel `btnAutocomplete` | -| **Dialect→MSA** | ✅ `/api/dialect` | ✅ Dedicated editor tab | ✅ SidePanel tab (basic text→text) | `nlp/dialect/` | -| **Quran Verification** | ✅ `/api/quran` | ✅ Dedicated editor tab | ✅ SidePanel tab (basic text→text) | `quran.py`, `quran_master.db` | - -### Platform Features - -| Feature | Website | Extension (Popup) | Extension (SidePanel) | Extension (Content Script) | -|---------|---------|-------------------|----------------------|--------------------------| -| **Authentication** | ✅ Guest + Google | ❌ None | ⚠️ Partial (`initExtensionAuth()` exists but requires web page auth sync) | ⚠️ Listens for `BAYAN_AUTH_SYNC` message from web | -| **Document Save** | ✅ Supabase CRUD | ❌ None | ⚠️ UI exists (`btnNewDocument`, `btnSaveSelection`) but depends on auth | ❌ None | -| **Document Load/History** | ✅ Full panel | ❌ None | ⚠️ UI exists (`documentsList`, `historyList`) but depends on auth | ❌ None | -| **Export (PDF/DOCX/TXT)** | ✅ Full (mammoth.js, docx.js) | ❌ None | ❌ None | ❌ None | -| **Import (TXT/DOCX)** | ✅ Full | ❌ None | ❌ None | ❌ None | -| **Settings Sync** | ✅ Supabase | ❌ None | ⚠️ Placeholder (`syncExtensionSettings()`) | ❌ None | -| **Theme Toggle** | ✅ Full dark/light | ❌ Hardcoded dark | ✅ Dark only | N/A | -| **Focus Mode** | ✅ Full | N/A | ❌ None | N/A | -| **Score Ring** | ✅ Animated SVG | ✅ Simplified | ✅ Simplified | ❌ None | -| **Writing Score History** | ✅ Sparkline chart | ❌ None | ❌ None | ❌ None | -| **Error Donut Chart** | ✅ SVG donut | ❌ None | ❌ None | ❌ None | -| **Offline Mode** | ✅ Graceful degradation | ❌ No offline handling | ❌ No offline handling | ❌ No offline handling | -| **Keyboard Shortcuts** | ✅ Extensive (Alt+1-3, Ctrl+S, etc.) | ❌ None | ❌ None | ❌ None | - ---- - -## 3. Website vs Extension Comparison - -### Authentication Flow - -| Aspect | Website | Extension | Gap | -|--------|---------|-----------|-----| -| Guest login | ✅ `signInAnonymously()` | ❌ | **Critical** — extension users can't persist anything | -| Google OAuth | ✅ `signInWithOAuth()` | ❌ | **High** | -| Session restore | ✅ `restoreSession()` via Supabase | ❌ | **High** | -| Auth state sync | ✅ `onAuthStateChange()` | ⚠️ Listens for `BAYAN_AUTH_SYNC` postMessage but only works when user visits Bayan website with extension installed | **High** — unreliable | -| Auth-gated features | ✅ Documents, sync, settings | ⚠️ UI elements exist but non-functional without auth | **High** | - -### AI Feature Comparison - -| Feature | Website UX | Extension UX | Parity? | -|---------|-----------|-------------|---------| -| Analyze (S+G+P) | Rich editor with inline highlights, suggestion sidebar, popover tooltip, apply/dismiss per-suggestion | **Content Script:** Overlay marks + tooltip. **Popup/SidePanel:** Textarea + suggestion cards | ⚠️ Functional but UX gap | -| Summarize | Editor tab with radio buttons (short/medium/long) | Popup/SidePanel textarea with radio buttons | ✅ Near parity | -| AutoComplete | **Ghost text** inside editor (Tab to accept) | SidePanel has a text box with "إكمال" button but NO inline ghost text on 3rd party sites | **Medium** — missing the core UX | -| Dialect | Dedicated editor tab with "Convert" button | SidePanel tab with text box and "Convert" button | ✅ Near parity | -| Quran | Dedicated editor tab with search | SidePanel tab with text box and search | ✅ Near parity | - -### Documents - -| Aspect | Website | Extension | Gap | -|--------|---------|-----------|-----| -| Create document | ✅ `createDocument()` | ⚠️ Button exists in SidePanel but blocked by no auth | **High** | -| List documents | ✅ Desktop sidebar panel | ⚠️ `documentsList` in SidePanel workspace tab, blocked by no auth | **High** | -| Save/auto-save | ✅ Debounced sync via `SyncManager` | ❌ | **High** | -| Export PDF/DOCX | ✅ `export.js` | ❌ | **Medium** | -| Import | ✅ `import.js` (TXT, DOCX) | ❌ | **Low** | - ---- - -## 4. Missing Features - -### Critical (Blocks Production) - -| # | Issue | Impact | Solution | -|---|-------|--------|----------| -| C1 | **`.env` file committed to Git** | Supabase URL and anon key are in the repo. While anon key is safe for client use, this is a security anti-pattern and may expose the project URL. | Remove `.env` from Git history, use HF Spaces secrets exclusively. `.gitignore` has `.env` but it was committed before the rule was added. | -| C2 | **CORS wildcard `origins: "*"`** | Any website can call `/api/analyze`, `/api/summarize`, etc. directly. Abusers can drain compute. | Restrict CORS to `bayan10-bayan-api.hf.space` + extension origin `chrome-extension://`. | -| C3 | **No rate limiting on API** | No throttle on any endpoint. A single user can overwhelm the free-tier HF Space. | Add Flask-Limiter or simple in-memory token bucket. | - -### High (Important Feature Gap) - -| # | Issue | Impact | Solution | -|---|-------|--------|----------| -| H1 | Extension has no auth | Users cannot access cloud docs, settings, or history from extension | Implement Supabase auth in extension via `chrome.identity` or shared session from Bayan website | -| H2 | Extension content script lacks AutoComplete ghost text | The flagship "ghost text" feature doesn't work on 3rd-party sites | Port `autocomplete.js` logic into `content-inline.js` with `/api/autocomplete` calls | -| H3 | Extension popup/sidepanel have no export | Users cannot export corrected text as PDF/DOCX | Add "Copy as formatted text" or lightweight export | -| H4 | No `documents` table migration | `supabase/migrations/001_profiles.sql` exists but no migration creates the `documents` table that `documents-api.js` uses | Create `002_documents.sql` migration | -| H5 | Backend monolith: `app.py` is 2,844 lines | Extremely difficult to maintain, test, or extend | Split into `routes/`, `services/`, `middleware/` modules | - -### Medium (Improvement Needed) - -| # | Issue | Impact | Solution | -|---|-------|--------|----------| -| M1 | `src/js/api.js` uses ES module `export` syntax but is loaded via ` + + diff --git a/extension/popup.js b/extension/popup.js index 6f892d0600bc3b15b56a7c62c8a8617fabca8504..6a137062bb88de2d3a96a92548d255f72b1f4daa 100644 --- a/extension/popup.js +++ b/extension/popup.js @@ -35,14 +35,6 @@ document.addEventListener('DOMContentLoaded', () => { const summaryMeta = document.getElementById('summary-meta'); const btnCopySummary = document.getElementById('btn-copy-summary'); - // Score elements - const scoreValue = document.getElementById('score-value'); - const scoreCircle = document.getElementById('score-circle'); - const scoreHint = document.getElementById('score-hint'); - const countSpelling = document.getElementById('count-spelling'); - const countGrammar = document.getElementById('count-grammar'); - const countPunctuation = document.getElementById('count-punctuation'); - // ══════════════════════════════════════════════════════════ // State // ══════════════════════════════════════════════════════════ @@ -62,8 +54,6 @@ document.addEventListener('DOMContentLoaded', () => { */ let isStale = false; - const SCORE_CIRCUMFERENCE = 440; - // ══════════════════════════════════════════════════════════ // Tab switching // ══════════════════════════════════════════════════════════ @@ -81,15 +71,8 @@ document.addEventListener('DOMContentLoaded', () => { }); // ══════════════════════════════════════════════════════════ - // Character & word counter + // Character & word counter (shared: bayan-core.js) // ══════════════════════════════════════════════════════════ - function updateCounts(textarea, charEl, wordEl) { - const text = textarea.value; - const chars = text.length; - const words = text.trim() ? text.trim().split(/\s+/).length : 0; - if (charEl) charEl.textContent = chars.toLocaleString('ar-EG'); - if (wordEl) wordEl.textContent = words.toLocaleString('ar-EG'); - } inputText.addEventListener('input', () => { updateCounts(inputText, charCount, wordCount); @@ -149,40 +132,7 @@ document.addEventListener('DOMContentLoaded', () => { loadingTextEl.textContent = text; } - // ══════════════════════════════════════════════════════════ - // Toast - // ══════════════════════════════════════════════════════════ - function showToast(message, duration = 2500) { - const toast = document.getElementById('toast'); - toast.textContent = message; - toast.classList.add('is-visible'); - clearTimeout(toast._timer); - toast._timer = setTimeout(() => toast.classList.remove('is-visible'), duration); - } - - // ══════════════════════════════════════════════════════════ - // Score ring - // ══════════════════════════════════════════════════════════ - function updateScore(spelling, grammar, punctuation) { - const score = calculateWritingScore(spelling, grammar, punctuation); - const total = spelling + grammar + punctuation; - - scoreSection.classList.remove('is-hidden'); - - if (scoreValue) { - scoreValue.textContent = score > 0 || total > 0 ? score.toLocaleString('ar-EG') : '--'; - } - if (scoreCircle) { - const offset = SCORE_CIRCUMFERENCE - (score / 100) * SCORE_CIRCUMFERENCE; - scoreCircle.style.strokeDashoffset = String(offset); - } - if (scoreHint) { - scoreHint.textContent = getScoreHint(score, total); - } - if (countSpelling) countSpelling.textContent = spelling.toLocaleString('ar-EG'); - if (countGrammar) countGrammar.textContent = grammar.toLocaleString('ar-EG'); - if (countPunctuation) countPunctuation.textContent = punctuation.toLocaleString('ar-EG'); - } + // Score ring — shared via bayan-core.js (updateScore) // ══════════════════════════════════════════════════════════ // Render suggestions list @@ -218,6 +168,9 @@ document.addEventListener('DOMContentLoaded', () => { if (altText === suggestion.original) { // Dismiss — remove from list, no text change, no rebase needed currentSuggestions = removeSuggestion(currentSuggestions, suggestion.id); + if (suggestion.type === 'spelling' && typeof BayanAuth !== 'undefined') { + BayanAuth.addDismissedWord(suggestion.original); + } } else { // ═══════════════════════════════════════════════════ // HIGH-1 FIX: Apply + Rebase via atomic function @@ -287,7 +240,15 @@ document.addEventListener('DOMContentLoaded', () => { const data = await bayanAnalyze(text); if (data.status === 'success' || data.status === 'partial') { - const suggestions = sortSuggestions(data.suggestions || []); + let suggestions = sortSuggestions(data.suggestions || []); + + if (typeof BayanAuth !== 'undefined') { + const dismissed = await BayanAuth.getDismissedWords(); + if (dismissed.length > 0) { + suggestions = suggestions.filter(s => !(s.type === 'spelling' && dismissed.includes(s.original))); + } + } + currentSuggestions = suggestions; // MED-2: Snapshot the analyzed text — all offsets reference THIS string @@ -547,26 +508,8 @@ document.addEventListener('DOMContentLoaded', () => { // ══════════════════════════════════════════════════════════ // Phase 5: Download corrected text / summary as .txt - // Buttons are injected programmatically to avoid touching popup.html. + // downloadTxt shared via bayan-core.js // ══════════════════════════════════════════════════════════ - function downloadTxt(text, filename) { - if (!text) { showToast('لا يوجد نص للتنزيل'); return; } - try { - const blob = new Blob([text], { type: 'text/plain;charset=utf-8' }); - const url = URL.createObjectURL(blob); - const a = document.createElement('a'); - a.href = url; - a.download = filename; - document.body.appendChild(a); - a.click(); - a.remove(); - setTimeout(() => URL.revokeObjectURL(url), 1000); - showToast('✓ تم تنزيل الملف'); - } catch (e) { - console.error('[Bayan] Download error:', e); - showToast('تعذّر التنزيل'); - } - } const DOWNLOAD_ICON = ''; @@ -592,6 +535,11 @@ document.addEventListener('DOMContentLoaded', () => { 'bayan-summary.txt' ); + // ══════════════════════════════════════════════════════════ + // Auth UI wiring (shared via bayan-core.js) + // ══════════════════════════════════════════════════════════ + bayanInitAuth(); + // ══════════════════════════════════════════════════════════ // Status check on load // ══════════════════════════════════════════════════════════ @@ -678,5 +626,27 @@ document.addEventListener('DOMContentLoaded', () => { console.warn('[Bayan] Context action check failed:', err); } })(); + + // U4: Persist popup state to chrome.storage.session + const _popupStorage = chrome.storage?.session || chrome.storage?.local; + if (_popupStorage) { + _popupStorage.get(['popup_state'], (d) => { + if (d.popup_state && inputText && !inputText.value) { + inputText.value = d.popup_state.text || ''; + if (charCount) charCount.textContent = inputText.value.length; + var wc = inputText.value.trim().split(/\s+/).filter(w => w).length; + if (wordCount) wordCount.textContent = wc; + } + }); + var _popupSaveTimer = null; + if (inputText) { + inputText.addEventListener('input', () => { + clearTimeout(_popupSaveTimer); + _popupSaveTimer = setTimeout(() => { + _popupStorage.set({ popup_state: { text: inputText.value, ts: Date.now() } }); + }, 1000); + }); + } + } }); diff --git a/extension/shared/bayan-api.js b/extension/shared/bayan-api.js index 869144cfe453be4b20ac73b9612d21f8fc3198b7..c1cace9e42db0c7724cdf0611c1cab818996f536 100644 --- a/extension/shared/bayan-api.js +++ b/extension/shared/bayan-api.js @@ -7,21 +7,50 @@ * Endpoints: analyze, summarize, dialect, quran, autocomplete, health. */ +const _API_TIMEOUT_MS = 60000; + +function _timedFetch(url, options = {}, callerSignal) { + const signals = [AbortSignal.timeout(_API_TIMEOUT_MS)]; + if (callerSignal) signals.push(callerSignal); + return fetch(url, { ...options, signal: AbortSignal.any(signals) }); +} + /** * Send text to the unified analysis pipeline. - * Backend: /api/analyze (Spelling → Grammar → Punctuation) + * Routes through background.js (INLINE_ANALYZE) for cache + retry benefits. + * Falls back to direct fetch if the message channel is unavailable. * * @param {string} text - Arabic text to analyze - * @param {AbortSignal} [signal] - Optional abort signal + * @param {AbortSignal} [signal] - Optional abort signal (used only in fallback path) * @returns {Promise} { original, corrected, suggestions[], timing_ms, status } */ async function bayanAnalyze(text, signal) { - const response = await fetch(`${CONFIG.API_BASE}/api/analyze`, { + if (typeof chrome !== 'undefined' && chrome.runtime && chrome.runtime.sendMessage) { + try { + const result = await new Promise((resolve, reject) => { + chrome.runtime.sendMessage({ type: 'INLINE_ANALYZE', text }, (response) => { + if (chrome.runtime.lastError) { + reject(new Error(chrome.runtime.lastError.message)); + return; + } + if (!response || response.error) { + reject(new Error(response?.error || 'No response from background')); + return; + } + resolve(response.data); + }); + }); + if (result) return result; + } catch (e) { + console.warn('[Bayan API] Background route failed, falling back to direct fetch:', e.message); + } + } + + const response = await _timedFetch(`${CONFIG.API_BASE}/api/analyze`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ text }), - signal, - }); + }, signal); if (!response.ok) { throw new Error(`Analyze API error: ${response.status}`); } @@ -37,12 +66,12 @@ async function bayanAnalyze(text, signal) { * @param {boolean} [fullText=true] - Summarize full text or first paragraph * @returns {Promise} { summary, status, original_length, summary_length } */ -async function bayanSummarize(text, length = 2, fullText = true) { - const response = await fetch(`${CONFIG.API_BASE}/api/summarize`, { +async function bayanSummarize(text, length = 2, fullText = true, signal) { + const response = await _timedFetch(`${CONFIG.API_BASE}/api/summarize`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ text, length, full_text: fullText }), - }); + }, signal); if (!response.ok) { throw new Error(`Summarize API error: ${response.status}`); } @@ -58,12 +87,11 @@ async function bayanSummarize(text, length = 2, fullText = true) { * @returns {Promise} { original_text, converted_text, status } */ async function bayanDialect(text, signal) { - const response = await fetch(`${CONFIG.API_BASE}/api/dialect`, { + const response = await _timedFetch(`${CONFIG.API_BASE}/api/dialect`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ text }), - signal, - }); + }, signal); if (!response.ok) { throw new Error(`Dialect API error: ${response.status}`); } @@ -80,12 +108,11 @@ async function bayanDialect(text, signal) { * @returns {Promise} { matched_segment, full_verse, ... } or { error } */ async function bayanQuran(text, language = 'تدقيق الايات', signal) { - const response = await fetch(`${CONFIG.API_BASE}/api/quran`, { + const response = await _timedFetch(`${CONFIG.API_BASE}/api/quran`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ text, language }), - signal, - }); + }, signal); // Quran endpoint returns 404 with a JSON {error} body on "no match" — // treat that as a normal (non-throwing) result so the UI can show it. if (!response.ok && response.status !== 404) { @@ -104,12 +131,11 @@ async function bayanQuran(text, language = 'تدقيق الايات', signal) { * @returns {Promise} { suggestions: string[], status } */ async function bayanAutocomplete(context, n = 3, signal) { - const response = await fetch(`${CONFIG.API_BASE}/api/autocomplete`, { + const response = await _timedFetch(`${CONFIG.API_BASE}/api/autocomplete`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ context, n }), - signal, - }); + }, signal); if (!response.ok) { throw new Error(`Autocomplete API error: ${response.status}`); } @@ -123,7 +149,7 @@ async function bayanAutocomplete(context, n = 3, signal) { * @returns {Promise} { status, mode, models } */ async function bayanHealthCheck() { - const response = await fetch(`${CONFIG.API_BASE}/api/health`, { + const response = await _timedFetch(`${CONFIG.API_BASE}/api/health`, { method: 'GET', }); if (!response.ok) { diff --git a/extension/shared/bayan-auth.js b/extension/shared/bayan-auth.js new file mode 100644 index 0000000000000000000000000000000000000000..d3ffe579e6aa27a01013393ad7e754033d284ef7 --- /dev/null +++ b/extension/shared/bayan-auth.js @@ -0,0 +1,325 @@ +/** + * Bayan Chrome Extension — Auth Module + * + * Lightweight Supabase auth via REST API + chrome.identity. + * No Supabase JS SDK required. + * + * Flow: + * 1. Fetch Supabase config from /api/config + * 2. Restore session from chrome.storage.local + * 3. Google OAuth via chrome.identity.launchWebAuthFlow() + * 4. Token refresh via Supabase REST /auth/v1/token + */ + +const BayanAuth = (() => { + const STORAGE_KEY = 'bayan_auth_session'; + const CONFIG_CACHE_KEY = 'bayan_supabase_config'; + const DISMISSED_WORDS_KEY = 'bayan_dismissed_words'; + const TOKEN_REFRESH_MARGIN_MS = 60_000; + + let _config = null; + let _session = null; + let _user = null; + let _listeners = []; + + function _notify(event) { + _listeners.forEach(fn => { + try { fn(event, _user, _session); } catch (e) { console.warn('[BayanAuth] listener error:', e); } + }); + } + + async function _fetchConfig() { + if (_config) return _config; + + const storage = chrome.storage?.local; + if (storage) { + try { + const cached = await storage.get([CONFIG_CACHE_KEY]); + if (cached[CONFIG_CACHE_KEY] && cached[CONFIG_CACHE_KEY].supabase_url) { + _config = cached[CONFIG_CACHE_KEY]; + } + } catch {} + } + + try { + const apiBase = typeof BAYAN !== 'undefined' ? BAYAN.API_BASE : 'https://bayan10-bayan-api.hf.space'; + const res = await fetch(`${apiBase}/api/config`, { method: 'GET' }); + if (res.ok) { + const data = await res.json(); + if (data.supabase_url && data.supabase_anon_key) { + _config = data; + if (storage) { + storage.set({ [CONFIG_CACHE_KEY]: data }).catch(() => {}); + } + } + } + } catch (e) { + console.warn('[BayanAuth] Failed to fetch config:', e.message); + } + + return _config; + } + + function _parseHashParams(url) { + try { + const hash = new URL(url).hash.substring(1); + return Object.fromEntries(new URLSearchParams(hash)); + } catch { + return {}; + } + } + + async function _fetchUser(accessToken) { + if (!_config) return null; + try { + const res = await fetch(`${_config.supabase_url}/auth/v1/user`, { + headers: { + 'apikey': _config.supabase_anon_key, + 'Authorization': `Bearer ${accessToken}`, + }, + }); + if (!res.ok) return null; + return await res.json(); + } catch { + return null; + } + } + + async function _refreshToken(refreshToken) { + if (!_config) return null; + try { + const res = await fetch(`${_config.supabase_url}/auth/v1/token?grant_type=refresh_token`, { + method: 'POST', + headers: { + 'apikey': _config.supabase_anon_key, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ refresh_token: refreshToken }), + }); + if (!res.ok) return null; + return await res.json(); + } catch { + return null; + } + } + + async function _saveSession(session) { + _session = session; + const storage = chrome.storage?.local; + if (storage) { + await storage.set({ [STORAGE_KEY]: session }).catch(() => {}); + } + } + + async function _clearSession() { + _session = null; + _user = null; + const storage = chrome.storage?.local; + if (storage) { + await storage.remove([STORAGE_KEY]).catch(() => {}); + } + } + + async function _processTokens(accessToken, refreshToken, expiresIn) { + const expiresAt = Date.now() + (expiresIn * 1000); + const session = { access_token: accessToken, refresh_token: refreshToken, expires_at: expiresAt }; + await _saveSession(session); + + const user = await _fetchUser(accessToken); + if (user && user.id) { + _user = { + id: user.id, + email: user.email || '', + name: user.user_metadata?.full_name || user.user_metadata?.name || '', + avatar: user.user_metadata?.avatar_url || '', + isAnonymous: user.is_anonymous || false, + }; + } + + _scheduleRefresh(); + _notify('SIGNED_IN'); + return true; + } + + let _refreshTimer = null; + + function _scheduleRefresh() { + if (_refreshTimer) clearTimeout(_refreshTimer); + if (!_session) return; + + const msUntilExpiry = _session.expires_at - Date.now(); + const refreshIn = Math.max(msUntilExpiry - TOKEN_REFRESH_MARGIN_MS, 5000); + + _refreshTimer = setTimeout(async () => { + if (!_session?.refresh_token) return; + const data = await _refreshToken(_session.refresh_token); + if (data && data.access_token) { + await _processTokens(data.access_token, data.refresh_token, data.expires_in || 3600); + } else { + await _clearSession(); + _notify('TOKEN_REFRESH_FAILED'); + } + }, refreshIn); + } + + // ── Public API ── + + async function init() { + await _fetchConfig(); + + const storage = chrome.storage?.local; + if (!storage) return; + + try { + const data = await storage.get([STORAGE_KEY]); + const saved = data[STORAGE_KEY]; + if (!saved || !saved.access_token) return; + + if (saved.expires_at && saved.expires_at > Date.now()) { + _session = saved; + const user = await _fetchUser(saved.access_token); + if (user && user.id) { + _user = { + id: user.id, + email: user.email || '', + name: user.user_metadata?.full_name || user.user_metadata?.name || '', + avatar: user.user_metadata?.avatar_url || '', + isAnonymous: user.is_anonymous || false, + }; + _scheduleRefresh(); + _notify('RESTORED'); + return; + } + } + + if (saved.refresh_token) { + const data = await _refreshToken(saved.refresh_token); + if (data && data.access_token) { + await _processTokens(data.access_token, data.refresh_token, data.expires_in || 3600); + return; + } + } + + await _clearSession(); + } catch (e) { + console.warn('[BayanAuth] init restore failed:', e); + } + } + + async function signInWithGoogle() { + const config = await _fetchConfig(); + if (!config || !config.supabase_url) { + console.warn('[BayanAuth] Supabase not configured'); + return { success: false, error: 'not_configured' }; + } + + const redirectUrl = chrome.identity.getRedirectURL(); + const authUrl = `${config.supabase_url}/auth/v1/authorize?provider=google&redirect_to=${encodeURIComponent(redirectUrl)}`; + + try { + const responseUrl = await new Promise((resolve, reject) => { + chrome.identity.launchWebAuthFlow( + { url: authUrl, interactive: true }, + (url) => { + if (chrome.runtime.lastError) { + reject(new Error(chrome.runtime.lastError.message)); + } else { + resolve(url); + } + } + ); + }); + + const params = _parseHashParams(responseUrl); + if (!params.access_token) { + return { success: false, error: 'no_token' }; + } + + await _processTokens(params.access_token, params.refresh_token || '', parseInt(params.expires_in || '3600', 10)); + return { success: true }; + } catch (e) { + console.error('[BayanAuth] Google sign-in failed:', e); + return { success: false, error: e.message }; + } + } + + async function signOut() { + if (_session && _config) { + try { + await fetch(`${_config.supabase_url}/auth/v1/logout`, { + method: 'POST', + headers: { + 'apikey': _config.supabase_anon_key, + 'Authorization': `Bearer ${_session.access_token}`, + }, + }); + } catch {} + } + + if (_refreshTimer) clearTimeout(_refreshTimer); + await _clearSession(); + _notify('SIGNED_OUT'); + } + + function getUser() { return _user; } + + function getAccessToken() { + if (!_session) return null; + if (_session.expires_at && _session.expires_at < Date.now()) return null; + return _session.access_token; + } + + function isAuthenticated() { + return !!(_user && _user.id && getAccessToken()); + } + + function onAuthStateChange(fn) { + _listeners.push(fn); + return () => { _listeners = _listeners.filter(f => f !== fn); }; + } + + // ── Dismissed words persistence ── + + async function getDismissedWords() { + const storage = chrome.storage?.local; + if (!storage) return []; + try { + const data = await storage.get([DISMISSED_WORDS_KEY]); + return data[DISMISSED_WORDS_KEY] || []; + } catch { + return []; + } + } + + async function addDismissedWord(word) { + const words = await getDismissedWords(); + if (!words.includes(word)) { + words.push(word); + await chrome.storage.local.set({ [DISMISSED_WORDS_KEY]: words }).catch(() => {}); + } + } + + async function removeDismissedWord(word) { + const words = await getDismissedWords(); + const filtered = words.filter(w => w !== word); + await chrome.storage.local.set({ [DISMISSED_WORDS_KEY]: filtered }).catch(() => {}); + } + + async function clearDismissedWords() { + await chrome.storage.local.remove([DISMISSED_WORDS_KEY]).catch(() => {}); + } + + return { + init, + signInWithGoogle, + signOut, + getUser, + getAccessToken, + isAuthenticated, + onAuthStateChange, + getDismissedWords, + addDismissedWord, + removeDismissedWord, + clearDismissedWords, + }; +})(); diff --git a/extension/shared/bayan-core.js b/extension/shared/bayan-core.js new file mode 100644 index 0000000000000000000000000000000000000000..a7f761f1d564c23918f77801873ee9717700aa37 --- /dev/null +++ b/extension/shared/bayan-core.js @@ -0,0 +1,180 @@ +/** + * Bayan Chrome Extension — Shared Core Utilities + * + * Functions extracted from popup.js and sidepanel.js to eliminate duplication. + * Loaded via + + + diff --git a/extension/sidepanel/sidepanel.js b/extension/sidepanel/sidepanel.js index 0f88dcf4e2e155cf10ed943c5b2068ccabad8c0d..0b5d4fa9357a72999ca25718f9c346353e753435 100644 --- a/extension/sidepanel/sidepanel.js +++ b/extension/sidepanel/sidepanel.js @@ -48,14 +48,6 @@ document.addEventListener('DOMContentLoaded', () => { const summaryMeta = document.getElementById('summary-meta'); const btnCopySummary = document.getElementById('btn-copy-summary'); - // Score - const scoreValue = document.getElementById('score-value'); - const scoreCircle = document.getElementById('score-circle'); - const scoreHint = document.getElementById('score-hint'); - const countSpelling = document.getElementById('count-spelling'); - const countGrammar = document.getElementById('count-grammar'); - const countPunctuation = document.getElementById('count-punctuation'); - // ══════════════════════════════════════════════════════════ // State // ══════════════════════════════════════════════════════════ @@ -70,7 +62,6 @@ document.addEventListener('DOMContentLoaded', () => { // so write-back replaces ONLY that selection, never the whole field. let sourceSelectionText = ''; - const SCORE_CIRCUMFERENCE = 440; const DEBOUNCE_MS = 500; // ══════════════════════════════════════════════════════════ @@ -90,15 +81,8 @@ document.addEventListener('DOMContentLoaded', () => { }); // ══════════════════════════════════════════════════════════ - // Character & word counter + // Character & word counter (shared: bayan-core.js) // ══════════════════════════════════════════════════════════ - function updateCounts(textarea, charEl, wordEl) { - const text = textarea.value; - const chars = text.length; - const words = text.trim() ? text.trim().split(/\s+/).length : 0; - if (charEl) charEl.textContent = chars.toLocaleString('ar-EG'); - if (wordEl) wordEl.textContent = words.toLocaleString('ar-EG'); - } inputText.addEventListener('input', () => { updateCounts(inputText, charCount, wordCount); @@ -139,21 +123,13 @@ document.addEventListener('DOMContentLoaded', () => { } // ══════════════════════════════════════════════════════════ - // Loading & Toast + // Loading (Toast shared via bayan-core.js) // ══════════════════════════════════════════════════════════ function setLoading(show, text = 'جارٍ التحليل...') { loadingOverlay.classList.toggle('is-hidden', !show); loadingTextEl.textContent = text; } - function showToast(message, duration = 2500) { - const toast = document.getElementById('toast'); - toast.textContent = message; - toast.classList.add('is-visible'); - clearTimeout(toast._timer); - toast._timer = setTimeout(() => toast.classList.remove('is-visible'), duration); - } - // ══════════════════════════════════════════════════════════ // Write-back to the page field (panel → background → content script) // The side panel is a separate document and cannot touch page DOM @@ -180,25 +156,7 @@ document.addEventListener('DOMContentLoaded', () => { } } - // ══════════════════════════════════════════════════════════ - // Score ring - // ══════════════════════════════════════════════════════════ - function updateScore(spelling, grammar, punctuation) { - const score = calculateWritingScore(spelling, grammar, punctuation); - const total = spelling + grammar + punctuation; - - scoreSection.classList.remove('is-hidden'); - - if (scoreValue) scoreValue.textContent = score > 0 || total > 0 ? score.toLocaleString('ar-EG') : '--'; - if (scoreCircle) { - const offset = SCORE_CIRCUMFERENCE - (score / 100) * SCORE_CIRCUMFERENCE; - scoreCircle.style.strokeDashoffset = String(offset); - } - if (scoreHint) scoreHint.textContent = getScoreHint(score, total); - if (countSpelling) countSpelling.textContent = spelling.toLocaleString('ar-EG'); - if (countGrammar) countGrammar.textContent = grammar.toLocaleString('ar-EG'); - if (countPunctuation) countPunctuation.textContent = punctuation.toLocaleString('ar-EG'); - } + // Score ring — shared via bayan-core.js (updateScore) // ══════════════════════════════════════════════════════════ // Render suggestions list @@ -232,6 +190,9 @@ document.addEventListener('DOMContentLoaded', () => { if (altText === suggestion.original) { currentSuggestions = removeSuggestion(currentSuggestions, suggestion.id); + if (suggestion.type === 'spelling' && typeof BayanAuth !== 'undefined') { + BayanAuth.addDismissedWord(suggestion.original); + } } else { const result = applyAndRebase(analyzedText, suggestion, altText, currentSuggestions); analyzedText = result.text; @@ -273,7 +234,15 @@ document.addEventListener('DOMContentLoaded', () => { const data = await bayanAnalyze(text); if (data.status === 'success' || data.status === 'partial') { - const suggestions = sortSuggestions(data.suggestions || []); + let suggestions = sortSuggestions(data.suggestions || []); + + if (typeof BayanAuth !== 'undefined') { + const dismissed = await BayanAuth.getDismissedWords(); + if (dismissed.length > 0) { + suggestions = suggestions.filter(s => !(s.type === 'spelling' && dismissed.includes(s.original))); + } + } + currentSuggestions = suggestions; analyzedText = data.original; @@ -661,26 +630,8 @@ document.addEventListener('DOMContentLoaded', () => { // ══════════════════════════════════════════════════════════ // Phase 5: Download corrected text / summary as .txt - // Buttons injected programmatically to avoid touching sidepanel.html. + // downloadTxt shared via bayan-core.js // ══════════════════════════════════════════════════════════ - function downloadTxt(text, filename) { - if (!text) { showToast('لا يوجد نص للتنزيل'); return; } - try { - const blob = new Blob([text], { type: 'text/plain;charset=utf-8' }); - const url = URL.createObjectURL(blob); - const a = document.createElement('a'); - a.href = url; - a.download = filename; - document.body.appendChild(a); - a.click(); - a.remove(); - setTimeout(() => URL.revokeObjectURL(url), 1000); - showToast('✓ تم تنزيل الملف'); - } catch (e) { - console.error('[Bayan SP] Download error:', e); - showToast('تعذّر التنزيل'); - } - } const SP_DOWNLOAD_ICON = ''; @@ -693,6 +644,17 @@ document.addEventListener('DOMContentLoaded', () => { btn.innerHTML = SP_DOWNLOAD_ICON; btn.addEventListener('click', () => downloadTxt((getText() || '').trim(), filename)); anchorBtn.parentElement.appendChild(btn); + const docxBtn = document.createElement('button'); + docxBtn.className = 'sp-btn-icon'; + docxBtn.type = 'button'; + docxBtn.title = 'تنزيل كـ Word'; + docxBtn.innerHTML = ''; + docxBtn.addEventListener('click', () => { + if (typeof downloadDocx === 'function') { + downloadDocx((getText() || '').trim(), filename.replace('.txt', '.docx')); + } + }); + anchorBtn.parentElement.appendChild(docxBtn); } addDownloadButton(btnCopyResult, () => resultText.textContent, 'bayan-corrected.txt'); @@ -726,6 +688,216 @@ document.addEventListener('DOMContentLoaded', () => { if (btnCopyDialect) addApplyToPageButton(btnCopyDialect, () => dialectText.textContent, 'dialect'); if (btnCopyQuran) addApplyToPageButton(btnCopyQuran, () => quranText.textContent, 'quran'); + // ══════════════════════════════════════════════════════════ + // Auth UI wiring (shared via bayan-core.js) + // ══════════════════════════════════════════════════════════ + bayanInitAuth(); + + // ══════════════════════════════════════════════════════════ + // Cloud Documents (Phase 3.4) + // Uses BayanDocuments REST API (bayan-documents.js) + // ══════════════════════════════════════════════════════════ + const spDocTitle = document.getElementById('sp-doc-title'); + const spDocSave = document.getElementById('sp-doc-save'); + const spDocNew = document.getElementById('sp-doc-new'); + const spDocRefresh = document.getElementById('sp-doc-refresh'); + const spDocList = document.getElementById('sp-doc-list'); + + let currentDocId = null; + let currentDocTitle = 'لا يوجد مستند مفتوح'; + + function _escDocHtml(str) { + return String(str || '').replace(/&/g, '&').replace(//g, '>').replace(/"/g, '"'); + } + + function _updateDocBar() { + if (spDocTitle) spDocTitle.textContent = currentDocTitle; + } + + async function _renderDocList() { + if (!spDocList) return; + + if (typeof BayanAuth === 'undefined' || !BayanAuth.isAuthenticated()) { + spDocList.innerHTML = ` + `; + const signinBtn = document.getElementById('sp-doc-signin-btn'); + if (signinBtn) { + signinBtn.addEventListener('click', () => { + const loginBtn = document.getElementById('btn-auth-login'); + if (loginBtn) loginBtn.click(); + }); + } + return; + } + + spDocList.innerHTML = '
جاري التحميل...
'; + + const docs = await BayanDocuments.loadDocuments(); + if (!docs.length) { + spDocList.innerHTML = ` +
+
📄
+
لا توجد مستندات بعد
+
أنشئ مستنداً جديداً للبدء
+
`; + return; + } + + spDocList.innerHTML = docs.map(doc => { + const date = new Date(doc.updated_at).toLocaleDateString('ar-EG', { month: 'short', day: 'numeric' }); + const isActive = doc.id === currentDocId; + return ` +
+ +
+ + +
+
`; + }).join(''); + + spDocList.querySelectorAll('.sp-doc-item-open').forEach(btn => { + btn.addEventListener('click', () => _openDoc(btn.dataset.docId)); + }); + spDocList.querySelectorAll('.sp-doc-rename').forEach(btn => { + btn.addEventListener('click', (e) => { e.stopPropagation(); _renameDoc(btn.dataset.docId, btn.dataset.docTitle); }); + }); + spDocList.querySelectorAll('.sp-doc-delete').forEach(btn => { + btn.addEventListener('click', (e) => { e.stopPropagation(); _deleteDoc(btn.dataset.docId, btn.dataset.docTitle); }); + }); + } + + async function _openDoc(id) { + setLoading(true, 'جاري تحميل المستند...'); + try { + const doc = await BayanDocuments.loadDocument(id); + if (!doc) { showToast('تعذّر تحميل المستند'); return; } + + currentDocId = doc.id; + currentDocTitle = doc.title; + _updateDocBar(); + + inputText.value = doc.content || ''; + updateCounts(inputText, charCount, wordCount); + analyzedText = ''; + currentSuggestions = []; + clearStale(); + scoreSection.classList.add('is-hidden'); + resultSection.classList.add('is-hidden'); + suggestionsSection.classList.add('is-hidden'); + timingSection.classList.add('is-hidden'); + + document.querySelector('[data-tab="correct"]')?.click(); + + _renderDocList(); + showToast('✓ تم فتح المستند'); + } catch (e) { + console.error('[Bayan SP] Open doc error:', e); + showToast('خطأ في تحميل المستند'); + } finally { + setLoading(false); + } + } + + async function _createDoc() { + if (typeof BayanAuth === 'undefined' || !BayanAuth.isAuthenticated()) { + showToast('سجّل دخولك أولاً'); return; + } + const title = prompt('اسم المستند الجديد:', 'مستند جديد'); + if (title === null) return; + + setLoading(true, 'جاري الإنشاء...'); + try { + const doc = await BayanDocuments.createDocument(title.trim() || 'مستند جديد', inputText.value || ''); + if (!doc) { showToast('تعذّر إنشاء المستند'); return; } + + currentDocId = doc.id; + currentDocTitle = doc.title; + _updateDocBar(); + await _renderDocList(); + showToast('✓ تم إنشاء المستند'); + } catch (e) { + console.error('[Bayan SP] Create doc error:', e); + showToast('خطأ في إنشاء المستند'); + } finally { + setLoading(false); + } + } + + async function _saveDoc() { + if (!currentDocId) { + _createDoc(); + return; + } + setLoading(true, 'جاري الحفظ...'); + try { + const ok = await BayanDocuments.saveDocument(currentDocId, inputText.value || ''); + if (ok) { + if (spDocSave) spDocSave.classList.remove('sp-doc-dirty'); + showToast('✓ تم الحفظ'); + } else { + showToast('تعذّر الحفظ'); + } + } catch (e) { + console.error('[Bayan SP] Save doc error:', e); + showToast('خطأ في الحفظ'); + } finally { + setLoading(false); + } + } + + async function _renameDoc(id, currentTitle) { + const newTitle = prompt('الاسم الجديد للمستند:', currentTitle); + if (!newTitle || newTitle === currentTitle) return; + + const ok = await BayanDocuments.renameDocument(id, newTitle); + if (ok) { + if (id === currentDocId) { currentDocTitle = newTitle; _updateDocBar(); } + await _renderDocList(); + showToast('✓ تم التسمية'); + } else { + showToast('تعذّر إعادة التسمية'); + } + } + + async function _deleteDoc(id, title) { + if (!confirm('هل تريد حذف "' + title + '"؟')) return; + + const ok = await BayanDocuments.deleteDocument(id); + if (ok) { + if (id === currentDocId) { + currentDocId = null; + currentDocTitle = 'لا يوجد مستند مفتوح'; + _updateDocBar(); + } + await _renderDocList(); + showToast('✓ تم حذف المستند'); + } else { + showToast('تعذّر الحذف'); + } + } + + if (spDocNew) spDocNew.addEventListener('click', _createDoc); + if (spDocSave) spDocSave.addEventListener('click', _saveDoc); + if (spDocRefresh) spDocRefresh.addEventListener('click', _renderDocList); + + inputText.addEventListener('input', () => { + if (currentDocId && spDocSave) spDocSave.classList.add('sp-doc-dirty'); + }); + + if (typeof BayanAuth !== 'undefined') { + BayanAuth.onAuthStateChange(() => _renderDocList()); + } + + _renderDocList(); + // ══════════════════════════════════════════════════════════ // Status check // ══════════════════════════════════════════════════════════ diff --git a/grammar_fails_output.md b/grammar_fails_output.md deleted file mode 100644 index 93a1a45931406ee41b8871957f4d739591974962..0000000000000000000000000000000000000000 --- a/grammar_fails_output.md +++ /dev/null @@ -1,37 +0,0 @@ -=== GRAMMAR FALSE NEGATIVES === -[G006] - sv_agree - IN: الأولاد لعب في الحديقة - EXP: لعبوا - RAW_GRAM: الأولاد لعبوَ في الحديقة - FINAL: الأولاد لعب في الحديقة. --------------------------------------------------- -[G009] - sv_agree - IN: العمال بنى المبنى - EXP: بنوا - RAW_GRAM: العمال بنى المبنى - FINAL: العمال بنى المبنى. --------------------------------------------------- -[G013] - gender - IN: الطالبة متفوق في دراسته - EXP: متفوقة/دراستها - RAW_GRAM: الطالب متفوق في دراسته - FINAL: الطالب متفوق في دراسته. --------------------------------------------------- -[G022] - five_nouns - IN: رأيت أخوك في المسجد - EXP: أخاك - RAW_GRAM: رأيت أخوك في المسجد - FINAL: رأيت أخوك في المسجد --------------------------------------------------- -[G026] - dual - IN: هاتان الطالبان مجتهدان - EXP: هذان - RAW_GRAM: هذان الطالبان مجتهدان - FINAL: هاتان الطالبات مجتهدان. --------------------------------------------------- -[G028] - nasb - IN: لم يفعلون الواجب بعد - EXP: يفعلوا - RAW_GRAM: لم يفعلوَ الواجب بعد - FINAL: لم يفعلون الواجب بعد --------------------------------------------------- diff --git a/out.txt b/out.txt deleted file mode 100644 index b8e272e6c0fcb4851cc6167f943f895f0484c7e7..0000000000000000000000000000000000000000 Binary files a/out.txt and /dev/null differ diff --git a/out2.txt b/out2.txt deleted file mode 100644 index 9398dec6d80ea6737f34d52223e0a5e07ad90d56..0000000000000000000000000000000000000000 Binary files a/out2.txt and /dev/null differ diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000000000000000000000000000000000000..cf92ac892847b61539f8d31db698da3e6c8e9263 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,1169 @@ +{ + "name": "BAYAN", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "devDependencies": { + "@tailwindcss/cli": "^4.3.1", + "tailwindcss": "^4.3.1" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.13", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", + "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.0", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/remapping": { + "version": "2.3.5", + "resolved": "https://registry.npmjs.org/@jridgewell/remapping/-/remapping-2.3.5.tgz", + "integrity": "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", + "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.31", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz", + "integrity": "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@parcel/watcher": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher/-/watcher-2.5.1.tgz", + "integrity": "sha512-dfUnCxiN9H4ap84DvD2ubjw+3vUNpstxa0TneY/Paat8a3R4uQZDLSvWjmznAY/DoahqTHl9V46HF/Zs3F29pg==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "detect-libc": "^1.0.3", + "is-glob": "^4.0.3", + "micromatch": "^4.0.5", + "node-addon-api": "^7.0.0" + }, + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + }, + "optionalDependencies": { + "@parcel/watcher-android-arm64": "2.5.1", + "@parcel/watcher-darwin-arm64": "2.5.1", + "@parcel/watcher-darwin-x64": "2.5.1", + "@parcel/watcher-freebsd-x64": "2.5.1", + "@parcel/watcher-linux-arm-glibc": "2.5.1", + "@parcel/watcher-linux-arm-musl": "2.5.1", + "@parcel/watcher-linux-arm64-glibc": "2.5.1", + "@parcel/watcher-linux-arm64-musl": "2.5.1", + "@parcel/watcher-linux-x64-glibc": "2.5.1", + "@parcel/watcher-linux-x64-musl": "2.5.1", + "@parcel/watcher-win32-arm64": "2.5.1", + "@parcel/watcher-win32-ia32": "2.5.1", + "@parcel/watcher-win32-x64": "2.5.1" + } + }, + "node_modules/@parcel/watcher-android-arm64": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-android-arm64/-/watcher-android-arm64-2.5.1.tgz", + "integrity": "sha512-KF8+j9nNbUN8vzOFDpRMsaKBHZ/mcjEjMToVMJOhTozkDonQFFrRcfdLWn6yWKCmJKmdVxSgHiYvTCef4/qcBA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-darwin-arm64": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-darwin-arm64/-/watcher-darwin-arm64-2.5.1.tgz", + "integrity": "sha512-eAzPv5osDmZyBhou8PoF4i6RQXAfeKL9tjb3QzYuccXFMQU0ruIc/POh30ePnaOyD1UXdlKguHBmsTs53tVoPw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-darwin-x64": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-darwin-x64/-/watcher-darwin-x64-2.5.1.tgz", + "integrity": "sha512-1ZXDthrnNmwv10A0/3AJNZ9JGlzrF82i3gNQcWOzd7nJ8aj+ILyW1MTxVk35Db0u91oD5Nlk9MBiujMlwmeXZg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-freebsd-x64": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-freebsd-x64/-/watcher-freebsd-x64-2.5.1.tgz", + "integrity": "sha512-SI4eljM7Flp9yPuKi8W0ird8TI/JK6CSxju3NojVI6BjHsTyK7zxA9urjVjEKJ5MBYC+bLmMcbAWlZ+rFkLpJQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-arm-glibc": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-arm-glibc/-/watcher-linux-arm-glibc-2.5.1.tgz", + "integrity": "sha512-RCdZlEyTs8geyBkkcnPWvtXLY44BCeZKmGYRtSgtwwnHR4dxfHRG3gR99XdMEdQ7KeiDdasJwwvNSF5jKtDwdA==", + "cpu": [ + "arm" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-arm-musl": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-arm-musl/-/watcher-linux-arm-musl-2.5.1.tgz", + "integrity": "sha512-6E+m/Mm1t1yhB8X412stiKFG3XykmgdIOqhjWj+VL8oHkKABfu/gjFj8DvLrYVHSBNC+/u5PeNrujiSQ1zwd1Q==", + "cpu": [ + "arm" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-arm64-glibc": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-arm64-glibc/-/watcher-linux-arm64-glibc-2.5.1.tgz", + "integrity": "sha512-LrGp+f02yU3BN9A+DGuY3v3bmnFUggAITBGriZHUREfNEzZh/GO06FF5u2kx8x+GBEUYfyTGamol4j3m9ANe8w==", + "cpu": [ + "arm64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-arm64-musl": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-arm64-musl/-/watcher-linux-arm64-musl-2.5.1.tgz", + "integrity": "sha512-cFOjABi92pMYRXS7AcQv9/M1YuKRw8SZniCDw0ssQb/noPkRzA+HBDkwmyOJYp5wXcsTrhxO0zq1U11cK9jsFg==", + "cpu": [ + "arm64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-x64-glibc": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-x64-glibc/-/watcher-linux-x64-glibc-2.5.1.tgz", + "integrity": "sha512-GcESn8NZySmfwlTsIur+49yDqSny2IhPeZfXunQi48DMugKeZ7uy1FX83pO0X22sHntJ4Ub+9k34XQCX+oHt2A==", + "cpu": [ + "x64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-x64-musl": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-x64-musl/-/watcher-linux-x64-musl-2.5.1.tgz", + "integrity": "sha512-n0E2EQbatQ3bXhcH2D1XIAANAcTZkQICBPVaxMeaCVBtOpBZpWJuf7LwyWPSBDITb7In8mqQgJ7gH8CILCURXg==", + "cpu": [ + "x64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-win32-arm64": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-win32-arm64/-/watcher-win32-arm64-2.5.1.tgz", + "integrity": "sha512-RFzklRvmc3PkjKjry3hLF9wD7ppR4AKcWNzH7kXR7GUe0Igb3Nz8fyPwtZCSquGrhU5HhUNDr/mKBqj7tqA2Vw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-win32-ia32": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-win32-ia32/-/watcher-win32-ia32-2.5.1.tgz", + "integrity": "sha512-c2KkcVN+NJmuA7CGlaGD1qJh1cLfDnQsHjE89E60vUEMlqduHGCdCLJCID5geFVM0dOtA3ZiIO8BoEQmzQVfpQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-win32-x64": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-win32-x64/-/watcher-win32-x64-2.5.1.tgz", + "integrity": "sha512-9lHBdJITeNR++EvSQVUcaZoWupyHfXe1jZvGZ06O/5MflPcuPLtEphScIBL+AiCWBO46tDSHzWyD0uDmmZqsgA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@tailwindcss/cli": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/cli/-/cli-4.3.1.tgz", + "integrity": "sha512-ZWPy20rF+TBfTImxDMG3Wr75Y3RpaPlo9lc+oJbInlMyjT+XPkTVKVIL5RZ7JirXuIahcfHoLNFRmDorKi+JQQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@parcel/watcher": "2.5.1", + "@tailwindcss/node": "4.3.1", + "@tailwindcss/oxide": "4.3.1", + "enhanced-resolve": "5.21.6", + "mri": "^1.2.0", + "picocolors": "^1.1.1", + "tailwindcss": "4.3.1" + }, + "bin": { + "tailwindcss": "dist/index.mjs" + } + }, + "node_modules/@tailwindcss/node": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/node/-/node-4.3.1.tgz", + "integrity": "sha512-6NDaqRoAMSXD1mr/RXu0HBvNE9a2n5tHPsxu9XHLws8o4Twes5rBM2205SUUiJ9goAtadrN6xTGX0UDEwp/N4A==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/remapping": "^2.3.5", + "enhanced-resolve": "5.21.6", + "jiti": "^2.7.0", + "lightningcss": "1.32.0", + "magic-string": "^0.30.21", + "source-map-js": "^1.2.1", + "tailwindcss": "4.3.1" + } + }, + "node_modules/@tailwindcss/oxide": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.3.1.tgz", + "integrity": "sha512-yVPyo8RNkabVr3O2EhHEE0Rewu7YKzc1DhIqfL46LKveFrmu9XbDazNOJY7/GRuvw1h6u3utWnR29H/p5JPlgA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 20" + }, + "optionalDependencies": { + "@tailwindcss/oxide-android-arm64": "4.3.1", + "@tailwindcss/oxide-darwin-arm64": "4.3.1", + "@tailwindcss/oxide-darwin-x64": "4.3.1", + "@tailwindcss/oxide-freebsd-x64": "4.3.1", + "@tailwindcss/oxide-linux-arm-gnueabihf": "4.3.1", + "@tailwindcss/oxide-linux-arm64-gnu": "4.3.1", + "@tailwindcss/oxide-linux-arm64-musl": "4.3.1", + "@tailwindcss/oxide-linux-x64-gnu": "4.3.1", + "@tailwindcss/oxide-linux-x64-musl": "4.3.1", + "@tailwindcss/oxide-wasm32-wasi": "4.3.1", + "@tailwindcss/oxide-win32-arm64-msvc": "4.3.1", + "@tailwindcss/oxide-win32-x64-msvc": "4.3.1" + } + }, + "node_modules/@tailwindcss/oxide-android-arm64": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.3.1.tgz", + "integrity": "sha512-SVlyf61g374l5cHyg8x9kf5xmLcOaxvOTsbsqDnSsDJaKOEFZ7GCvi84VAVGpxojYOs1+3K6M0UjXfqPU8vmOQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 20" + } + }, + "node_modules/@tailwindcss/oxide-darwin-arm64": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.3.1.tgz", + "integrity": "sha512-hVnWLwv+e/l7c4WKyVtHVrIPvYdqWHjRB3MDIqARynzFtnQg85kmQEFCbV9Ja0VVx4xXTIiDWY60Y7iz/iNoDA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 20" + } + }, + "node_modules/@tailwindcss/oxide-darwin-x64": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.3.1.tgz", + "integrity": "sha512-Cf7abu0WVgbhU7ANgPUnSAvm7nCvMweusHb8FnaHlLfv/Caq4GYaEZg7ZImzzmjx4lIAfuS8q+eLIS7A7IzxIg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 20" + } + }, + "node_modules/@tailwindcss/oxide-freebsd-x64": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.3.1.tgz", + "integrity": "sha512-ZZqzX2Y+GXtXXfqSfpJhDm60OoZfvLHLCgm+J7NVqgHHJjG/m9ugZI77RwTsVd4fnBJuCFP6Ae6kTJb71UdS8g==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 20" + } + }, + "node_modules/@tailwindcss/oxide-linux-arm-gnueabihf": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.3.1.tgz", + "integrity": "sha512-/Ah/xik0LaMYfv9DZ0S/t4pBlBNYOcqtRwusjgovHkvT8ixueWCLyJjsaF5kQIckjb4IT8Q6K6p/iPmZMixYgg==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 20" + } + }, + "node_modules/@tailwindcss/oxide-linux-arm64-gnu": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.3.1.tgz", + "integrity": "sha512-gqdFoVJlw444GvpnheZLHmvTzSxI/cOUUh2KSNejQjTcYkW062SVD+En0rUgD+QV91bz1XGIGtt1HJd48xUGbQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 20" + } + }, + "node_modules/@tailwindcss/oxide-linux-arm64-musl": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.3.1.tgz", + "integrity": "sha512-Bwv9KwOvE0VKa86xPFif9b9c3Y1NxOV1P0gLti/IYaWEsQYZXDlxfGEtA8mdDZ7SG3wyNXAWYT5SIn3giL57oA==", + "cpu": [ + "arm64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 20" + } + }, + "node_modules/@tailwindcss/oxide-linux-x64-gnu": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-gnu/-/oxide-linux-x64-gnu-4.3.1.tgz", + "integrity": "sha512-Ymi8O8T15HYQdOUWUtTI6ldN0neHP85FC+Qz32xTcZ7iJXtem/x8ITev0o1e9e5rkqj4lONZfTRLvkmin1+tKg==", + "cpu": [ + "x64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 20" + } + }, + "node_modules/@tailwindcss/oxide-linux-x64-musl": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.3.1.tgz", + "integrity": "sha512-M+P/91qJ6uILLw4k2G93GMDRAXj61SMvFQYt39AqvUqYgExXpLL5aepfns7sj4HiAQeolirQF9E0lzRvdf4zPQ==", + "cpu": [ + "x64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 20" + } + }, + "node_modules/@tailwindcss/oxide-wasm32-wasi": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.3.1.tgz", + "integrity": "sha512-zsM8uOeqvVGHsAXsJxsT28ttosFahLJKCLOTUBqRAtKnVgGSRitds9T432QiT8b77Yga7JIBkulIRRlJPtYhRA==", + "bundleDependencies": [ + "@napi-rs/wasm-runtime", + "@emnapi/core", + "@emnapi/runtime", + "@tybys/wasm-util", + "@emnapi/wasi-threads", + "tslib" + ], + "cpu": [ + "wasm32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "dependencies": { + "@emnapi/core": "^1.10.0", + "@emnapi/runtime": "^1.10.0", + "@emnapi/wasi-threads": "^1.2.1", + "@napi-rs/wasm-runtime": "^1.1.4", + "@tybys/wasm-util": "^0.10.2", + "tslib": "^2.8.1" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@tailwindcss/oxide-win32-arm64-msvc": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.3.1.tgz", + "integrity": "sha512-aiNvSq9BsVk8V513lDKlrCFAgf8qBMPZTpgEhInL+NwQqs97mYmupVMrPrgBBSL8Pv/0zXu9MrMF9rMun1ZeNg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 20" + } + }, + "node_modules/@tailwindcss/oxide-win32-x64-msvc": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.3.1.tgz", + "integrity": "sha512-xDEyu1rg290472FEGaKHnzyDyh5QH+AlWvsU5hMoMtPpzmKlRI0jaYKCgSHDYtaQWZOYbMaduSyCwFwY4n1HmA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 20" + } + }, + "node_modules/braces": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", + "dev": true, + "license": "MIT", + "dependencies": { + "fill-range": "^7.1.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/detect-libc": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-1.0.3.tgz", + "integrity": "sha512-pGjwhsmsp4kL2RTz08wcOlGN83otlqHeD/Z5T8GXZB+/YcpQ/dgo+lbU8ZsGxV0HIvqqxo9l7mqYwyYMD9bKDg==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "detect-libc": "bin/detect-libc.js" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/enhanced-resolve": { + "version": "5.21.6", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.21.6.tgz", + "integrity": "sha512-aNnGCvbJ/RIyWo1IuhNdVjnNF+EjH9wpzpNHt+ci/m9He9LJvUN8wrCcXjp9cWsGNAuvSpVFTx/vraAFQ8qGjQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "graceful-fs": "^4.2.4", + "tapable": "^2.3.3" + }, + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/fill-range": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", + "dev": true, + "license": "MIT", + "dependencies": { + "to-regex-range": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "dev": true, + "license": "ISC" + }, + "node_modules/is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-glob": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-extglob": "^2.1.1" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/jiti": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/jiti/-/jiti-2.7.0.tgz", + "integrity": "sha512-AC/7JofJvZGrrneWNaEnJeOLUx+JlGt7tNa0wZiRPT4MY1wmfKjt2+6O2p2uz2+skll8OZZmJMNqeke7kKbNgQ==", + "dev": true, + "license": "MIT", + "bin": { + "jiti": "lib/jiti-cli.mjs" + } + }, + "node_modules/lightningcss": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss/-/lightningcss-1.32.0.tgz", + "integrity": "sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ==", + "dev": true, + "license": "MPL-2.0", + "dependencies": { + "detect-libc": "^2.0.3" + }, + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + }, + "optionalDependencies": { + "lightningcss-android-arm64": "1.32.0", + "lightningcss-darwin-arm64": "1.32.0", + "lightningcss-darwin-x64": "1.32.0", + "lightningcss-freebsd-x64": "1.32.0", + "lightningcss-linux-arm-gnueabihf": "1.32.0", + "lightningcss-linux-arm64-gnu": "1.32.0", + "lightningcss-linux-arm64-musl": "1.32.0", + "lightningcss-linux-x64-gnu": "1.32.0", + "lightningcss-linux-x64-musl": "1.32.0", + "lightningcss-win32-arm64-msvc": "1.32.0", + "lightningcss-win32-x64-msvc": "1.32.0" + } + }, + "node_modules/lightningcss-android-arm64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-android-arm64/-/lightningcss-android-arm64-1.32.0.tgz", + "integrity": "sha512-YK7/ClTt4kAK0vo6w3X+Pnm0D2cf2vPHbhOXdoNti1Ga0al1P4TBZhwjATvjNwLEBCnKvjJc2jQgHXH0NEwlAg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-darwin-arm64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.32.0.tgz", + "integrity": "sha512-RzeG9Ju5bag2Bv1/lwlVJvBE3q6TtXskdZLLCyfg5pt+HLz9BqlICO7LZM7VHNTTn/5PRhHFBSjk5lc4cmscPQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-darwin-x64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.32.0.tgz", + "integrity": "sha512-U+QsBp2m/s2wqpUYT/6wnlagdZbtZdndSmut/NJqlCcMLTWp5muCrID+K5UJ6jqD2BFshejCYXniPDbNh73V8w==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-freebsd-x64": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.32.0.tgz", + "integrity": "sha512-JCTigedEksZk3tHTTthnMdVfGf61Fky8Ji2E4YjUTEQX14xiy/lTzXnu1vwiZe3bYe0q+SpsSH/CTeDXK6WHig==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm-gnueabihf": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.32.0.tgz", + "integrity": "sha512-x6rnnpRa2GL0zQOkt6rts3YDPzduLpWvwAF6EMhXFVZXD4tPrBkEFqzGowzCsIWsPjqSK+tyNEODUBXeeVHSkw==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm64-gnu": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.32.0.tgz", + "integrity": "sha512-0nnMyoyOLRJXfbMOilaSRcLH3Jw5z9HDNGfT/gwCPgaDjnx0i8w7vBzFLFR1f6CMLKF8gVbebmkUN3fa/kQJpQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm64-musl": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.32.0.tgz", + "integrity": "sha512-UpQkoenr4UJEzgVIYpI80lDFvRmPVg6oqboNHfoH4CQIfNA+HOrZ7Mo7KZP02dC6LjghPQJeBsvXhJod/wnIBg==", + "cpu": [ + "arm64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-x64-gnu": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-gnu/-/lightningcss-linux-x64-gnu-1.32.0.tgz", + "integrity": "sha512-V7Qr52IhZmdKPVr+Vtw8o+WLsQJYCTd8loIfpDaMRWGUZfBOYEJeyJIkqGIDMZPwPx24pUMfwSxxI8phr/MbOA==", + "cpu": [ + "x64" + ], + "dev": true, + "libc": [ + "glibc" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-x64-musl": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.32.0.tgz", + "integrity": "sha512-bYcLp+Vb0awsiXg/80uCRezCYHNg1/l3mt0gzHnWV9XP1W5sKa5/TCdGWaR/zBM2PeF/HbsQv/j2URNOiVuxWg==", + "cpu": [ + "x64" + ], + "dev": true, + "libc": [ + "musl" + ], + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-win32-arm64-msvc": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.32.0.tgz", + "integrity": "sha512-8SbC8BR40pS6baCM8sbtYDSwEVQd4JlFTOlaD3gWGHfThTcABnNDBda6eTZeqbofalIJhFx0qKzgHJmcPTnGdw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-win32-x64-msvc": { + "version": "1.32.0", + "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.32.0.tgz", + "integrity": "sha512-Amq9B/SoZYdDi1kFrojnoqPLxYhQ4Wo5XiL8EVJrVsB8ARoC1PWW6VGtT0WKCemjy8aC+louJnjS7U18x3b06Q==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss/node_modules/detect-libc": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", + "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=8" + } + }, + "node_modules/magic-string": { + "version": "0.30.21", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz", + "integrity": "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.5" + } + }, + "node_modules/micromatch": { + "version": "4.0.8", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", + "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", + "dev": true, + "license": "MIT", + "dependencies": { + "braces": "^3.0.3", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, + "node_modules/mri": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/mri/-/mri-1.2.0.tgz", + "integrity": "sha512-tzzskb3bG8LvYGFF/mDTpq3jpI6Q9wc3LEmBaghu+DdCssd1FakN7Bc0hVNmEyGq1bq3RgfkCb3cmQLpNPOroA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/node-addon-api": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.1.tgz", + "integrity": "sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/picocolors": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "dev": true, + "license": "ISC" + }, + "node_modules/picomatch": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz", + "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8.6" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, + "node_modules/source-map-js": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", + "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/tailwindcss": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.3.1.tgz", + "integrity": "sha512-hk+TB1m+K8CYNrP6rjQaq/Y+4Zylwpa87mLYBKCunwnnQ9p+fHb7kmSfGqyEJoxF/O6CDyABWVFEafNSYKll+Q==", + "dev": true, + "license": "MIT" + }, + "node_modules/tapable": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.3.3.tgz", + "integrity": "sha512-uxc/zpqFg6x7C8vOE7lh6Lbda8eEL9zmVm/PLeTPBRhh1xCgdWaQ+J1CUieGpIfm2HdtsUpRv+HshiasBMcc6A==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + } + }, + "node_modules/to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-number": "^7.0.0" + }, + "engines": { + "node": ">=8.0" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000000000000000000000000000000000000..47b729eaaf8a83e04eaa91d17eb76e8525725418 --- /dev/null +++ b/package.json @@ -0,0 +1,6 @@ +{ + "devDependencies": { + "@tailwindcss/cli": "^4.3.1", + "tailwindcss": "^4.3.1" + } +} diff --git a/pc_data.txt b/pc_data.txt deleted file mode 100644 index 8f7ea1a7a1329f0bbf8e3ccdef20a5e909280e65..0000000000000000000000000000000000000000 Binary files a/pc_data.txt and /dev/null differ diff --git a/requirements.txt b/requirements.txt index 490e25b9dd56c91d95a44f9285e6620716375f28..2ead1148115009124bd52fc5253d4a90f4208018 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,9 @@ huggingface_hub>=0.20.0 transformers torch -datasets camel-tools sentencepiece -scikit-learn numpy -pandas flask flask-cors jellyfish @@ -14,5 +11,4 @@ python-Levenshtein gunicorn python-dotenv gradio_client -rapidfuzz Flask-Limiter diff --git a/src/README.md b/src/README.md deleted file mode 100644 index 2d0110a93c25617df6b178713ea6fa80a0c70458..0000000000000000000000000000000000000000 --- a/src/README.md +++ /dev/null @@ -1,58 +0,0 @@ -# src/ — Source Code - -## Structure - -``` -src/ -├── app.py # Flask application — routes + pipeline orchestration -├── model_loader.py # Model loading (Spelling, Grammar, Summarization, Autocomplete) -├── hf_inference.py # HuggingFace API inference wrappers -├── index.html # Main web UI (single-page app) -├── favicon.svg # Site icon -├── css/ # Stylesheets -│ ├── tokens.css # Design tokens (colors, spacing) -│ ├── base.css # Base styles -│ └── components.css # Component styles -├── js/ # Frontend JavaScript modules -│ ├── editor.js # Editor logic, events, debouncing -│ ├── renderer.js # Offset-based highlight rendering -│ ├── selection.js # Cursor/selection save & restore -│ ├── ui.js # Tooltips, suggestion lists, scores -│ ├── api.js # Backend API fetch wrappers -│ ├── format.js # Text formatting -│ ├── theme.js # Theme switching -│ ├── autocomplete.js # Autocomplete UI -│ ├── auth/ # Authentication (Supabase) -│ ├── documents/ # Local document management -│ ├── documents-cloud/ # Cloud document sync -│ ├── summaries/ # Text summarization UI -│ ├── settings-sync/ # Settings sync -│ ├── sync/ # Real-time sync engine -│ └── vendor/ # Third-party libraries -└── nlp/ # NLP pipeline modules - ├── spelling/ # AraSpell spelling correction - │ ├── araspell_rules.py - │ └── araspell_service.py - ├── grammar/ # Grammar correction - │ ├── grammar_rules.py - │ └── grammar_service.py - ├── punctuation/ # Punctuation restoration - │ ├── punctuation_rules.py - │ └── punctuation_service.py - ├── autocomplete/ # Text autocomplete - ├── dialect/ # Dialect detection - ├── pipeline_context.py # Shared pipeline state - ├── stage_locker.py # Cross-stage text locking - └── correction_patch.py # Correction patch utilities -``` - -## API Contract - -- **Input**: Arabic text string (UTF-8) -- **Output**: JSON with `corrected`, `suggestions[]` (each with `start`, `end`, `replacement`, `explanation`), and `timing_ms` - -## Running - -```bash -cd src && gunicorn app:app --bind 0.0.0.0:7860 --timeout 120 --workers 1 -``` \ No newline at end of file diff --git a/src/app.py b/src/app.py index f75187cd8a225615e460ec213437e3f03d813da7..cb999026c5309fc688f17ecfdfff8845d94e229f 100644 --- a/src/app.py +++ b/src/app.py @@ -1,85 +1,28 @@ """ -Flask backend server for Arabic text summarization. +Flask backend server for Arabic text analysis. Provides API endpoints for the Bayan web application. + +Routes are registered via Blueprints in routes/ package. +Analysis pipeline logic lives in services/ package. """ import os import logging import time -from flask import Flask, request, jsonify, Response +from flask import Flask from flask_cors import CORS -from pathlib import Path -import traceback -import difflib -import re - -# Quran search -import sys -_quran_root = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -sys.path.insert(0, _quran_root) -try: - from quran import search_bayan - logger_quran_ok = True -except Exception as _quran_err: - logger_quran_ok = False - import logging as _ql - _ql.getLogger('app').warning(f'[QURAN] Failed to import quran module: {_quran_err}') - _ql.getLogger('app').warning(f'[QURAN] Searched path: {_quran_root}') - _ql.getLogger('app').warning(f'[QURAN] Files in root: {os.listdir(_quran_root) if os.path.isdir(_quran_root) else "DIR NOT FOUND"}') - -# Pipeline hardening modules -from nlp.pipeline_context import PipelineContext -from nlp.punctuation.punctuation_rules import validate_punctuation_diff -# Load .env file from project root (one level up from src/) -try: - from dotenv import load_dotenv - _env_path = Path(__file__).parent.parent / '.env' - load_dotenv(dotenv_path=_env_path) -except ImportError: - pass # python-dotenv not installed; rely on environment variables directly - -SUPABASE_URL = os.environ.get('SUPABASE_URL', '') -SUPABASE_ANON_KEY = os.environ.get('SUPABASE_ANON_KEY', '') +from config import ( + _ALLOWED_ORIGINS, USE_HF_API, HUGGINGFACE_SUMMARIZATION_REPO, +) +from middleware.rate_limit import limiter +import state from model_loader import ( SummarizationModel, - SpellingModel, - AutocompleteModel, - GrammarModel, - PunctuationModel, SUMMARIZATION_PATH, - SPELLING_PATH, - AUTOCOMPLETE_PATH, - GRAMMAR_PATH, - PUNCTUATION_PATH -) - -# Optional Pipeline Stages -ENABLE_DIALECT_MODEL = False -ENABLE_PUNCTUATION_MODEL = True -ENABLE_GRAMMAR_MODEL = True -ENABLE_AUTOCOMPLETE_MODEL = False - -# HuggingFace Inference API — used in production to avoid RAM limits -from hf_inference import ( - hf_summarize, - hf_correct_spelling, - hf_add_punctuation, - hf_autocomplete, - check_hf_api_available, -) - -HUGGINGFACE_SUMMARIZATION_REPO = os.environ.get( - "SUMMARIZATION_REPO_ID", - "bayan10/summarization-model", ) -# When HF_API_TOKEN is set, use remote HF Inference API instead of local models. -# This avoids loading 500MB+ models into RAM on the free tier. -HF_API_TOKEN = os.environ.get('HF_API_TOKEN', '') -USE_HF_API = bool(HF_API_TOKEN) - # Configure logging logging.basicConfig( level=logging.INFO, @@ -87,85 +30,48 @@ logging.basicConfig( ) logger = logging.getLogger(__name__) -DEBUG_TRACE = True # Toggleable trace logging - # Initialize Flask app app = Flask(__name__, static_folder='.', static_url_path='') -CORS(app, resources={r"/api/*": {"origins": "*"}}) # CORS for API routes only -# Configuration -MAX_TEXT_LENGTH = 5000 # Maximum characters for input text -MAX_SUMMARY_LENGTH = 512 # Maximum tokens for summary -MIN_TEXT_LENGTH = 10 # Minimum characters for summarization +# CORS: restrict to known origins (website + extension) +CORS(app, resources={r"/api/*": {"origins": _ALLOWED_ORIGINS}}) -# Global model instances -summarization_model = None -spelling_model = None -autocomplete_model = None -grammar_model = None -punctuation_model = None +# Rate limiting +limiter.init_app(app) -# ── Directional Blocks: prevent meaning-changing substitutions ── -# Used by both spelling confidence filter and grammar diff filter. -_DIRECTIONAL_BLOCKS = { - # Demonstratives: هذه (correct feminine) → هذة (misspelling) = ALWAYS wrong - 'هذه': {'هذة'}, - 'هذا': {'هذة', 'هذه'}, # masculine → don't flip to feminine forms - # Verb/particle confusion: كان (was) ↔ كأن (as if) = ALWAYS wrong - 'كان': {'كأن'}, - 'كأن': {'كان'}, - 'كانت': {'كأنت'}, # H016: كانت → كأنت = ALWAYS wrong - 'كانوا': {'كأنوا'}, # also block plural form - # Preposition confusion: different meanings, both valid - 'إلى': {'على', 'علي'}, - 'على': {'إلى', 'علي'}, - 'علي': {'على'}, # proper name vs preposition - # Conjunction: لكن (correct) ↔ لاكن (misspelling of لكن, never valid) - 'لكن': {'لاكن'}, # correct → misspelling = ALWAYS wrong - # Demonstrative: ذلك (correct) ↔ ذالك (common misspelling) - 'ذلك': {'ذالك'}, # correct → misspelling = ALWAYS wrong - # Pronoun suffix: ه→ة corruption (G037: عمله→عملة) - 'عمله': {'عملة'}, # عمله (his work) → عملة (currency) = WRONG - 'لسانه': {'لسانة'}, # his tongue - 'بيته': {'بيتة'}, # his house - 'كتابه': {'كتابة'}, # his book → writing -} +# Register Blueprints +from routes.core import core_bp +from routes.nlp import nlp_bp +app.register_blueprint(core_bp) +app.register_blueprint(nlp_bp) def load_models(): - """Load models. In HF API mode, load summarization locally; other models gracefully degrade.""" - global summarization_model, spelling_model, autocomplete_model, grammar_model, punctuation_model - + """Load summarization model. Other models use lazy-loading via their services.""" if USE_HF_API: logger.info("HF_API_TOKEN is set — HF API mode enabled") logger.info("NOTE: HF Spaces free tier has NO outbound DNS. Loading summarization model locally.") logger.info("Spelling, punctuation, autocomplete will gracefully degrade (return input unchanged).") - # Fall through to load summarization model locally - + loaded = [] failed = [] - - # Store startup errors for diagnostics - global _startup_errors - _startup_errors = [] - # Load only the Summarization model locally. try: logger.info(f"Loading summarization model from Hugging Face: {HUGGINGFACE_SUMMARIZATION_REPO}") try: - summarization_model = SummarizationModel(HUGGINGFACE_SUMMARIZATION_REPO) + state.summarization_model = SummarizationModel(HUGGINGFACE_SUMMARIZATION_REPO) except Exception as remote_error: logger.warning(f"Remote load failed, falling back to local model: {remote_error}") - _startup_errors.append(f"remote_load: {str(remote_error)[:200]}") + state._startup_errors.append(f"remote_load: {str(remote_error)[:200]}") logger.info(f"Loading summarization model from local path: {SUMMARIZATION_PATH}") - summarization_model = SummarizationModel(SUMMARIZATION_PATH) + state.summarization_model = SummarizationModel(SUMMARIZATION_PATH) loaded.append("summarization") logger.info("Summarization model loaded successfully") except Exception as e: import traceback err_detail = traceback.format_exc() failed.append(("summarization", str(e))) - _startup_errors.append(f"summarization_load_failed: {err_detail[-500:]}") + state._startup_errors.append(f"summarization_load_failed: {err_detail[-500:]}") logger.error(f"Failed to load summarization model: {str(e)}") logger.info(f"Models loaded: {loaded}") @@ -174,2612 +80,8 @@ def load_models(): return len(loaded) > 0 -_startup_errors = [] - - -@app.route('/') -def index(): - """Serve the main HTML file with Supabase credentials injected.""" - html_path = Path(__file__).parent / 'index.html' - html = html_path.read_text(encoding='utf-8') - - # Inject Supabase credentials into the meta tags - html = html.replace( - '', - f'' - ) - html = html.replace( - '', - f'' - ) - - return Response(html, mimetype='text/html') - - -@app.route('/api/health', methods=['GET']) -def health_check(): - """Health check endpoint for production monitoring.""" - if USE_HF_API: - health = { - 'status': 'healthy', - 'mode': 'hf_spaces_local', - 'models': { - 'summarization': summarization_model is not None, - 'spelling': _spelling_available(), - 'autocomplete': _autocomplete_available(), - 'grammar': _grammar_available(), - 'punctuation': _punctuation_available(), - 'dialect': _dialect_available() - }, - 'note': 'Free tier: summarization local, other models return input unchanged', - 'supabase': { - 'configured': bool(SUPABASE_URL and SUPABASE_ANON_KEY), - }, - 'environment': 'huggingface_spaces', - } - status_code = 200 if summarization_model is not None else 503 - return jsonify(health), status_code - - health = { - 'status': 'healthy', - 'mode': 'local_models', - 'models': { - 'summarization': summarization_model is not None, - 'spelling': spelling_model is not None, - 'autocomplete': autocomplete_model is not None, - 'grammar': grammar_model is not None, - 'punctuation': punctuation_model is not None, - 'dialect': _dialect_available() - }, - 'supabase': { - 'configured': bool(SUPABASE_URL and SUPABASE_ANON_KEY), - }, - 'environment': 'render' if os.environ.get('RENDER') else 'local', - } - status_code = 200 if health['models']['summarization'] else 503 - return jsonify(health), status_code - - -@app.route('/api/debug-models', methods=['GET']) -def debug_models(): - """Debug endpoint: report model status and startup errors.""" - from hf_inference import debug_test_all_models - results = debug_test_all_models() - - # Memory info - import os - try: - import resource - mem = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss - mem_info = f"{mem} KB" - except Exception: - mem_info = "N/A" - - # /proc/meminfo on Linux - proc_mem = {} - try: - with open('/proc/meminfo', 'r') as f: - for line in f: - if any(k in line for k in ['MemTotal', 'MemFree', 'MemAvailable', 'SwapTotal']): - parts = line.split() - proc_mem[parts[0].rstrip(':')] = parts[1] + ' ' + (parts[2] if len(parts) > 2 else '') - except Exception: - proc_mem = {"error": "cannot read /proc/meminfo"} - - return jsonify({ - 'status': 'debug', - 'hf_api_token_set': bool(HF_API_TOKEN), - 'summarization_model_loaded': summarization_model is not None, - 'startup_errors': _startup_errors, - 'memory': mem_info, - 'proc_meminfo': proc_mem, - 'models': results, - }), 200 - - -def _spelling_available(): - """Check if spelling model is loaded (without triggering lazy load).""" - try: - from nlp.spelling.araspell_service import is_loaded - return is_loaded() - except Exception: - return False - - -def _grammar_available(): - """Check if grammar model is loaded (without triggering lazy load).""" - try: - from nlp.grammar.grammar_service import is_loaded - return is_loaded() - except Exception: - return False - - -def _punctuation_available(): - """Check if punctuation model is loaded (without triggering lazy load).""" - try: - from nlp.punctuation.punctuation_service import is_loaded - return is_loaded() - except Exception: - return False - - -def _autocomplete_available(): - """Check if autocomplete model is loaded (without triggering lazy load).""" - try: - from nlp.autocomplete.autocomplete_service import _instance - return _instance is not None and _instance.is_ready() - except Exception: - return False - - -def _dialect_available(): - """Check if dialect model is loaded (without triggering lazy load).""" - try: - from nlp.dialect.dialect_service import is_loaded - return is_loaded() - except Exception: - return False - - -@app.route('/api/spelling', methods=['POST']) -def spelling_correction(): - """ - Correct spelling in Arabic text. - - Request JSON: - { - "text": "Arabic text with spelling errors" - } - - Response JSON: - { - "original_text": "...", - "corrected_text": "...", - "status": "success" - } - """ - try: - if not request.is_json: - return jsonify({'error': 'Request must be JSON', 'status': 'error'}), 400 - - data = request.get_json() - text = data.get('text', '').strip() - - if not text: - return jsonify({'error': 'Text is required', 'status': 'error'}), 400 - - if len(text) > MAX_TEXT_LENGTH: - return jsonify({ - 'error': f'Text too long. Maximum {MAX_TEXT_LENGTH} characters.', - 'status': 'error' - }), 400 - - logger.info(f"Spelling correction request: text_length={len(text)}") - - from nlp.spelling.araspell_service import get_spelling_model - checker = get_spelling_model() - corrected = checker.correct(text) - - return jsonify({ - 'original_text': text, - 'corrected_text': corrected, - 'status': 'success' - }), 200 - - except RuntimeError as e: - logger.error(f"Spelling model error: {e}") - return jsonify({ - 'error': f'Spelling model unavailable: {str(e)[:200]}', - 'status': 'error' - }), 503 - except Exception as e: - logger.error(f"Spelling correction error: {e}") - return jsonify({ - 'error': f'Spelling correction failed: {str(e)[:200]}', - 'status': 'error' - }), 500 - - -@app.route('/api/summarize', methods=['POST']) -def summarize(): - """ - Summarize Arabic text. - - Expected JSON payload: - { - "text": "Arabic text to summarize", - "length": 1-3 (1=short, 2=medium, 3=long), - "full_text": true/false (whether to summarize full text or just first paragraph) - } - """ - if summarization_model is None: - return jsonify({ - 'error': 'Summarization model not loaded. Please check server logs.', - 'status': 'error' - }), 503 - - try: - # Validate request - if not request.is_json: - return jsonify({ - 'error': 'Request must be JSON', - 'status': 'error' - }), 400 - - data = request.get_json() - - # Validate input text - text = data.get('text', '').strip() - if not text: - return jsonify({ - 'error': 'Text is required', - 'status': 'error' - }), 400 - - if len(text) < MIN_TEXT_LENGTH: - return jsonify({ - 'error': f'Text must be at least {MIN_TEXT_LENGTH} characters', - 'status': 'error' - }), 400 - - if len(text) > MAX_TEXT_LENGTH: - return jsonify({ - 'error': f'Text must be at most {MAX_TEXT_LENGTH} characters', - 'status': 'error' - }), 400 - - # Get parameters - length = int(data.get('length', 2)) # Default to medium - length = max(1, min(3, length)) # Clamp between 1 and 3 - - full_text = data.get('full_text', True) - - # Calculate max_length based on length parameter - # Short: ~30% of input, Medium: ~50%, Long: ~70% - input_length = len(text.split()) - length_multipliers = {1: 0.3, 2: 0.5, 3: 0.7} - max_length = max(20, int(input_length * length_multipliers[length])) - max_length = min(max_length, MAX_SUMMARY_LENGTH) - - # Generate summary - logger.info(f"Generating summary: length={length}, max_length={max_length}, text_length={len(text)}") - - # Always use local model (HF Spaces free tier has no outbound DNS for API calls) - summary = summarization_model.summarize(text, max_length=max_length, min_length=max(10, max_length // 3)) - - return jsonify({ - 'summary': summary, - 'status': 'success', - 'original_length': len(text), - 'summary_length': len(summary) - }) - - except ValueError as e: - logger.error(f"Validation error: {str(e)}") - return jsonify({ - 'error': f'Invalid input: {str(e)}', - 'status': 'error' - }), 400 - - except Exception as e: - logger.error(f"Error during summarization: {str(e)}") - logger.error(traceback.format_exc()) - return jsonify({ - 'error': 'An error occurred during summarization. Please try again.', - 'status': 'error', - 'details': str(e) if app.debug else None - }), 500 - - - -@app.route('/api/autocomplete', methods=['POST']) -def autocomplete(): - """ - Get autocomplete suggestions for Arabic text. - COMPLETELY INDEPENDENT — has zero interaction with /api/analyze. - - Request JSON: - { - "context": "", - "n": 5 (optional) - } - - Response JSON: - { - "status": "success", - "suggestions": ["word1", "word2", ...] - } - """ - try: - if not request.is_json: - return jsonify({'error': 'Request must be JSON', 'status': 'error'}), 400 - - data = request.get_json() - context = data.get('context', '').strip() - n = int(data.get('n', 3)) - - if not context or len(context) < 3: - return jsonify({'suggestions': [], 'status': 'success'}) - - # Extract last ~200 chars (trimmed to word boundary) - from nlp.autocomplete.autocomplete_rules import extract_context - context = extract_context(context, max_chars=200) - - # Lazy-load the model on first request - from nlp.autocomplete.autocomplete_service import get_autocomplete_model - ac_model = get_autocomplete_model() - - if not ac_model.is_ready(): - return jsonify({'suggestions': [], 'status': 'success'}) - - t0 = time.time() - suggestions = ac_model.predict(context, n=n) - elapsed = int((time.time() - t0) * 1000) - logger.info(f"[AUTOCOMPLETE] {elapsed}ms | mode={ac_model.get_mode()} | context='{context[:80]}' | suggestions={suggestions}") - - return jsonify({ - 'suggestions': suggestions, - 'status': 'success' - }) - - except Exception as e: - logger.error(f"Error during autocomplete: {str(e)}") - logger.error(traceback.format_exc()) - return jsonify({ - 'suggestions': [], - 'status': 'success' # Graceful degradation — never fail the UI - }) - - -@app.route('/api/grammar', methods=['POST']) -def grammar_correction(): - """ - Correct grammar in Arabic text. - - Request JSON: - { - "text": "Arabic text with grammar errors" - } - - Response JSON: - { - "original_text": "...", - "corrected_text": "...", - "status": "success" - } - """ - try: - if not request.is_json: - return jsonify({'error': 'Request must be JSON', 'status': 'error'}), 400 - - data = request.get_json() - text = data.get('text', '').strip() - - if not text: - return jsonify({'error': 'Text is required', 'status': 'error'}), 400 - - if len(text) > MAX_TEXT_LENGTH: - return jsonify({ - 'error': f'Text too long. Maximum {MAX_TEXT_LENGTH} characters.', - 'status': 'error' - }), 400 - - logger.info(f"Grammar correction request: text_length={len(text)}") - - from nlp.grammar.grammar_service import get_grammar_model - checker = get_grammar_model() - corrected = checker.correct(text) - - return jsonify({ - 'original_text': text, - 'corrected_text': corrected, - 'status': 'success' - }), 200 - - except RuntimeError as e: - logger.error(f"Grammar model error: {e}") - return jsonify({ - 'error': f'Grammar model unavailable: {str(e)[:200]}', - 'status': 'error' - }), 503 - except Exception as e: - logger.error(f"Error during grammar correction: {str(e)}") - logger.error(traceback.format_exc()) - return jsonify({ - 'error': 'An error occurred during grammar correction.', - 'status': 'error', - 'details': str(e) if app.debug else None - }), 500 - - -@app.route('/api/punctuation', methods=['POST']) -def add_punctuation(): - """ - Add punctuation to Arabic text using PuncAra-v1. - - Request JSON: - { - "text": "Arabic text without punctuation" - } - - Response JSON: - { - "status": "success", - "original_text": "...", - "corrected_text": "..." - } - """ - try: - if not request.is_json: - return jsonify({'error': 'Request must be JSON', 'status': 'error'}), 400 - - data = request.get_json() - text = data.get('text', '').strip() - - if not text: - return jsonify({'error': 'Text is required', 'status': 'error'}), 400 - - logger.info(f"Adding punctuation for text of length: {len(text)}") - from nlp.punctuation.punctuation_service import get_punctuation_model - punc_checker = get_punctuation_model() - punctuated = punc_checker.correct(text) - - return jsonify({ - 'original_text': text, - 'corrected_text': punctuated, - 'status': 'success' - }) - - except RuntimeError as e: - logger.error(f"Punctuation model error: {e}") - return jsonify({ - 'error': f'Punctuation model unavailable: {str(e)[:200]}', - 'status': 'error' - }), 503 - except Exception as e: - logger.error(f"Error during punctuation: {str(e)}") - logger.error(traceback.format_exc()) - return jsonify({ - 'error': 'An error occurred during punctuation.', - 'status': 'error', - 'details': str(e) if app.debug else None - }), 500 - - -def get_word_positions(text): - """ - Returns a list of tuples (word, start_char_index, end_char_index) - for all whitespace-separated words in the text. - """ - positions = [] - for m in re.finditer(r'\S+', text): - positions.append((m.group(), m.start(), m.end())) - return positions - - -class OffsetMapper: - """ - Single source of truth for coordinate transformations between - two consecutive versions of CURRENT_TEXT. - - CONTRACT: - Input: text_before (str), text_after (str) - — two consecutive states of CURRENT_TEXT - Stores: Internal diff operations (PRIVATE) - API: - reverse_map_offset(pos) → text_after pos → text_before pos - forward_map_range(start, end) → text_before range → text_after range - - TERMINOLOGY: - text_before = CURRENT_TEXT before this stage's mutation - text_after = CURRENT_TEXT after this stage's mutation - forward = text_before → text_after - reverse = text_after → text_before - - RULES: - All external code uses reverse_map_offset() or forward_map_range(). - ._opcodes is PRIVATE — no external access. - """ - - def __init__(self, text_before, text_after): - self._text_before = text_before - self._text_after = text_after - self._opcodes = [] # PRIVATE — (i1, i2, j1, j2) tuples - self._build() - - def _build(self): - s = difflib.SequenceMatcher(None, self._text_before, self._text_after) - for tag, i1, i2, j1, j2 in s.get_opcodes(): - self._opcodes.append((i1, i2, j1, j2)) - - def reverse_map_offset(self, pos_in_after, is_end=False): - """ - Map a single position from text_after → text_before. - (CURRENT_TEXT after mutation → CURRENT_TEXT before mutation) - - Used by PipelineContext.map_to_original() to walk the mapper - chain in reverse, ultimately reaching ORIGINAL_TEXT coordinates. - """ - matches = [] - for i1, i2, j1, j2 in self._opcodes: - if j1 <= pos_in_after <= j2: - matches.append((i1, i2, j1, j2)) - - if not matches: - return len(self._text_before) - - mapped_positions = [] - for i1, i2, j1, j2 in matches: - if j2 == j1: # insertion point in text_before (deleted in text_after) - # If we're mapping an 'end' coordinate, we want to encompass the deleted text (i2). - # If we're mapping a 'start' coordinate, we want the start of the deletion (i1). - mapped_positions.append(i2 if is_end else i1) - else: - ratio = (pos_in_after - j1) / (j2 - j1) - mapped_positions.append(round(i1 + ratio * (i2 - i1))) - - # If is_end is True, maximize the mapped offset (include as much as possible) - # If is_end is False, minimize the mapped offset - return max(mapped_positions) if is_end else min(mapped_positions) - - def forward_map_range(self, start_in_before, end_in_before): - """ - Map a range from text_before → text_after. - (CURRENT_TEXT before mutation → CURRENT_TEXT after mutation) - - Used ONLY by StageLocker.update_via_mapper() to shift locked - spans after a text mutation. - - MONOTONICITY GUARD: If independent point mapping produces an - inverted range (start > end) due to non-monotonic edits, - the end is clamped to max(new_start, new_end). - """ - new_start = self._forward_map_pos(start_in_before) - new_end = self._forward_map_pos(end_in_before) - # Monotonicity guard: prevent inverted ranges - new_end = max(new_start, new_end) - return new_start, new_end - - def _forward_map_pos(self, pos): - """Map a single position text_before → text_after. PRIVATE.""" - for i1, i2, j1, j2 in self._opcodes: - if i1 <= pos <= i2: - if i2 == i1: - return j1 - ratio = (pos - i1) / (i2 - i1) - return int(j1 + ratio * (j2 - j1)) - if self._opcodes: - last = self._opcodes[-1] - return last[3] + (pos - last[1]) - return pos - - - -def get_word_diffs(original, corrected): - """ - Identify differences between original and corrected text at the word level. - Returns a list of suggestions with start and end character offsets. - """ - orig_words = get_word_positions(original) - corr_words = get_word_positions(corrected) - s = difflib.SequenceMatcher(None, [w[0] for w in orig_words], [w[0] for w in corr_words]) - suggestions = [] - - for tag, i1, i2, j1, j2 in s.get_opcodes(): - if tag == 'replace': - if i1 < len(orig_words) and i2 - 1 < len(orig_words): - start_char = orig_words[i1][1] - end_char = orig_words[i2-1][2] - suggestions.append({ - 'start': start_char, - 'end': end_char, - 'original': original[start_char:end_char], - 'correction': " ".join([w[0] for w in corr_words[j1:j2]]), - 'type': 'generic' - }) - elif tag == 'delete': - if i1 < len(orig_words) and i2 - 1 < len(orig_words): - start_char = orig_words[i1][1] - end_char = orig_words[i2-1][2] - suggestions.append({ - 'start': start_char, - 'end': end_char, - 'original': original[start_char:end_char], - 'correction': '', - 'type': 'generic' - }) - elif tag == 'insert': - pos = orig_words[i1][1] if i1 < len(orig_words) else len(original) - suggestions.append({ - 'start': pos, - 'end': pos, - 'original': '', - 'correction': " ".join([w[0] for w in corr_words[j1:j2]]), - 'type': 'generic' - }) - - return suggestions - - -def _levenshtein(a, b): - """Damerau-Levenshtein distance — transpositions count as 1 edit. - - Better for Arabic typos like اقصتاديا→اقتصاديا (swap صت→تص): - Standard Levenshtein says edit=2, Damerau says edit=1. - - FIX-45: Upgraded from standard Levenshtein. - """ - m, n = len(a), len(b) - if m == 0: - return n - if n == 0: - return m - # Use (m+2)x(n+2) matrix to handle transpositions safely - dp = [[0] * (n + 1) for _ in range(m + 1)] - for i in range(m + 1): - dp[i][0] = i - for j in range(n + 1): - dp[0][j] = j - for i in range(1, m + 1): - for j in range(1, n + 1): - cost = 0 if a[i - 1] == b[j - 1] else 1 - dp[i][j] = min( - dp[i - 1][j] + 1, # deletion - dp[i][j - 1] + 1, # insertion - dp[i - 1][j - 1] + cost, # substitution - ) - # Transposition: swap adjacent characters (counts as 1 edit) - if (i > 1 and j > 1 - and a[i - 1] == b[j - 2] - and a[i - 2] == b[j - 1]): - dp[i][j] = min(dp[i][j], dp[i - 2][j - 2] + 1) - return dp[m][n] - - -def _is_small_spelling_change(orig_word, corr_word, vocab_manager=None): - """ - Heuristic: only accept small spelling edits and ignore - aggressive changes (to avoid over-editing). - - CRITICAL: If both words are in-vocabulary (both are valid Arabic words), - only accept known orthographic fixes (ه→ة, hamza whitelist). - This prevents the model from corrupting correct words (e.g. وكان→وكأن). - - Returns: - float: 0.0 = reject, 0.5 = dampened confidence (rare word risk), - 0.9 = normal confidence. Phase 2 (BUG-034/035/036/037/E8). - """ - if not orig_word or not corr_word: - return 0.0 - if orig_word == corr_word: - return 0.0 - - # ── FIX-39: Edit distance hallucination guard (from legacy AraSpell OutputValidator) ── - # Block corrections where the edit distance is too high relative to word length. - # This catches model hallucinations like والممرضات→والرضا, شجعتهم→يجعلهم, طبخ→طبي. - _ed_dist = _levenshtein(orig_word, corr_word) - _max_len = max(len(orig_word), len(corr_word)) - if _max_len >= 3 and _ed_dist > max(2, _max_len * 0.4): - logger.info( - f"[SPELLING] Blocked hallucination: '{orig_word}'→'{corr_word}' " - f"(edit_dist={_ed_dist}, max_allowed={max(2, int(_max_len * 0.4))})" - ) - return 0.0 - - # ── FIX-42a: Length ratio guard ── - # Block corrections that shrink the word significantly (>30% shorter). - # Catches: والممرضات(9)→والرضا(6), للطالبه(7)→للطالب(6), شجعتهم(6)→يجعلهم(6) - # These often indicate the model hallucinated a different word. - _orig_len = len(orig_word) - _corr_len = len(corr_word) - if _orig_len >= 5 and _corr_len < _orig_len * 0.7: - logger.info( - f"[SPELLING] Blocked length shrink: '{orig_word}'→'{corr_word}' " - f"(len {_orig_len}→{_corr_len}, ratio={_corr_len/_orig_len:.2f})" - ) - return 0.0 - - # ── FIX-42b: First-letter change guard ── - # Block corrections that change the first character (after stripping common prefixes). - # Catches: افهمه→تفهمة (أ→ت), واحتاج→وتحتاج (ا→ت). - # The first root letter almost never changes in a typo — it's a hallucination. - if _orig_len >= 3 and _corr_len >= 3: - # Strip common prefixes (ال, و, ف, ب, ل, ك) to compare root starts - _PREFIXES = ('وال', 'فال', 'بال', 'كال', 'لل', 'ال', 'و', 'ف', 'ب', 'ل', 'ك') - _o_root = orig_word - _c_root = corr_word - for _pfx in _PREFIXES: - if _o_root.startswith(_pfx) and len(_o_root) > len(_pfx) + 1: - _o_root = _o_root[len(_pfx):] - break - for _pfx in _PREFIXES: - if _c_root.startswith(_pfx) and len(_c_root) > len(_pfx) + 1: - _c_root = _c_root[len(_pfx):] - break - # If roots start with different letters AND this isn't an orthographic pair - # AND roots have same length (true consonant swap, not a character addition) - # Exception: الولاد→الأولاد has roots ولاد(4)→أولاد(5) — different length = allow - _HAMZA_CHARS = set('أإآاء') - _STOP_WORDS = {"التي", "الذي", "الذين", "هذا", "هذه", "هؤلاء", "تلك", "ذلك"} - if (_o_root and _c_root and _o_root[0] != _c_root[0] - and len(_o_root) == len(_c_root) # same-length roots only - and not (_o_root[0] in _HAMZA_CHARS and _c_root[0] in _HAMZA_CHARS) - and corr_word not in _STOP_WORDS): - logger.info( - f"[SPELLING] Blocked first-letter change: '{orig_word}'→'{corr_word}' " - f"(root '{_o_root[0]}'→'{_c_root[0]}')" - ) - return 0.0 - - # ── GUARD 1: Numeral protection (Phase 1, BUG-011/012/E1) ── - # Reject corrections that remove/change/introduce digits. - # Numeral hallucination is a complete-replacement failure mode. - _DIGITS = set('0123456789٠١٢٣٤٥٦٧٨٩') - if any(c in _DIGITS for c in orig_word): - return 0.0 # Never "correct" text containing numerals - if any(c in _DIGITS for c in corr_word): - return 0.0 # Never introduce digits that weren't in original - - # ── GUARD 2: Directional confusable-word rules (Phase 1, BUG-004/005/E4) ── - # For known function words, only allow corrections TOWARD the valid form. - # This prevents meaning-changing substitutions that pass orthographic checks. - # - # ── B5 KNOWN LIMITATION (BUG-025/026): Shadda Duplication ── - # AraSpell duplicates shadda-bearing words in ISOLATION: إنّ→إن إن, أنّ→أن أن. - # In sentence context (e.g., "إنّ العلم نور"), the model handles shadda correctly. - # This is an isolation-only AraSpell quirk — no pipeline filter needed. - # _DIRECTIONAL_BLOCKS is defined at module level (line ~100) - if corr_word in _DIRECTIONAL_BLOCKS.get(orig_word, set()): - return 0.0 - - # Check with common prefixes stripped (و+كان→و+كأن etc.) - _CLITIC_PREFIXES = ('و', 'ف', 'ب', 'ل', 'ك') - for _pfx in _CLITIC_PREFIXES: - if (orig_word.startswith(_pfx) and corr_word.startswith(_pfx) - and len(orig_word) > len(_pfx) + 1): - _orig_stem = orig_word[len(_pfx):] - _corr_stem = corr_word[len(_pfx):] - if _corr_stem in _DIRECTIONAL_BLOCKS.get(_orig_stem, set()): - return 0.0 - - # ── FIX-30: Prefix-stripping protection ── - # Block corrections that strip a clitic prefix from a valid compound: - # وبالمستشفيات → والمستشفيات (stripped ب from وب prefix chain) - # فبالتالي → وبالتالي (swapped ف→و) - # These destroy the meaning of the prefix (بال = by the, و = and, ف = so/then) - _COMPOUND_PREFIXES = ['وبال', 'فبال', 'وال', 'فال', 'بال', 'كال', 'ول', 'فل', - 'وب', 'فب', 'وك', 'فك'] - for _cpfx in _COMPOUND_PREFIXES: - if orig_word.startswith(_cpfx) and len(orig_word) > len(_cpfx) + 2: - if not corr_word.startswith(_cpfx): - # Original has compound prefix but correction doesn't — check if - # the stem is the same (meaning only the prefix was stripped) - _stem = orig_word[len(_cpfx):] - for _alt_pfx in _COMPOUND_PREFIXES + list(_CLITIC_PREFIXES) + ['ال', '']: - if corr_word.startswith(_alt_pfx): - _corr_stem2 = corr_word[len(_alt_pfx):] - if _stem == _corr_stem2 or _levenshtein(_stem, _corr_stem2) <= 1: - return 0.0 - break # Only check the longest matching prefix - - # Ignore tokens that contain non-letters (numbers / punctuation) - # Arabic letters range plus basic Latin letters. - if re.search(r'[^ء-يآأإىa-zA-Z]', orig_word): - return 0.0 - if re.search(r'[^ء-يآأإىa-zA-Z]', corr_word): - return 0.0 - - # Fix S2: Reject corrections that drop feminine marker (ه/ة) - # e.g. بارده→بارد, منخفظه→منخفض — these are WORSE than no correction - feminine_endings = ('ه', 'ة') - if orig_word.endswith(feminine_endings) and not corr_word.endswith(feminine_endings): - # Only reject if the correction is just the word minus the ending - if corr_word == orig_word[:-1] or len(corr_word) < len(orig_word): - return 0.0 - - # ── FIX-41: Block corrections that ADD trailing ا/ي to IV words ── - # Model sometimes adds accusative markers: واجب→واجبا, معطف→معطفا. - # If the original word is IV and the correction just appends a letter, reject. - if vocab_manager and len(corr_word) == len(orig_word) + 1 and corr_word.startswith(orig_word): - _appended_char = corr_word[-1] - if _appended_char in ('ا', 'ي', 'و') and vocab_manager.is_iv(orig_word): - logger.info( - f"[SPELLING] Blocked trailing '{_appended_char}' addition: " - f"'{orig_word}'→'{corr_word}' (original is IV)" - ) - return 0.0 - - # CRITICAL: If both words are valid Arabic words, only accept known fixes. - # This prevents the spelling model from changing one correct word to another - # (e.g. وكان→وكأن, which changes "and was" to "as if" — a meaning change). - if vocab_manager: - orig_iv = vocab_manager.is_iv(orig_word) - corr_iv = vocab_manager.is_iv(corr_word) - if orig_iv and corr_iv: - # Both are valid words — only accept known orthographic fixes: - # 1. ه→ة at word end (feminine marker fix) - # B3 (BUG-014/015): EXCEPT when ه is a pronoun suffix (preceded by ت). - # Pattern: verb+ته = "verb + him/it", NOT ta marbuta. - # E.g., فتأملته (fataamaltahu) → فتأملتة is WRONG. - if (orig_word.endswith('ه') and corr_word.endswith('ة') - and orig_word[:-1] == corr_word[:-1]): - # FIX-38: Expanded pronoun suffix guard. - # ه at end can be: (a) ta marbuta (should be ة) OR (b) pronoun "him/it". - # The old guard only blocked ته. But كله (كل+ه), احبه (احب+ه), - # عنده (عند+ه) are ALL pronoun suffixes — the ه is NOT ta marbuta. - # Strategy (from legacy AraSpell WordAligner): if the STEM (word without ه) - # is itself IV, then ه is likely a pronoun suffix → block the change. - # If the stem is NOT IV, ه is likely a misspelled ة → allow. - # - # FIX-50: Whitelist bypass — known feminine nouns always allowed. - # BERT vocab includes subword fragments (الحكوم, المدرس) as IV, - # causing false pronoun detection. These known words bypass the guard. - _KNOWN_FEMININE = { - 'الحكومه', 'المدرسه', 'الشركه', 'الجامعه', 'المدينه', - 'القصه', 'المكتبه', 'الطائره', 'الوزاره', 'المديره', - 'المعلمه', 'الطالبه', 'القريه', 'الحديقه', 'المحكمه', - 'المنطقه', 'الدوله', 'السياره', 'الغرفه', 'المحطه', - 'الوظيفه', 'العائله', 'الحياه', 'الصلاه', - 'حكومه', 'مدرسه', 'شركه', 'جامعه', 'مدينه', - 'قصه', 'مكتبه', 'طائره', 'وزاره', 'مديره', - 'معلمه', 'طالبه', 'قريه', 'حديقه', 'محكمه', - 'منطقه', 'دوله', 'سياره', 'غرفه', 'محطه', - 'وظيفه', 'عائله', 'حياه', 'صلاه', - } - if orig_word in _KNOWN_FEMININE: - return 0.9 - stem = orig_word[:-1] - if len(stem) >= 2 and vocab_manager.is_iv(stem): - logger.info( - f"[SPELLING] Blocked ه→ة (pronoun suffix): " - f"'{orig_word}'→'{corr_word}' (stem '{stem}' is IV → ه is pronoun)" - ) - return 0.0 - return 0.9 - # 2. ة→ه at word end (less common but valid) - if (orig_word.endswith('ة') and corr_word.endswith('ه') - and orig_word[:-1] == corr_word[:-1]): - return 0.9 - # 3. Word is in the hamza whitelist (known common errors) - # CRITICAL (Phase 5 fix, BUG-016/027): only accept if the correction - # MATCHES the whitelist target — not any arbitrary correction. - # FIX-02: This check now ALWAYS accepts whitelist matches, bypassing IV-IV guard. - from nlp.spelling.araspell_rules import AraSpellPostProcessor - if orig_word in AraSpellPostProcessor.HAMZA_WHITELIST: - expected = AraSpellPostProcessor.HAMZA_WHITELIST[orig_word] - if corr_word == expected: - return 0.9 - else: - logger.info( - f"[SPELLING] Whitelist mismatch: '{orig_word}'→'{corr_word}' " - f"(expected '{expected}') — rejected" - ) - return 0.0 - # 4. Check prefixed hamza (و+whitelist word, etc.) - for prefix in AraSpellPostProcessor.HAMZA_PREFIXES: - if orig_word.startswith(prefix) and len(orig_word) > len(prefix) + 1: - remainder = orig_word[len(prefix):] - if remainder in AraSpellPostProcessor.HAMZA_WHITELIST: - expected = prefix + AraSpellPostProcessor.HAMZA_WHITELIST[remainder] - if corr_word == expected: - return 0.9 - else: - logger.info( - f"[SPELLING] Prefixed whitelist mismatch: '{orig_word}'→'{corr_word}' " - f"(expected '{expected}') — rejected" - ) - return 0.0 - # 5. FIX-02: Alif maqsura fix (ي↔ى at end) — both IV but correction is valid - if (orig_word.endswith('ي') and corr_word.endswith('ى') - and orig_word[:-1] == corr_word[:-1]): - return 0.85 - if (orig_word.endswith('ى') and corr_word.endswith('ي') - and orig_word[:-1] == corr_word[:-1]): - return 0.85 - # ── Phase 12 (A7): Vocab-aware IV-IV override ── - # Allow keyboard-adjacent single edits when correction is significantly - # more common. Prevents blocking genuine typos where both happen to be IV. - if len(orig_word) == len(corr_word): - from nlp.spelling.araspell_rules import RulesBasedCorrector - edit_dist = _levenshtein(orig_word, corr_word) - if edit_dist == 1: - orig_rank = vocab_manager.get_frequency_rank(orig_word) - corr_rank = vocab_manager.get_frequency_rank(corr_word) - if corr_rank < orig_rank and corr_rank < 5000: - # Check keyboard proximity for extra safety - for a, b in zip(orig_word, corr_word): - if a != b: - if RulesBasedCorrector.is_keyboard_neighbor(a, b): - logger.info( - f"[SPELLING] Vocab-override (IV-IV): " - f"'{orig_word}'(rank={orig_rank})→" - f"'{corr_word}'(rank={corr_rank}) " - f"keyboard-adjacent '{a}'→'{b}'" - ) - return 0.5 - break - # 6. FIX-49: Trailing و removal (المصنعو→المصنع) - # Common model artifact — original has trailing و that should be removed - if (orig_word.endswith('و') and corr_word == orig_word[:-1] - and len(corr_word) >= 3): - return 0.8 - # 7. FIX-49b: Trailing و→وا (حضرو→حضروا) - # Missing alif after waw al-jama'a - if (orig_word.endswith('و') and corr_word == orig_word + 'ا' - and len(orig_word) >= 3): - return 0.8 - # Both are valid words and change is NOT a known fix — REJECT - # This prevents وكان→وكأن, etc. - return 0.0 - - dist = _levenshtein(orig_word, corr_word) - max_len = max(len(orig_word), len(corr_word)) - - # Tighter filter for OOV words: reject edits that change word roots - # Allow max 2 edits at max 50% of word length - if dist > 2 or (dist / max_len) > 0.5: - return 0.0 - - # CRITICAL: Only allow ORTHOGRAPHIC fixes (ه↔ة, ا↔أ↔إ↔آ, ي↔ى). - # Any other letter change means the word's ROOT is different - # (e.g. عضلية→عملية ض→م = completely different word!) - ORTHO_PAIRS = { - ('ه', 'ة'), ('ة', 'ه'), - ('ا', 'أ'), ('أ', 'ا'), ('ا', 'إ'), ('إ', 'ا'), ('ا', 'آ'), ('آ', 'ا'), - ('ي', 'ى'), ('ى', 'ي'), - ('ؤ', 'و'), ('و', 'ؤ'), # hamza on waw - ('ئ', 'ي'), ('ي', 'ئ'), # hamza on ya - ('ء', 'أ'), ('أ', 'ء'), # standalone hamza ↔ hamza on alef - ('ء', 'ؤ'), ('ؤ', 'ء'), # standalone hamza ↔ hamza on waw - ('ء', 'ئ'), ('ئ', 'ء'), # standalone hamza ↔ hamza on ya - } - # ── Phase 12 (A2): Phonetically confusable pairs ── - # Arabic letters commonly confused due to similar pronunciation. - # From AraSpell.py ContextualCorrector.CONFUSION_PAIRS. - PHONETIC_PAIRS = { - ('ض', 'ظ'), ('ظ', 'ض'), # emphatic d/z - ('ذ', 'ز'), ('ز', 'ذ'), # z variants - ('ص', 'س'), ('س', 'ص'), # s variants - ('ط', 'ت'), ('ت', 'ط'), # t variants - ('ق', 'ك'), ('ك', 'ق'), # k/q variants - ('د', 'ض'), ('ض', 'د'), # d/emphatic-d - ('غ', 'ق'), ('ق', 'غ'), # gh/q - } - - from nlp.spelling.araspell_rules import RulesBasedCorrector - - # ── Phase 13: Adjacent character transposition detection ── - # Transpositions (e.g., العصوبات→الصعوبات) have Levenshtein=2 but are a - # single adjacent swap. Detect and accept when OOV→IV. - if len(orig_word) == len(corr_word) and dist == 2: - _transposition_found = False - for _ti in range(len(orig_word) - 1): - if (orig_word[_ti] == corr_word[_ti + 1] and - orig_word[_ti + 1] == corr_word[_ti] and - orig_word[:_ti] == corr_word[:_ti] and - orig_word[_ti + 2:] == corr_word[_ti + 2:]): - _transposition_found = True - break - if _transposition_found: - if vocab_manager: - _orig_oov = not vocab_manager.is_iv(orig_word) - _corr_iv = vocab_manager.is_iv(corr_word) - if _orig_oov and _corr_iv: - logger.info( - f"[SPELLING] Transposition accepted (OOV→IV): " - f"'{orig_word}'→'{corr_word}'" - ) - return 0.6 # Dampened confidence for transpositions - elif _orig_oov and not _corr_iv: - # Both OOV — still accept transposition with lower confidence - logger.info( - f"[SPELLING] Transposition accepted (OOV→OOV): " - f"'{orig_word}'→'{corr_word}' (low confidence)" - ) - return 0.5 - else: - return 0.6 # No vocab manager — accept with dampened confidence - - # ── Phase 13: Single character insertion detection ── - # When the original has one extra character (user typed an extra letter), - # e.g., الكتتاب→الكتاب (extra ت). Levenshtein=1, lengths differ by 1. - if len(orig_word) == len(corr_word) + 1 and dist == 1: - # Find where the extra character is in orig_word - _insertion_valid = False - for _di in range(len(orig_word)): - # Try removing character at position _di from orig_word - _candidate = orig_word[:_di] + orig_word[_di + 1:] - if _candidate == corr_word: - _insertion_valid = True - break - if _insertion_valid: - if vocab_manager: - _orig_oov = not vocab_manager.is_iv(orig_word) - _corr_iv = vocab_manager.is_iv(corr_word) - if _orig_oov and _corr_iv: - # FIX-35: Don't strip verb conjugation suffixes. - # Only block ن (feminine plural: ذهبن→ذهب) and - # ت (feminine past: كتبت→كتب) — these are the - # suffixes grammar commonly adds that spelling - # would try to strip. Other endings (ة,ا,ي,و,ه) - # are more likely genuine typos than grammar fixes. - _CONJUGATION_SUFFIXES = {'ن', 'ت'} - _removed_char = None - for _di2 in range(len(orig_word)): - if orig_word[:_di2] + orig_word[_di2 + 1:] == corr_word: - _removed_char = orig_word[_di2] - _removed_pos = _di2 - break - if (_removed_char in _CONJUGATION_SUFFIXES - and _removed_pos == len(orig_word) - 1 - and len(corr_word) >= 3): - logger.info( - f"[SPELLING] Rejected suffix strip: " - f"'{orig_word}'→'{corr_word}' " - f"(removing suffix '{_removed_char}' likely strips conjugation)" - ) - return 0.0 - logger.info( - f"[SPELLING] Insertion fix accepted (OOV→IV): " - f"'{orig_word}'→'{corr_word}' (extra char removed)" - ) - return 0.7 - else: - return 0.6 - - # ── Phase 13: Single character deletion detection ── - # When the original is missing one character (user missed a key), - # e.g., الكتب→الكتاب (missing ا). Levenshtein=1, lengths differ by 1. - if len(corr_word) == len(orig_word) + 1 and dist == 1: - # Find where the missing character should be in corr_word - _deletion_valid = False - for _di in range(len(corr_word)): - # Try removing character at position _di from corr_word - _candidate = corr_word[:_di] + corr_word[_di + 1:] - if _candidate == orig_word: - _deletion_valid = True - break - if _deletion_valid: - if vocab_manager: - _orig_oov = not vocab_manager.is_iv(orig_word) - _corr_iv = vocab_manager.is_iv(corr_word) - if _orig_oov and _corr_iv: - logger.info( - f"[SPELLING] Deletion fix accepted (OOV→IV): " - f"'{orig_word}'→'{corr_word}' (missing char added)" - ) - return 0.7 - else: - return 0.6 - - # Check every character pair — reject if ANY non-orthographic change - if len(orig_word) != len(corr_word): - # Length change = structural change, not just orthographic - # Exception: if diff is just adding/removing ا at start (hamza) - if abs(len(orig_word) - len(corr_word)) > 1: - return 0.0 - - # ── FIX: Block Grammar Changes masked as Spelling Typos (Dual → Plural) ── - if orig_word.endswith('ان') and corr_word.endswith('ات') and orig_word[:-2] == corr_word[:-2]: - logger.info( - f"[SPELLING] Blocked grammatical change (Dual→Plural): " - f"'{orig_word}'→'{corr_word}'" - ) - return 0.0 - - # ── Phase 12 (A1): Keyboard-neighbor and phonetic acceptance ── - # Check each differing character: ortho → full accept, keyboard/phonetic → dampened - _has_keyboard_or_phonetic = False - for a, b in zip(orig_word, corr_word): - if a != b: - if (a, b) in ORTHO_PAIRS: - continue # Orthographic — fully accepted - elif RulesBasedCorrector.is_keyboard_neighbor(a, b) or (a, b) in PHONETIC_PAIRS: - _has_keyboard_or_phonetic = True # Mark for dampened confidence - else: - return 0.0 # Not ortho, not keyboard, not phonetic → reject - # If we reached here, all diffs are ortho or keyboard/phonetic - if _has_keyboard_or_phonetic: - logger.info( - f"[SPELLING] Keyboard/phonetic typo accepted: " - f"'{orig_word}'→'{corr_word}' (dampened to 0.6)" - ) - return 0.6 # Dampened confidence for keyboard/phonetic typos - - # ── B3 (BUG-014/015): Pronoun suffix guard (OOV path) ── - # Same guard as IV-IV path: block ه→ة when preceded by ت - if (orig_word.endswith('ه') and corr_word.endswith('ة') - and len(orig_word) >= 3 and orig_word[-2] == 'ت' - and orig_word[:-1] == corr_word[:-1]): - logger.info( - f"[SPELLING] Blocked ه→ة at pronoun suffix (OOV path): " - f"'{orig_word}'→'{corr_word}'" - ) - return 0.0 - - # ── Phase 2 (BUG-034/035/036/037/E8): Confidence dampening ── - # If the original word might be a valid rare word (OOV in model but - # potentially real Arabic), dampen confidence so users can reject easily. - if vocab_manager: - orig_iv = vocab_manager.is_iv(orig_word) - corr_iv = vocab_manager.is_iv(corr_word) - - # Phase 2.2: Use frequency rank if available. - # If the original word is a known word (even rare), require a - # meaningfully higher confidence bar before replacing it. - orig_rank = vocab_manager.get_frequency_rank(orig_word) # 999999 if unknown - corr_rank = vocab_manager.get_frequency_rank(corr_word) # 999999 if unknown - if orig_iv and corr_iv and orig_rank < 999999: - # Original is a known ranked word — correction should be more common - # If correction is rarer or similarly ranked, dampen confidence - if corr_rank >= orig_rank: - logger.info( - f"[SPELLING] Dampened (freq): '{orig_word}'(rank={orig_rank})" - f"→'{corr_word}'(rank={corr_rank}) — corr not more common" - ) - return 0.5 - - if not orig_iv and corr_iv: - # OOV→IV: original might be a rare word being "corrected" to common - # Dampen confidence to 0.5 (lower than normal 0.9) - logger.info( - f"[SPELLING] Dampened confidence: '{orig_word}'→'{corr_word}' " - f"(OOV→IV, possible rare word)" - ) - return 0.5 - - # ── B2 (BUG-006/009/010/013): Hamza-removal dampening ── - # Hamza changes (أ→ا, إ→ا, ء→ا, etc.) between same-length words are - # ambiguous — could be a valid fix OR a corruption. Always dampen these - # to 0.5 regardless of vocab_manager status. This prevents BUG-009 - # (قرأ→قرا) and BUG-013 (خطأ→خطا) from leaking at full confidence. - _HAMZA_CHARS = set('أإآؤئء') - if len(orig_word) == len(corr_word): - has_hamza_diff = False - for a, b in zip(orig_word, corr_word): - if a != b: - if a in _HAMZA_CHARS or b in _HAMZA_CHARS: - has_hamza_diff = True - else: - has_hamza_diff = False - break # Non-hamza difference, don't apply this guard - if has_hamza_diff: - logger.info( - f"[SPELLING] Dampened (hamza-only): '{orig_word}'→'{corr_word}'" - ) - return 0.5 - - return 0.9 - - -def _is_spelling_only_change(original: str, correction: str) -> bool: - """ - Detect if a grammar model's correction is actually a spelling/orthographic fix - (hamza, ه→ة, ا→أ, etc.) rather than a true grammar change. - - Used to re-label grammar patches as 'spelling' for correct UI icons. - """ - if not original or not correction: - return False - - # Normalize: strip diacritics for comparison - import re as _re - strip_diacritics = lambda t: _re.sub(r'[\u064B-\u065F\u0670]', '', t) - o = strip_diacritics(original) - c = strip_diacritics(correction) - - if o == c: - return True # Only diacritical difference - - # Check word-by-word for single-word changes - o_words = o.split() - c_words = c.split() - - if len(o_words) != len(c_words): - return False # Word count changed = grammar (word split/merge) - - all_spelling = True - for ow, cw in zip(o_words, c_words): - if ow == cw: - continue - if _is_orthographic_variant(ow, cw): - continue - all_spelling = False - break - - return all_spelling - - -def _is_orthographic_variant(word1: str, word2: str) -> bool: - """ - Check if two words differ only by common Arabic orthographic variations: - - Hamza placement: ا↔أ↔إ↔آ, ى↔ي, ه↔ة - - These are spelling differences, not grammar. - """ - if len(word1) != len(word2): - # Allow ه→ة at end (same length since both are 1 char) - # But also allow small length diffs for hamza additions - if abs(len(word1) - len(word2)) > 1: - return False - # Check if only difference is a trailing ة↔ه - if (word1[:-1] == word2[:-1] and - {word1[-1], word2[-1]} <= {'ه', 'ة'}): - return True - return False - - # Same length: check char-by-char - SPELLING_EQUIVALENCES = { - frozenset({'ا', 'أ'}), frozenset({'ا', 'إ'}), frozenset({'ا', 'آ'}), - frozenset({'أ', 'إ'}), frozenset({'أ', 'آ'}), frozenset({'إ', 'آ'}), - frozenset({'ى', 'ي'}), frozenset({'ه', 'ة'}), - frozenset({'ؤ', 'و'}), frozenset({'ئ', 'ي'}), frozenset({'ئ', 'ء'}), - } - diff_count = 0 - for c1, c2 in zip(word1, word2): - if c1 == c2: - continue - if frozenset({c1, c2}) in SPELLING_EQUIVALENCES: - diff_count += 1 - else: - return False # Non-orthographic difference = grammar - return diff_count > 0 # At least one orthographic difference - - -@app.route('/api/dialect', methods=['POST']) -def convert_dialect(): - """ - Convert dialect Arabic text to Modern Standard Arabic (MSA). - - Request JSON: - { - "text": "عايز اشتكي من موظف في فرعكم" - } - - Response JSON: - { - "status": "success", - "original_text": "...", - "converted_text": "..." - } - """ - try: - if not request.is_json: - return jsonify({'error': 'Request must be JSON', 'status': 'error'}), 400 - - data = request.get_json() - text = data.get('text', '').strip() - - if not text: - return jsonify({'error': 'Text is required', 'status': 'error'}), 400 - - if len(text) > MAX_TEXT_LENGTH: - return jsonify({ - 'error': f'Text too long. Maximum {MAX_TEXT_LENGTH} characters.', - 'status': 'error' - }), 400 - - logger.info(f"[DIALECT] Conversion request: text_length={len(text)}") - - from nlp.dialect.dialect_service import get_dialect_model - converter = get_dialect_model() - t0 = time.time() - result = converter.convert(text) - elapsed = int((time.time() - t0) * 1000) - - logger.info(f"[DIALECT] {elapsed}ms | input='{text[:80]}' | output='{result[:80]}'") - - return jsonify({ - 'original_text': text, - 'converted_text': result, - 'status': 'success' - }), 200 - - except RuntimeError as e: - logger.error(f"Dialect model error: {e}") - return jsonify({ - 'error': f'Dialect model unavailable: {str(e)[:200]}', - 'status': 'error' - }), 503 - except Exception as e: - logger.error(f"Error during dialect conversion: {e}") - logger.error(traceback.format_exc()) - return jsonify({ - 'error': 'An error occurred during dialect conversion.', - 'status': 'error', - 'details': str(e) if app.debug else None - }), 500 - - -@app.route('/api/quran', methods=['POST']) -def quran_verify(): - """ - Quran text verification and translation. - Accepts: {text: str, language: str (optional, default='تدقيق الايات')} - Returns: {matched_segment, full_verse} or {error} - """ - try: - if not logger_quran_ok: - return jsonify({'error': 'Quran search module not available'}), 503 - - data = request.get_json(force=True) - text = data.get('text', '').strip() - language = data.get('language', 'تدقيق الايات').strip() - - if not text: - return jsonify({'error': 'النص المُدخل فارغ'}), 400 - - if len(text) > 2000: - return jsonify({'error': 'النص طويل جداً (الحد الأقصى 2000 حرف)'}), 400 - - app.logger.info(f'[QURAN] Query: "{text[:60]}..." lang={language}') - start_time = time.time() - - result = search_bayan(text, target_type=language) - - elapsed = int((time.time() - start_time) * 1000) - app.logger.info(f'[QURAN] Done in {elapsed}ms') - - if 'error' in result: - return jsonify(result), 404 - - return jsonify(result) - - except Exception as e: - app.logger.error(f'[QURAN] Error: {e}') - app.logger.error(traceback.format_exc()) - return jsonify({'error': 'حدث خطأ أثناء البحث في القرآن الكريم'}), 500 - - -@app.route('/api/analyze', methods=['POST']) -def analyze_text(): - """ - Perform sequential analysis (Spelling -> Grammar -> Punctuation) - and return word-level suggestions with offsets. - """ - try: - if not request.is_json: - return jsonify({'error': 'Request must be JSON', 'status': 'error'}), 400 - - data = request.get_json() - text = data.get('text', '').strip() - - if not text: - return jsonify({'error': 'Text is required', 'status': 'error'}), 400 - - # ── Input Sanitization (Fix 3: prevent pathological model inputs) ── - # Strip HTML tags — prevents AraSpell from doing exhaustive edit-distance - # on tag characters like + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + تخطّي إلى المحرر
@@ -1186,1130 +1171,6 @@ -