// Cached references to the page's DOM nodes, resolved once when the script
// loads. Every node is looked up by its id except the progress meter, which
// is located by its "meter" class.

// Login / session controls.
const loginPanel = document.getElementById("loginPanel");
const workspace = document.getElementById("workspace");
const loginForm = document.getElementById("loginForm");
const codeInput = document.getElementById("codeInput");
const loginError = document.getElementById("loginError");
const logoutButton = document.getElementById("logoutButton");

// Upload form and its option selectors.
const uploadForm = document.getElementById("uploadForm");
const pdfInput = document.getElementById("pdfInput");
const fileName = document.getElementById("fileName");
const createButton = document.getElementById("createButton");
const voiceSelect = document.getElementById("voiceSelect");
const speedSelect = document.getElementById("speedSelect");
const ocrModeSelect = document.getElementById("ocrModeSelect");
const pageLimitSelect = document.getElementById("pageLimitSelect");
const uploadError = document.getElementById("uploadError");
const engineNotice = document.getElementById("engineNotice");

// Job header and overall progress.
const jobTitle = document.getElementById("jobTitle");
const jobState = document.getElementById("jobState");
const progressBar = document.getElementById("progressBar");
const progressMeter = document.querySelector(".meter");
const progressPercent = document.getElementById("progressPercent");

// Per-stage progress details.
const stageLabel = document.getElementById("stageLabel");
const stagePhaseLabel = document.getElementById("stagePhaseLabel");
const stageDetailTitle = document.getElementById("stageDetailTitle");
const stageDetailText = document.getElementById("stageDetailText");
const stageItemProgress = document.getElementById("stageItemProgress");
const stageItemLabel = document.getElementById("stageItemLabel");
const stageItemValue = document.getElementById("stageItemValue");
const stageItemBar = document.getElementById("stageItemBar");
const stageMetaText = document.getElementById("stageMetaText");
const stageList = document.getElementById("stageList");
const jobMessage = document.getElementById("jobMessage");
const qualityHint = document.getElementById("qualityHint");

// Job summary values.
const pagesValue = document.getElementById("pagesValue");
const charactersValue = document.getElementById("charactersValue");
const engineValue = document.getElementById("engineValue");
const extractionValue = document.getElementById("extractionValue");
const qualityValue = document.getElementById("qualityValue");
const audioFormatValue = document.getElementById("audioFormatValue");

// Audio result, browser read-aloud fallback, and job history.
const audioPanel = document.getElementById("audioPanel");
const audioPlayer = document.getElementById("audioPlayer");
const downloadLink = document.getElementById("downloadLink");
const browserSpeechPanel = document.getElementById("browserSpeechPanel");
const browserSpeechMessage = document.getElementById("browserSpeechMessage");
const browserSpeakButton = document.getElementById("browserSpeakButton");
const browserStopButton = document.getElementById("browserStopButton");
const historyPanel = document.getElementById("historyPanel");
const historyList = document.getElementById("historyList");
|
|
// ---------------------------------------------------------------------------
// Mutable page state. All of these are module-level and reassigned by the
// functions in this file.
// ---------------------------------------------------------------------------

// Built-in voice list, used until the health endpoint supplies its own.
const defaultVoiceCatalog = {
  default: "silma-local",
  cloud: [
    { id: "mms-ara", label: "Arabic Standard" },
    { id: "silma-tts", label: "SILMA Arabic" },
  ],
  local: [
    { id: "silma-local", label: "1. SILMA Arabic - Most natural" },
    { id: "espeak-ar-clear", label: "2. Local Arabic Clear - Fast fallback" },
    { id: "espeak-ar", label: "3. Local Arabic - Standard fallback" },
    { id: "espeak-ar-male", label: "Local Arabic Low" },
  ],
};
let voiceCatalog = defaultVoiceCatalog;

// Deployment and connectivity flags; loadHealth() recomputes these.
let hostedShellMode = false;
let remoteWorkerMode = false;
let workerBaseUrl = "";
let workerUnavailableDiagnostic = null;
let cloudMode = false;
let cloudTtsReady = false;
let cloudTtsMaxChars = 900;
let maxUploadMb = null;
let installedArabicOcrModels = [];

// Unlock codes kept in memory so the remote worker can be logged in later.
let pendingUnlockCode = "";
let sessionUnlockCode = "";

// Job tracking. resumeActiveJob() treats a truthy value in either as
// "already watching a job".
let pollTimer = null;
let activeJobId = null;
let currentAudioUrl = null;

// Browser-side OCR. getOcrWorker() caches its worker in ocrWorker;
// pdfjsLib and tesseractCreateWorker are presumably similar lazy caches
// (their loaders are not visible here - confirm).
let ocrWorker = null;
let pdfjsLib = null;
let tesseractCreateWorker = null;
let browserOcrProgress = { page: 0, total: 0 };

// Text and source name held for the browser read-aloud fallback.
let browserSpeechText = "";
let browserSpeechSourceName = "";
|
|
// Display names for OCR engine ids, keyed by the id string.
const ocrModeLabels = {
  "tesseract": "1. Tesseract Arabic - Best readable",
  "tesseract-fast": "2. Tesseract Arabic - Faster readable",
  "paddleocr": "3. PaddleOCR Arabic - Faster fallback",
  "arabic-max": "Maximum Arabic OCR - slower",
  "arabic": "Arabic OCR comparison - slower",
  "qari-ocr": "QARI Arabic books (best)",
  "tawkeed-ocr": "Tawkeed Arabic OCR",
  "katib-ocr": "KATIB Arabic OCR (lighter)",
  "arabic-qwen-ocr": "Arabic-Qwen OCR",
  "arabic-glm-ocr": "Arabic-GLM OCR v2",
  "baseer-ocr": "Baseer Arabic OCR",
  "paddleocr-vl": "PaddleOCR-VL heavy",
  "best": "Best scan test",
  "surya": "Surya heavy OCR",
  "easyocr": "General Arabic OCR",
  "auto": "Auto fallback",
};

// Display names for text-quality grades.
const qualityLabels = {
  "good": "Good",
  "warning": "Check",
  "poor": "Poor",
};

// Display names for page limits. Keys are the stringified page count, so a
// numeric lookup such as pageLimitLabels[5] resolves to the "5" entry.
const pageLimitLabels = {
  "0": "Full book",
  "5": "Quick test",
  "10": "Longer test",
};

// Display names for speech speeds, keyed by the stringified multiplier.
const speedLabels = {
  "0.9": "Slower",
  "1": "Normal",
  "1.15": "Faster",
};

// When more than this fraction of pages lacks usable embedded text, the
// whole PDF is sent through OCR (see shouldOcrMixedPdf).
const mixedPdfOcrMissingPageRatio = 0.15;

// Display names for job status values.
const statusLabels = {
  "queued": "Queued",
  "reading": "Reading",
  "speaking": "Creating",
  "complete": "Ready",
  "failed": "Failed",
};
|
|
// The five pipeline steps, in display order. Each entry is { id, label }.
const defaultStageSteps = [
  ["upload", "Upload"],
  ["text", "Text scan"],
  ["ocr", "Arabic OCR"],
  ["voice", "Voice"],
  ["ready", "Ready"],
].map(([id, label]) => ({ id, label }));

// Overall-progress percentages that bound each phase; rangeProgress()
// interpolates between a phase's start and end.
const progressRanges = {
  // Embedded text-layer scan.
  textStart: 5,
  textEnd: 18,
  // OCR of scanned pages.
  ocrStart: 18,
  ocrRenderEnd: 30,
  ocrEnd: 72,
  // Speech synthesis.
  voiceStart: 72,
  voiceEnd: 98,
};
|
|
/**
 * Map "current out of total" onto the numeric range [start, end].
 *
 * @param {number} start - Value returned when nothing is done yet.
 * @param {number} end - Value returned when everything is done.
 * @param {*} current - Completed units; non-numeric values count as 0 and
 *   the value is clamped to [0, total].
 * @param {*} total - Total units; anything below 1 or non-numeric counts as 1.
 * @returns {number} The interpolated value, rounded to the nearest integer.
 */
function rangeProgress(start, end, current, total) {
  const denominator = Math.max(1, Number(total) || 1);
  // denominator is at least 1, so clamping in either order gives the same result.
  const numerator = Math.min(denominator, Math.max(0, Number(current) || 0));
  const fraction = numerator / denominator;
  const span = end - start;
  return Math.round(start + fraction * span);
}
|
|
/**
 * Switch the page between the login panel and the workspace.
 *
 * When signing in, the upload error is cleared and the health check is
 * started, followed by the job history load.
 *
 * @param {*} authenticated - Passed straight through as the `force`
 *   argument when hiding the login panel; its negation controls the
 *   workspace and the logout button.
 */
function setAuthenticated(authenticated) {
  const signedOut = !authenticated;
  loginPanel.classList.toggle("hidden", authenticated);
  for (const element of [workspace, logoutButton]) {
    element.classList.toggle("hidden", signedOut);
  }
  if (signedOut) return;
  uploadError.textContent = "";
  loadHealth().then(loadHistory);
}
|
|
/**
 * Show an error message in an element, or clear it.
 *
 * @param {{ textContent: string }} target - Element that displays the message.
 * @param {*} [message] - Text to show; any falsy value clears the element.
 */
function setError(target, message) {
  target.textContent = message ? message : "";
}
|
|
/**
 * Turn a worker diagnostic into a single user-facing sentence.
 *
 * @param {?{ message?: string, nextSteps?: Array }} diagnostic - Diagnostic
 *   object; may be null or undefined.
 * @param {string} [fallback=""] - Text used when the diagnostic has no message.
 * @returns {string} The message, followed by " Next: ..." listing up to
 *   three truthy next steps when the diagnostic provides any.
 */
function formatDiagnosticMessage(diagnostic, fallback = "") {
  const headline = diagnostic?.message || fallback || "";
  if (!Array.isArray(diagnostic?.nextSteps)) return headline;

  const steps = diagnostic.nextSteps.filter(Boolean).slice(0, 3);
  return steps.length ? `${headline} Next: ${steps.join(" ")}` : headline;
}
|
|
/**
 * Read a fetch Response as JSON and turn HTTP failures into errors.
 *
 * A body that cannot be parsed as JSON is treated as an empty object.
 *
 * @param {Response} response - Response returned by fetch().
 * @returns {Promise<*>} The parsed body when `response.ok` is true.
 * @throws {Error} When `response.ok` is false. The error message is the
 *   body's string `detail` field, or "Request failed" when there is none,
 *   and the HTTP status is attached as `error.status`.
 */
async function readJson(response) {
  const payload = await response.json().catch(() => ({}));
  if (response.ok) return payload;

  // The body may be valid JSON that is not an object (e.g. `null`), and
  // `detail` may be a structured value rather than text. Reading it
  // defensively keeps the thrown error a plain Error carrying `status`,
  // instead of a TypeError that describeNetworkError would report as a
  // connection problem, or a useless "[object Object]" message.
  const detail = typeof payload?.detail === "string" ? payload.detail : "";
  const error = new Error(detail || "Request failed");
  error.status = response.status;
  throw error;
}
|
|
/**
 * Convert a failed request into a message suitable for the user.
 *
 * Errors named "TypeError" are treated as connectivity failures and get a
 * fixed explanation, which differs depending on whether requests go to the
 * remote worker. Any other error is shown using its own message.
 *
 * @param {?Error} error - The caught error; may be null or undefined.
 * @returns {string} User-facing description.
 */
function describeNetworkError(error) {
  if (error?.name !== "TypeError") {
    return error?.message || "Request failed";
  }
  if (!(remoteWorkerMode && workerBaseUrl)) {
    return "The site could not be reached. Check the connection and try again.";
  }
  return [
    "The OCR/TTS worker could not be reached from this browser. The Space may be waking up, or the browser blocked the cross-site upload.",
    "Wait a minute, refresh, unlock with the code again, and retry.",
  ].join(" ");
}
|
|
/**
 * Resolve an API path against the remote worker when one is in use.
 *
 * @param {string} path - API path, or a URL that already starts with "http".
 * @returns {string} `path` unchanged outside remote-worker mode or when it
 *   already starts with "http"; otherwise `workerBaseUrl` + `path`.
 */
function apiUrl(path) {
  const needsWorkerPrefix = remoteWorkerMode && !path.startsWith("http");
  return needsWorkerPrefix ? `${workerBaseUrl}${path}` : path;
}
|
|
/**
 * fetch() wrapper that routes API calls through apiUrl().
 *
 * Cookies are sent cross-site ("include") in remote-worker mode and
 * same-origin otherwise. A `credentials` value in `options` overrides that
 * default.
 *
 * @param {string} path - API path or absolute URL.
 * @param {RequestInit} [options={}] - Extra fetch options.
 * @returns {Promise<Response>} The pending fetch.
 */
function apiFetch(path, options = {}) {
  const defaultCredentials = remoteWorkerMode ? "include" : "same-origin";
  const target = apiUrl(path);
  return fetch(target, Object.assign({ credentials: defaultCredentials }, options));
}
|
|
/**
 * Ask this site (not the remote worker) whether the visitor is signed in
 * and show the login panel or workspace accordingly.
 *
 * @returns {Promise<void>} Rejects if the request or readJson() fails.
 */
async function checkSession() {
  const response = await fetch("/api/session");
  const session = await readJson(response);
  setAuthenticated(session.authenticated);
}
|
|
/**
 * Query the health endpoint(s), derive the deployment mode, and update the
 * engine notice, voice list, OCR selector, and upload limit to match.
 *
 * The site's own /api/health is read first. When it reports a Vercel
 * deployment with a worker URL, the worker is logged in to and its
 * /api/health replaces the first payload. A 401 from the worker sends the
 * user back to the login panel; any other worker failure downgrades to
 * "worker configured but unreachable" and records a diagnostic.
 *
 * Any failure not handled above ends with a generic warning notice; this
 * function does not reject.
 *
 * @returns {Promise<void>}
 */
async function loadHealth() {
  // Set the notice text and switch its "warning" styling on or off.
  const showNotice = (text, isWarning) => {
    engineNotice.textContent = text;
    engineNotice.classList.toggle("warning", isWarning);
  };

  try {
    let health = await fetch("/api/health").then(readJson);
    let engines = health.engines;
    hostedShellMode = engines.deployment?.platform === "vercel";
    workerBaseUrl = engines.deployment?.workerBaseUrl || "";
    remoteWorkerMode = Boolean(hostedShellMode && workerBaseUrl);
    workerUnavailableDiagnostic = null;
    let workerDiagnostic = null;

    // Optimistic defaults so uploads can start while the worker is checked.
    if (remoteWorkerMode) {
      cloudMode = false;
      cloudTtsReady = false;
      maxUploadMb = health.maxUploadMb || 512;
      voiceCatalog = defaultVoiceCatalog;
      renderVoiceOptions();
      showNotice(
        "The OCR/TTS worker is connected. You can upload now while voice details finish checking.",
        false,
      );
    }

    if (remoteWorkerMode) {
      await loginRemoteWorker();
      try {
        health = await apiFetch("/api/health").then(readJson);
      } catch (error) {
        if (error.status === 401) {
          await requireWorkerUnlock();
          return;
        }
        workerDiagnostic = await loadWorkerDiagnostic(error);
        workerUnavailableDiagnostic = workerDiagnostic;
        remoteWorkerMode = false;
      }
      if (remoteWorkerMode) {
        engines = health.engines;
      }
    } else if (hostedShellMode && !workerBaseUrl) {
      workerDiagnostic = await loadWorkerDiagnostic({ message: engines.deployment?.nextAction || "" });
      workerUnavailableDiagnostic = workerDiagnostic;
    }

    cloudMode = hostedShellMode && !remoteWorkerMode && !workerBaseUrl;
    cloudTtsReady = Boolean(cloudMode && engines.cloudTts?.available);
    cloudTtsMaxChars = engines.cloudTts?.maxChunkChars || 900;
    voiceCatalog = engines.voices || voiceCatalog || defaultVoiceCatalog;
    installedArabicOcrModels = engines.ocr?.arabicTrainedStack?.installed || [];
    renderVoiceOptions();
    if (!cloudMode || remoteWorkerMode) {
      setSelectValue(ocrModeSelect, engines.ocr?.preferred);
    }
    if (!cloudMode) {
      maxUploadMb = health.maxUploadMb;
    } else if (cloudTtsReady) {
      maxUploadMb = engines.cloudTts?.maxPdfMb || 512;
    } else {
      maxUploadMb = null;
    }
    ocrModeSelect.disabled = Boolean(cloudMode && !remoteWorkerMode);

    const deploymentStatus = engines.deployment || {};
    if (workerDiagnostic) {
      deploymentStatus.workerDiagnostic = workerDiagnostic;
    }
    const deploymentAction = deploymentStatus.nextAction || "";

    // Worker reachable.
    if (remoteWorkerMode) {
      const label = engines.preferred === "silma" ? "SILMA Arabic voice" : "Arabic worker";
      const needsAttention = deploymentStatus.productionReady === false;
      const text = needsAttention
        ? deploymentAction || "The worker is connected, but Vercel production settings still need attention."
        : `${label} is ready on the OCR/TTS worker. Uploads up to ${health.maxUploadMb} MB are accepted.`;
      showNotice(text, needsAttention);
      return;
    }

    // Hosted shell without a worker, temporary cloud voice available.
    if (cloudMode && cloudTtsReady) {
      showNotice(
        formatDiagnosticMessage(deploymentStatus.workerDiagnostic) ||
          "Temporary cloud voice test mode is enabled. For reliable downloadable audio on Vercel, add WORKER_BASE_URL and redeploy.",
        true,
      );
      return;
    }

    // Worker configured but it could not be reached.
    if (hostedShellMode && workerBaseUrl && !remoteWorkerMode) {
      showNotice(
        formatDiagnosticMessage(workerDiagnostic) ||
          "The OCR/TTS worker is configured but not reachable yet. Check the Hugging Face Space URL, CORS_ORIGINS, and Space status, then redeploy.",
        true,
      );
      return;
    }

    // Hosted shell with neither a worker nor cloud voice.
    if (cloudMode) {
      showNotice(
        formatDiagnosticMessage(deploymentStatus.workerDiagnostic) ||
          deploymentAction ||
          "Downloadable Vercel audio needs the OCR/TTS worker. Add WORKER_BASE_URL in Vercel, then redeploy.",
        true,
      );
      return;
    }

    // Not a hosted shell: describe the engines reported by this server.
    if (engines.readyForArabic) {
      const label = engines.preferred === "piper" ? "Neural Arabic voice" : "Arabic voice";
      const installedArabicStack = engines.ocr?.arabicTrainedStack?.installed || installedArabicOcrModels;
      const installedArabicText = installedArabicStack.length
        ? ` Installed Arabic OCR: ${installedArabicStack.join(", ")}.`
        : "";

      // Engines with a dedicated readiness sentence; any other engine
      // falls back to its ocrModeLabels display name.
      const readyOcrMessages = new Map([
        ["arabic-max", "Maximum Arabic OCR is ready, but slower"],
        ["arabic", "Arabic OCR comparison is ready, but slower"],
        ["qari-ocr", "QARI Arabic book OCR is ready"],
        ["tawkeed-ocr", "Tawkeed Arabic OCR is ready"],
        ["katib-ocr", "KATIB Arabic OCR is ready"],
        ["arabic-qwen-ocr", "Arabic-Qwen OCR is ready"],
        ["arabic-glm-ocr", "Arabic-GLM OCR is ready"],
        ["baseer-ocr", "Baseer Arabic OCR is ready"],
        ["paddleocr", "PaddleOCR Arabic is ready, but less readable"],
        ["paddleocr-vl", "PaddleOCR-VL heavy OCR is ready"],
        ["surya", "Surya heavy OCR is ready"],
        ["tesseract", "Rank 1 readable Tesseract Arabic OCR is ready"],
        ["tesseract-fast", "Rank 2 faster Tesseract Arabic OCR is ready"],
        ["best", "Best Arabic OCR test mode is ready"],
      ]);
      const preferredOcr = engines.ocr?.preferred;
      let ocrLabel;
      if (readyOcrMessages.has(preferredOcr)) {
        ocrLabel = readyOcrMessages.get(preferredOcr);
      } else if (preferredOcr) {
        ocrLabel = `${ocrModeLabels[preferredOcr] || "Arabic OCR"} is ready`;
      } else {
        ocrLabel = "Arabic OCR is not ready";
      }

      showNotice(
        `${label} is ready. ${ocrLabel}. Uploads up to ${health.maxUploadMb} MB are accepted.${installedArabicText}`,
        false,
      );
      return;
    }

    showNotice(
      `A better Arabic voice is not installed yet. Upload limit: ${health.maxUploadMb} MB.`,
      true,
    );
  } catch (error) {
    showNotice("Unable to check the voice.", true);
  }
}
|
|
/**
 * Get an explanation of why the worker is unavailable.
 *
 * The site's /api/worker-diagnostics endpoint is asked first. If that call
 * fails or returns no message, a diagnostic is built locally from `error`.
 *
 * @param {{ message: string }} error - The failure that triggered the lookup.
 * @returns {Promise<object>} The server diagnostic, or a local one with
 *   `status`, `reachable`, `workerBaseUrl`, and `message`.
 */
async function loadWorkerDiagnostic(error) {
  let serverDiagnostic = null;
  try {
    const response = await fetch("/api/worker-diagnostics");
    serverDiagnostic = await readJson(response);
  } catch (_diagnosticError) {
    // Best effort only: fall through to the locally built diagnostic.
  }
  if (serverDiagnostic?.message) return serverDiagnostic;

  return {
    status: "browser-worker-error",
    reachable: false,
    workerBaseUrl,
    message: `The OCR/TTS worker could not be reached from this browser. ${error.message}`,
  };
}
|
|
/**
 * Send the user back to the login panel because the worker rejected the
 * session.
 *
 * Forgets the stored unlock codes, leaves remote-worker and cloud mode,
 * logs out of this site (ignoring failures), and asks for the code again.
 *
 * @returns {Promise<void>}
 */
async function requireWorkerUnlock() {
  pendingUnlockCode = sessionUnlockCode = "";
  remoteWorkerMode = cloudMode = false;

  // Logging out is best effort; the login prompt is shown regardless.
  await fetch("/api/logout", { method: "POST" }).catch(() => null);

  setAuthenticated(false);
  setError(loginError, "Enter the code once more to unlock the OCR worker.");
  engineNotice.textContent = "The OCR worker needs to be unlocked.";
  engineNotice.classList.add("warning");
}
|
|
/**
 * Log this browser in to the remote worker using the stored unlock code.
 *
 * The pending code is preferred over the session code. After a successful
 * login the pending code is cleared; the session code is kept.
 *
 * @returns {Promise<boolean>} true when the worker accepted the code; false
 *   when not in remote-worker mode, when no code is stored, or when the
 *   request failed.
 */
async function loginRemoteWorker() {
  const unlockCode = pendingUnlockCode || sessionUnlockCode;
  if (!(remoteWorkerMode && unlockCode)) return false;

  const body = new FormData();
  body.append("code", unlockCode);
  try {
    const response = await fetch(`${workerBaseUrl}/api/login`, {
      method: "POST",
      body,
      credentials: "include",
    });
    await readJson(response);
    pendingUnlockCode = "";
    return true;
  } catch (_error) {
    return false;
  }
}
|
|
/**
 * Make sure the remote worker will accept an upload from this browser.
 *
 * Checks the worker session, tries to log in with the stored code when it
 * is not authenticated, and then checks the session once more.
 *
 * @returns {Promise<boolean>} true when not in remote-worker mode or when
 *   the worker session is (or becomes) authenticated; otherwise false.
 */
async function ensureWorkerSessionForUpload() {
  if (!remoteWorkerMode) return true;

  // Any failure while asking counts as "not authenticated".
  const workerSessionIsAuthenticated = async () => {
    try {
      const session = await apiFetch("/api/session").then(readJson);
      return Boolean(session.authenticated);
    } catch (_error) {
      return false;
    }
  };

  if (await workerSessionIsAuthenticated()) return true;
  if (await loginRemoteWorker()) return true;
  return workerSessionIsAuthenticated();
}
|
|
/**
 * Fetch the job list, render it, and resume watching an unfinished job.
 *
 * The history panel is hidden instead when running in cloud mode without a
 * remote worker, or when loading or rendering fails.
 *
 * @returns {Promise<void>}
 */
async function loadHistory() {
  const hideHistory = () => historyPanel.classList.add("hidden");

  if (cloudMode && !remoteWorkerMode) {
    hideHistory();
    return;
  }

  try {
    const response = await apiFetch("/api/jobs");
    const payload = await readJson(response);
    const jobs = payload.jobs || [];
    renderHistory(jobs);
    resumeActiveJob(jobs);
  } catch (error) {
    hideHistory();
  }
}
|
|
/**
 * Rebuild the history list from a set of jobs.
 *
 * Only jobs with a filename are shown; the panel is hidden when none
 * remain. Each entry shows the filename, a " - "-separated summary line,
 * and whichever actions apply: "Watch" for unfinished jobs, "Use settings"
 * for failed or complete jobs, and a "Download" link when the job has a
 * download URL.
 *
 * @param {Array<object>} jobs - Job records from the /api/jobs response.
 */
function renderHistory(jobs) {
  const unfinishedStatuses = ["queued", "reading", "speaking"];
  const finishedStatuses = ["failed", "complete"];

  // Build one of the text-styled action buttons.
  const buildLinkButton = (caption, onClick) => {
    const button = document.createElement("button");
    button.type = "button";
    button.className = "link-button";
    button.textContent = caption;
    button.addEventListener("click", onClick);
    return button;
  };

  // Build the summary line. Every part is stringified through a template
  // literal so missing values render the same way as string concatenation.
  const summarize = (job) => {
    const parts = [`${statusLabels[job.status] || job.status}`, `${formatPages(job)}`];
    if (job.pageLimit) {
      parts.push(`${pageLimitLabels[job.pageLimit] || `${job.pageLimit} pages`}`);
    }
    if (job.ttsSpeed && job.ttsSpeed !== 1) {
      parts.push(`${speedLabels[job.ttsSpeed] || `${job.ttsSpeed}x`}`);
    }
    if (job.ocrEngine && job.extraction && job.extraction !== "embedded") {
      parts.push(`${ocrModeLabels[job.ocrEngine] || job.ocrEngine}`);
    }
    if (job.textQuality) {
      parts.push(`${qualityLabels[job.textQuality] || job.textQuality}`);
    }
    if (job.audioFormat) {
      parts.push(`${job.audioFormat.toUpperCase()} ${formatBytes(job.audioBytes)}`);
    }
    return parts.join(" - ");
  };

  const visibleJobs = jobs.filter((job) => job.filename);
  historyPanel.classList.toggle("hidden", visibleJobs.length === 0);
  historyList.innerHTML = "";

  visibleJobs.forEach((job) => {
    const name = document.createElement("strong");
    name.textContent = job.filename;
    const meta = document.createElement("span");
    meta.textContent = summarize(job);

    const details = document.createElement("div");
    details.append(name, meta);
    const item = document.createElement("li");
    item.append(details);

    const actions = document.createElement("div");
    actions.className = "history-actions";
    if (unfinishedStatuses.includes(job.status)) {
      actions.append(buildLinkButton("Watch", () => startPolling(job.id)));
    }
    if (finishedStatuses.includes(job.status)) {
      actions.append(buildLinkButton("Use settings", () => applyJobSettings(job)));
    }
    if (job.downloadUrl) {
      const link = document.createElement("a");
      link.href = apiUrl(job.downloadUrl);
      link.textContent = "Download";
      actions.append(link);
    }
    if (actions.children.length) item.append(actions);

    historyList.append(item);
  });
}
|
|
/**
 * Start watching the first unfinished job, unless one is already watched.
 *
 * @param {Array<{ id: *, status: string }>} jobs - Job records to search.
 */
function resumeActiveJob(jobs) {
  const alreadyWatching = Boolean(activeJobId || pollTimer);
  if (alreadyWatching) return;

  const unfinishedStatuses = new Set(["queued", "reading", "speaking"]);
  const unfinishedJob = jobs.find((job) => unfinishedStatuses.has(job.status));
  if (unfinishedJob) startPolling(unfinishedJob.id);
}
|
|
/**
 * Copy a previous job's voice, speed, OCR engine, and page limit into the
 * upload form, then prompt the user to pick the PDF again.
 *
 * @param {object} job - Job record whose settings should be reused.
 */
function applyJobSettings(job) {
  const selections = [
    [voiceSelect, job.voiceId],
    [speedSelect, String(job.ttsSpeed || 1)],
    [ocrModeSelect, job.ocrEngine],
    [pageLimitSelect, String(job.pageLimit || 0)],
  ];
  for (const [select, value] of selections) {
    setSelectValue(select, value);
  }
  describeOcrMode();

  const sourceName = job.filename || "that job";
  setError(uploadError, `Settings loaded for ${sourceName}. Choose the PDF again, then create audio.`);
  pdfInput.focus();
}
|
|
/**
 * Select an option by value, but only if the select actually offers it.
 *
 * @param {{ options: Iterable<{ value: string }>, value: string }} select -
 *   The select element (or a compatible object).
 * @param {*} value - Desired value; undefined, null, and "" are ignored,
 *   anything else is compared as a string.
 */
function setSelectValue(select, value) {
  const isBlank = value === undefined || value === null || value === "";
  if (isBlank) return;

  const wanted = String(value);
  for (const option of select.options) {
    if (option.value === wanted) {
      select.value = wanted;
      return;
    }
  }
}
|
|
// Login form: send the entered code to this site's /api/login.
// The trimmed code is remembered in both unlock-code variables before the
// request is made, so loginRemoteWorker() can reuse it for the worker.
loginForm.addEventListener("submit", async (event) => {
  event.preventDefault();
  setError(loginError, "");

  const enteredCode = codeInput.value.trim();
  pendingUnlockCode = enteredCode;
  sessionUnlockCode = enteredCode;

  const body = new FormData();
  body.append("code", enteredCode);
  try {
    const response = await fetch("/api/login", { method: "POST", body });
    await readJson(response);
    codeInput.value = "";
    setAuthenticated(true);
  } catch (error) {
    setError(loginError, error.message);
  }
});
|
|
// Logout button: end the worker session (best effort), end this site's
// session, forget the unlock codes, and return to the login panel.
//
// A failed /api/logout request used to reject inside this handler, which
// left the unlock codes in memory and gave the user no feedback. The local
// sign-out now always happens, and a failure is reported on the login panel.
logoutButton.addEventListener("click", async () => {
  if (remoteWorkerMode) {
    await apiFetch("/api/logout", { method: "POST" }).catch(() => null);
  }

  let logoutError = null;
  try {
    await fetch("/api/logout", { method: "POST" }).then(readJson);
  } catch (error) {
    logoutError = error;
  } finally {
    // Never keep the unlock codes around after the user asked to sign out.
    pendingUnlockCode = "";
    sessionUnlockCode = "";
  }

  setAuthenticated(false);
  if (logoutError) {
    setError(loginError, `Sign-out could not be confirmed. ${describeNetworkError(logoutError)}`);
  }
});
|
|
// File picker: describe the newly chosen file.
pdfInput.addEventListener("change", () => {
  const [chosenFile] = [pdfInput.files[0]];
  describeFile(chosenFile);
});

// OCR mode selector: refresh the description of the selected mode.
ocrModeSelect.addEventListener("change", () => {
  describeOcrMode();
});

// Browser read-aloud: start speaking the prepared text.
browserSpeakButton.addEventListener("click", () => {
  speakBrowserSpeechText();
});

// Browser read-aloud: stop. The status message is updated even when speech
// synthesis is unavailable.
browserStopButton.addEventListener("click", () => {
  const speechSupported = canUseBrowserSpeech();
  if (speechSupported) {
    window.speechSynthesis.cancel();
  }
  jobMessage.textContent = "Browser read-aloud stopped.";
});
|
|
// Upload form: validate the chosen PDF and the deployment state, then hand
// the file to createLocalAudio().
uploadForm.addEventListener("submit", async (event) => {
  event.preventDefault();

  const reject = (message) => {
    setError(uploadError, message);
  };
  reject("");

  // --- File checks ---
  const file = pdfInput.files[0];
  if (!file) {
    reject("Choose a PDF first.");
    return;
  }
  const looksLikePdf = file.name.toLowerCase().endsWith(".pdf");
  if (!looksLikePdf) {
    reject("Choose a PDF file.");
    return;
  }
  if (maxUploadMb && file.size > maxUploadMb * 1024 * 1024) {
    reject(`This PDF is larger than ${maxUploadMb} MB.`);
    return;
  }

  // --- Worker configured but unreachable ---
  if (hostedShellMode && workerBaseUrl && !remoteWorkerMode) {
    reject(
      formatDiagnosticMessage(workerUnavailableDiagnostic) ||
        "The OCR/TTS worker is configured but not reachable. Open the Hugging Face Space, confirm it is running, set CORS_ORIGINS to this Vercel URL, then redeploy.",
    );
    return;
  }

  // --- Hosted shell with no worker configured ---
  if (cloudMode && !remoteWorkerMode) {
    reject(
      formatDiagnosticMessage(
        workerUnavailableDiagnostic,
        "This Vercel site needs WORKER_BASE_URL before it can create downloadable Arabic audio. Deploy the Hugging Face Space worker, set Vercel WORKER_BASE_URL to that https://*.hf.space URL, remove the temporary Hugging Face cloud TTS variables, then redeploy.",
      ),
    );
    const pendingSteps = defaultStageSteps.map((step) => ({ ...step, state: "pending" }));
    updateStage({
      id: "upload",
      label: "Worker needed",
      phase: "Vercel setup needed",
      detail: "Downloadable audio on Vercel is created by the OCR/TTS worker, not the temporary cloud voice fallback.",
      progress: 0,
      steps: pendingSteps,
    });
    return;
  }

  await createLocalAudio(file);
});
|
|
// Drag-and-drop support for the upload form. The "is-dragging" class is on
// while a drag hovers over the form.
uploadForm.addEventListener("dragover", (event) => {
  // Cancelling dragover is what allows the drop event to fire.
  event.preventDefault();
  uploadForm.classList.add("is-dragging");
});

uploadForm.addEventListener("dragleave", () => {
  uploadForm.classList.remove("is-dragging");
});

// On drop, place the first dropped file into the file input so the normal
// submit path sees it, then describe it.
uploadForm.addEventListener("drop", (event) => {
  event.preventDefault();
  uploadForm.classList.remove("is-dragging");

  const droppedFile = event.dataTransfer.files[0];
  if (droppedFile) {
    const singleFile = new DataTransfer();
    singleFile.items.add(droppedFile);
    pdfInput.files = singleFile.files;
    describeFile(droppedFile);
  }
});
|
|
/**
 * Upload a PDF with the selected options to /api/jobs and start polling
 * the job it creates.
 *
 * In remote-worker mode the worker session is verified first. On failure
 * the error is shown, progress is reset, the job is marked failed, and the
 * create button is re-enabled. On success the button stays disabled.
 *
 * @param {File} file - The PDF to upload.
 * @returns {Promise<void>}
 */
async function createLocalAudio(file) {
  const fields = [
    ["pdf", file],
    ["voice_id", voiceSelect.value],
    ["tts_speed", speedSelect.value],
    ["ocr_engine", ocrModeSelect.value],
    ["page_limit", pageLimitSelect.value],
  ];
  const formData = new FormData();
  for (const [key, value] of fields) {
    formData.append(key, value);
  }

  resetJob(file.name, "Uploading PDF.");
  createButton.disabled = true;
  try {
    const workerReady = !remoteWorkerMode || (await ensureWorkerSessionForUpload());
    if (!workerReady) {
      throw new Error("Enter the code again so this browser can upload directly to the OCR/TTS worker.");
    }
    const response = await apiFetch("/api/jobs", { method: "POST", body: formData });
    const payload = await readJson(response);
    startPolling(payload.jobId);
  } catch (error) {
    setError(uploadError, describeNetworkError(error));
    updateProgress(0);
    setJobState("failed");
    createButton.disabled = false;
  }
}
|
|
/**
 * Create audio without the worker: read the PDF in the browser, then either
 * synthesize it chunk by chunk through /api/cloud-tts or fall back to the
 * browser's own read-aloud.
 *
 * Steps:
 *   1. Stop early when neither cloud TTS nor browser speech is available.
 *   2. Extract and clean the text, and show page, character, and quality
 *      details.
 *   3. Refuse to continue when there is no text or quality is not ready.
 *   4. Without cloud TTS, offer browser read-aloud and stop.
 *   5. Otherwise request one WAV per chunk, merge them, and show the player.
 *
 * If anything fails after text was prepared and browser speech exists, the
 * read-aloud fallback is offered instead of marking the job failed. The
 * create button is always re-enabled once processing has started.
 *
 * @param {File} file - The PDF to convert.
 * @returns {Promise<void>}
 */
async function createCloudAudio(file) {
  const browserSpeechAvailable = canUseBrowserSpeech();
  if (!(cloudTtsReady || browserSpeechAvailable)) {
    setError(
      uploadError,
      formatDiagnosticMessage(
        workerUnavailableDiagnostic,
        "Downloadable Vercel audio needs WORKER_BASE_URL. Browser read-aloud can be used for a short temporary test.",
      ),
    );
    return;
  }

  resetJob(file.name, "Reading PDF in this browser.");
  createButton.disabled = true;
  // Filled in once text exists, so the catch block can offer read-aloud.
  let fallbackText = "";

  try {
    const extracted = await extractPdfText(file, getSelectedPageLimit());
    const speechText = prepareTextForSpeech(extracted.text);
    fallbackText = speechText;
    const chunks = chunkText(speechText, cloudTtsMaxChars);
    const quality = assessTextQuality(extracted.text, speechText);

    pagesValue.textContent = formatPages({ pages: extracted.pages, totalPages: extracted.totalPages });
    charactersValue.textContent = speechText.length.toLocaleString();
    engineValue.textContent = "cloud";
    extractionValue.textContent = extracted.extraction;
    qualityValue.textContent = formatQuality(quality);
    showQualityHint(quality);

    if (!(chunks.length && quality.readyForTts)) {
      const reason = quality.reasons.length ? ` ${quality.reasons.join("; ")}.` : "";
      throw new Error(`Text quality is poor, so audio was not created.${reason}`);
    }

    if (!cloudTtsReady) {
      showBrowserSpeechFallback(
        speechText,
        file.name,
        formatDiagnosticMessage(workerUnavailableDiagnostic, "Downloadable Vercel audio needs WORKER_BASE_URL."),
      );
      return;
    }

    // Report progress within the voice stage (step 4 of 5).
    const reportVoiceStage = (phase, detail, progress, finishedParts) => {
      updateStage({
        id: "voice",
        label: "Voice",
        phase,
        detail,
        progress,
        step: 4,
        totalSteps: 5,
        itemProgress: stageItem("Audio part", finishedParts, chunks.length),
        steps: stepStates("voice"),
      });
    };

    audioFormatValue.textContent = "WAV";
    setJobState("speaking", "Creating");
    reportVoiceStage(
      "Creating voice",
      "Creating Arabic audio from the cleaned text.",
      progressRanges.voiceStart,
      0,
    );

    const audioBuffers = [];
    for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex += 1) {
      const partNumber = chunkIndex + 1;
      const label = `Creating audio part ${partNumber} of ${chunks.length}`;
      jobMessage.textContent = label;
      reportVoiceStage(
        "Creating audio parts",
        label,
        rangeProgress(progressRanges.voiceStart, progressRanges.voiceEnd, partNumber, chunks.length),
        partNumber,
      );

      const response = await fetch("/api/cloud-tts", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ text: chunks[chunkIndex], voiceId: voiceSelect.value }),
      });
      if (!response.ok) {
        const failure = await response.json().catch(() => ({}));
        throw new Error(failure.detail || "Cloud voice failed.");
      }
      audioBuffers.push(await response.arrayBuffer());
    }

    showAudio(mergeWavBuffers(audioBuffers), file.name);
    setJobState("complete");
    jobMessage.textContent = "Audio is ready.";
    updateStage({
      id: "ready",
      label: "Ready",
      phase: "Audio ready",
      detail: "Audio is ready to play or download.",
      progress: 100,
      step: 5,
      totalSteps: 5,
      steps: defaultStageSteps.map((step) => ({ ...step, state: "done" })),
    });
  } catch (error) {
    if (fallbackText && canUseBrowserSpeech()) {
      const fallbackMessage = formatDiagnosticMessage(
        workerUnavailableDiagnostic,
        `${error.message} Browser read-aloud is available now, but downloadable Vercel audio needs WORKER_BASE_URL.`,
      );
      showBrowserSpeechFallback(fallbackText, file.name, fallbackMessage);
      setError(uploadError, fallbackMessage);
      return;
    }

    setError(uploadError, error.message);
    setJobState("failed");
    jobMessage.textContent = "Processing failed.";

    // Show the first two steps as done, the third (OCR) as failed, and the
    // rest as pending.
    const stateForStep = (index) => {
      if (index === 2) return "failed";
      return index < 2 ? "done" : "pending";
    };
    updateStage({
      id: "ocr",
      label: "Stopped",
      phase: "Processing stopped",
      detail: "Processing stopped before audio could be created.",
      progress: 0,
      steps: defaultStageSteps.map((step, index) => ({ ...step, state: stateForStep(index) })),
    });
  } finally {
    createButton.disabled = false;
  }
}
|
|
/**
 * Extract text from a PDF in the browser, using OCR when the embedded text
 * layer is missing or too incomplete.
 *
 * Every processed page's text layer is read first. The embedded text is
 * returned when it is at least 20 characters long and shouldOcrMixedPdf()
 * does not ask for OCR; otherwise all processed pages go through
 * ocrPdfText().
 *
 * @param {File|Blob} file - The PDF; must provide arrayBuffer().
 * @param {number} [pageLimit=0] - Maximum pages to process; 0 means all.
 * @returns {Promise<{ pages: number, totalPages: number, text: string,
 *   extraction: string }>} `extraction` is "browser" for embedded text and
 *   "ocr" for OCR output.
 */
async function extractPdfText(file, pageLimit = 0) {
  const pdfjs = await getPdfJs();
  const data = await file.arrayBuffer();
  const pdfDocument = await pdfjs.getDocument({ data }).promise;
  const pagesToProcess = effectivePageCount(pdfDocument.numPages, pageLimit);

  const pageTexts = [];
  for (let pageNumber = 1; pageNumber <= pagesToProcess; pageNumber += 1) {
    const page = await pdfDocument.getPage(pageNumber);
    const content = await page.getTextContent();
    const rawText = content.items.map((item) => item.str || "").join(" ");
    pageTexts.push(cleanText(rawText));

    pagesValue.textContent = pageNumber;
    jobMessage.textContent = `Reading page ${pageNumber} of ${pagesToProcess}`;
    updateStage({
      id: "text",
      label: "Text scan",
      phase: "Checking text layer",
      detail: `Checking the PDF text layer: page ${pageNumber} of ${pagesToProcess}.`,
      progress: rangeProgress(progressRanges.textStart, progressRanges.textEnd, pageNumber, pagesToProcess),
      step: 2,
      totalSteps: 5,
      itemProgress: stageItem("PDF page", pageNumber, pagesToProcess),
      steps: stepStates("text"),
    });
  }

  const embeddedText = cleanText(pageTexts.filter(Boolean).join("\n\n"));
  const hasEmbeddedText = embeddedText.length >= 20;

  if (hasEmbeddedText && !shouldOcrMixedPdf(pageTexts)) {
    return {
      pages: pagesToProcess,
      totalPages: pdfDocument.numPages,
      text: embeddedText,
      extraction: "browser",
    };
  }

  // Explain why OCR is starting before handing over to it.
  if (hasEmbeddedText) {
    const missingPercent = Math.round(embeddedTextMissingPageRatio(pageTexts) * 100);
    jobMessage.textContent = `Embedded text is incomplete on ${missingPercent}% of pages. Starting Arabic OCR.`;
  } else {
    jobMessage.textContent = "No text layer found. Starting Arabic OCR.";
  }

  const ocrText = await ocrPdfText(pdfDocument, pagesToProcess);
  return {
    pages: pagesToProcess,
    totalPages: pdfDocument.numPages,
    text: cleanText(ocrText),
    extraction: "ocr",
  };
}
|
|
/**
 * Fraction of pages whose embedded text is shorter than 20 characters.
 *
 * @param {string[]} pageTexts - Text for each page.
 * @returns {number} A value from 0 to 1; 1 when there are no pages.
 */
function embeddedTextMissingPageRatio(pageTexts) {
  const pageCount = pageTexts.length;
  if (!pageCount) return 1;

  const sparsePages = pageTexts.reduce(
    (count, text) => (text.length < 20 ? count + 1 : count),
    0,
  );
  return sparsePages / pageCount;
}
|
|
/**
 * Decide whether a PDF's embedded text is too incomplete to trust.
 *
 * @param {string[]} pageTexts - Embedded text for each page.
 * @returns {boolean} true when there are no pages, when no page has at
 *   least 20 characters, or when the share of pages under 20 characters
 *   exceeds mixedPdfOcrMissingPageRatio.
 */
function shouldOcrMixedPdf(pageTexts) {
  const hasUsablePage = pageTexts.length > 0 && pageTexts.some((text) => text.length >= 20);
  if (!hasUsablePage) return true;

  const missingRatio = embeddedTextMissingPageRatio(pageTexts);
  return missingRatio > mixedPdfOcrMissingPageRatio;
}
|
|
/**
 * Run OCR over the first `pagesToProcess` pages of a PDF, one at a time.
 *
 * For each page: record it in `browserOcrProgress`, report the rendering
 * stage, render the page to an image URL, recognize it, release the image
 * URL, and report the page as finished. Pages whose recognized text is
 * blank are left out of the result.
 *
 * @param {object} document - The loaded PDF document passed on to
 *   renderPageForOcr(). Note that this parameter shadows the global
 *   `document` inside the function.
 * @param {number} pagesToProcess - Number of pages to read, from page 1.
 * @returns {Promise<string>} Recognized page texts joined by blank lines.
 * @throws Whatever the worker, renderer, or recognizer throws; the image
 *   URL of the failing page is still released.
 */
async function ocrPdfText(document, pagesToProcess) {
  const worker = await getOcrWorker();
  const pieces = [];

  for (let pageNumber = 1; pageNumber <= pagesToProcess; pageNumber += 1) {
    browserOcrProgress = { page: pageNumber, total: pagesToProcess };
    jobMessage.textContent = `OCR reading page ${pageNumber} of ${pagesToProcess}`;
    updateStage({
      id: "ocr",
      label: "Arabic OCR",
      phase: "Rendering scanned pages",
      detail: `Preparing scanned page ${pageNumber} of ${pagesToProcess}.`,
      progress: rangeProgress(progressRanges.ocrStart, progressRanges.ocrEnd, pageNumber - 1, pagesToProcess),
      step: 3,
      totalSteps: 5,
      itemProgress: stageItem("Scanned page", pageNumber - 1, pagesToProcess),
      steps: stepStates("ocr"),
    });

    const imageUrl = await renderPageForOcr(document, pageNumber);
    let result;
    try {
      result = await withTesseractNoiseFilter(() => worker.recognize(imageUrl));
    } finally {
      // Release the rendered page even when recognition fails; otherwise
      // the image behind the URL stays allocated until the page unloads.
      URL.revokeObjectURL(imageUrl);
    }
    if (result.data.text.trim()) pieces.push(result.data.text);

    updateStage({
      id: "ocr",
      label: "Arabic OCR",
      phase: "Reading scanned pages",
      detail: `Finished scanned page ${pageNumber} of ${pagesToProcess}.`,
      progress: rangeProgress(progressRanges.ocrStart, progressRanges.ocrEnd, pageNumber, pagesToProcess),
      step: 3,
      totalSteps: 5,
      itemProgress: stageItem("Scanned page", pageNumber, pagesToProcess),
      steps: stepStates("ocr"),
    });
  }

  return pieces.join("\n\n");
}
|
|
/**
 * Number of pages to process given the document size and a page limit.
 *
 * A limit that is not greater than zero means "no limit". Otherwise the
 * result is the smaller of the two values, never below zero.
 *
 * @param {number} totalPages - Pages in the document.
 * @param {number} pageLimit - Maximum pages to process; 0 or less disables it.
 * @returns {number} Pages that should actually be processed.
 */
function effectivePageCount(totalPages, pageLimit) {
  if (!(pageLimit > 0)) return totalPages;
  const capped = Math.min(totalPages, pageLimit);
  return Math.max(0, capped);
}
|
|
/**
 * Read the page limit chosen in `pageLimitSelect`.
 *
 * @returns {number} The parsed base-10 integer, or 0 when the value does not
 *   parse to a non-zero number.
 */
function getSelectedPageLimit() {
  const parsedLimit = Number.parseInt(pageLimitSelect.value, 10);
  if (!parsedLimit) return 0;
  return parsedLimit;
}
|
|
/**
 * Return the shared Arabic OCR worker, creating it on first use.
 *
 * On first use this announces the load through `jobMessage` and
 * `updateStage`, creates an "ara" worker via the factory from
 * `getTesseractCreateWorker`, and applies page-segmentation parameters when
 * the worker exposes `setParameters`. The result is cached in `ocrWorker`.
 *
 * @returns {Promise<object>} The cached OCR worker.
 */
async function getOcrWorker() {
  if (ocrWorker) return ocrWorker;

  jobMessage.textContent = "Loading Arabic OCR. This can take a minute the first time.";
  updateStage({
    id: "ocr",
    label: "Arabic OCR",
    phase: "Loading Arabic OCR",
    detail: "Loading Arabic OCR files. This can take a minute the first time.",
    progress: progressRanges.ocrStart,
    step: 3,
    totalSteps: 5,
    steps: stepStates("ocr"),
  });

  // Translates recognition progress events into per-page status updates,
  // using the page counters that ocrPdfText publishes in browserOcrProgress.
  const reportRecognitionProgress = (event) => {
    if (event.status !== "recognizing text" || !event.progress) return;
    const percent = Math.round(event.progress * 100);
    const page = browserOcrProgress.page || 1;
    const total = browserOcrProgress.total || 1;
    jobMessage.textContent = `OCR reading page ${page} of ${total}: ${percent}%`;
    updateStage({
      id: "ocr",
      label: "Arabic OCR",
      phase: "Reading scanned pages",
      detail: `Reading scanned page ${page} of ${total}: ${percent}%.`,
      progress: rangeProgress(progressRanges.ocrStart, progressRanges.ocrEnd, page - 1 + event.progress, total),
      step: 3,
      totalSteps: 5,
      itemProgress: stageItem("Scanned page", page - 1 + event.progress, total),
      steps: stepStates("ocr"),
    });
  };

  const createWorker = await getTesseractCreateWorker();
  const workerOptions = {
    langPath: "https://tessdata.projectnaptha.com/4.0.0",
    workerPath: "https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/worker.min.js",
    corePath: "https://cdn.jsdelivr.net/npm/tesseract.js-core@5",
    logger: reportRecognitionProgress,
  };
  ocrWorker = await createWorker("ara", 1, workerOptions);

  const canTune = typeof ocrWorker.setParameters === "function";
  if (canTune) {
    await ocrWorker.setParameters({
      tessedit_pageseg_mode: "6",
      preserve_interword_spaces: "1",
    });
  }
  return ocrWorker;
}
|
|
/**
 * Run `callback` while suppressing two known-noisy console messages.
 *
 * `console.warn` and `console.error` are temporarily wrapped so that calls
 * whose joined arguments contain "Image too small to scale" or
 * "Line cannot be recognized" are dropped; everything else is forwarded.
 * The original console methods are restored when the callback settles.
 *
 * @param {Function} callback - Work to run; may return a promise.
 * @returns {Promise<*>} Whatever `callback` resolves to.
 */
async function withTesseractNoiseFilter(callback) {
  const noiseFragments = ["Image too small to scale", "Line cannot be recognized"];
  const saved = { warn: console.warn, error: console.error };

  const muteNoise = (forward) => (...args) => {
    const joined = args.map((arg) => String(arg)).join(" ");
    const isNoise = noiseFragments.some((fragment) => joined.includes(fragment));
    if (!isNoise) forward(...args);
  };

  console.warn = muteNoise(saved.warn);
  console.error = muteNoise(saved.error);
  try {
    return await callback();
  } finally {
    console.warn = saved.warn;
    console.error = saved.error;
  }
}
|
|
/**
 * Return the pdf.js module, importing it from the CDN on first use.
 *
 * The module is cached in `pdfjsLib` and its worker source is pointed at the
 * matching CDN worker script.
 *
 * @returns {Promise<object>} The cached pdf.js module.
 */
async function getPdfJs() {
  if (!pdfjsLib) {
    pdfjsLib = await import("https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.10.38/pdf.min.mjs");
    const { GlobalWorkerOptions } = pdfjsLib;
    GlobalWorkerOptions.workerSrc =
      "https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.10.38/pdf.worker.min.mjs";
  }
  return pdfjsLib;
}
|
|
/**
 * Return the tesseract.js `createWorker` factory, importing the library from
 * the CDN on first use and caching the factory in `tesseractCreateWorker`.
 *
 * @returns {Promise<Function>} The `createWorker` factory.
 * @throws {Error} When the imported module exposes no `createWorker`.
 */
async function getTesseractCreateWorker() {
  if (!tesseractCreateWorker) {
    const tesseractModule = await import(
      "https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.esm.min.js"
    );
    // The factory may be a named export or hang off the default export.
    tesseractCreateWorker = tesseractModule.createWorker || tesseractModule.default?.createWorker;
    if (!tesseractCreateWorker) {
      throw new Error("Arabic OCR could not load. Try the local version for scanned PDFs.");
    }
  }
  return tesseractCreateWorker;
}
|
|
/**
 * Render one PDF page at 2x scale onto a canvas and return it as a PNG
 * object URL for OCR.
 *
 * @param {object} document - PDF document handle exposing `getPage`.
 * @param {number} pageNumber - 1-based page number to render.
 * @returns {Promise<string>} Object URL of the rendered page image.
 */
async function renderPageForOcr(document, pageNumber) {
  const pdfPage = await document.getPage(pageNumber);
  const ocrViewport = pdfPage.getViewport({ scale: 2 });
  const surface = documentCanvas(ocrViewport.width, ocrViewport.height);
  const renderTask = pdfPage.render({
    canvasContext: surface.getContext("2d"),
    viewport: ocrViewport,
  });
  await renderTask.promise;
  return canvasToObjectUrl(surface);
}
|
|
/**
 * Create a detached canvas element sized to hold the given dimensions.
 *
 * @param {number} width - Desired width; rounded up to a whole pixel.
 * @param {number} height - Desired height; rounded up to a whole pixel.
 * @returns {HTMLCanvasElement} The new canvas.
 */
function documentCanvas(width, height) {
  const surface = document.createElement("canvas");
  return Object.assign(surface, {
    width: Math.ceil(width),
    height: Math.ceil(height),
  });
}
|
|
/**
 * Encode a canvas as PNG and expose it through an object URL.
 *
 * @param {HTMLCanvasElement} canvas - Canvas to encode.
 * @returns {Promise<string>} Object URL for the PNG blob. The caller owns the
 *   URL and should revoke it when done.
 * @throws {Error} (as rejection) When the canvas yields no blob.
 */
function canvasToObjectUrl(canvas) {
  return new Promise((resolve, reject) => {
    const onBlobReady = (pngBlob) => {
      if (pngBlob) {
        resolve(URL.createObjectURL(pngBlob));
      } else {
        reject(new Error("Could not render PDF page for OCR."));
      }
    };
    canvas.toBlob(onBlobReady, "image/png");
  });
}
|
|
/**
 * Normalize extracted or OCR text for further processing.
 *
 * Applies NFKC normalization, turns left-to-right/right-to-left marks and
 * runs of tab/CR/FF/VT into a single space, collapses three or more
 * newlines to two and repeated spaces to one, then trims the result.
 *
 * @param {string} text - Raw text.
 * @returns {string} Cleaned text.
 */
function cleanText(text) {
  const rewriteRules = [
    [/[\u200e\u200f]/g, " "],
    [/[\t\r\f\v]+/g, " "],
    [/\n{3,}/g, "\n\n"],
    [/[ ]{2,}/g, " "],
  ];
  const rewritten = rewriteRules.reduce(
    (current, [pattern, replacement]) => current.replace(pattern, replacement),
    text.normalize("NFKC"),
  );
  return rewritten.trim();
}
|
|
/**
 * Turn cleaned document text into text suitable for reading aloud.
 *
 * Lines are trimmed; short lines (48 characters or fewer) that occur three or
 * more times are treated as repeated boilerplate; consecutive duplicates and
 * lines rejected by `shouldDropSpeechLine` are removed; blank lines between
 * kept lines are collapsed to a single paragraph break.
 *
 * @param {string} text - Raw text; passed through `cleanText` first.
 * @returns {string} Speech-ready text.
 */
function prepareTextForSpeech(text) {
  const lines = cleanText(text)
    .split(/\n/)
    .map((rawLine) => rawLine.trim());

  // Tally every non-empty line to find repeated short lines.
  const tally = new Map();
  lines.filter(Boolean).forEach((line) => {
    tally.set(line, (tally.get(line) || 0) + 1);
  });
  const boilerplate = new Set();
  for (const [line, occurrences] of tally) {
    if (occurrences >= 3 && line.length <= 48) boilerplate.add(line);
  }

  const kept = [];
  let lastSeen = "";
  let gapPending = false;
  lines.forEach((line) => {
    if (line === "") {
      // Only remember a paragraph break once something has been emitted.
      gapPending = kept.length > 0;
      return;
    }
    if (line === lastSeen) return;
    lastSeen = line;
    if (shouldDropSpeechLine(line, boilerplate)) return;
    if (gapPending && kept.length > 0 && kept[kept.length - 1] !== "") kept.push("");
    kept.push(line);
    gapPending = false;
  });

  return kept.join("\n").replace(/\n{3,}/g, "\n\n").trim();
}
|
|
/**
 * Decide whether a line should be left out of the read-aloud text.
 *
 * A line is dropped when it is blank, is only a number or roman-numeral-like
 * token wrapped in punctuation, is in `repeatedLines`, or matches one of the
 * noise heuristics built on `lineNoiseMetrics` (no Arabic in a short line,
 * digit-heavy, symbol-heavy, or placeholder-heavy with few Arabic words).
 *
 * @param {string} line - Candidate line.
 * @param {Set<string>} repeatedLines - Lines already identified as repeated.
 * @returns {boolean} True when the line should be dropped.
 */
function shouldDropSpeechLine(line, repeatedLines) {
  const trimmed = line.trim();
  if (trimmed === "") return true;

  const bareNumberPattern =
    /^[\s\-–—_.:|/\\()[\]{}]*(?:[0-9٠-٩۰-۹]+|[ivxlcdmIVXLCDM]+)[\s\-–—_.:|/\\()[\]{}]*$/;
  if (bareNumberPattern.test(trimmed)) return true;

  const { arabicWords, arabicChars, digits, symbols, placeholders, latinWords } =
    lineNoiseMetrics(trimmed);
  if (repeatedLines.has(trimmed)) return true;

  const noArabic = !arabicWords;
  const length = trimmed.length;
  const shortNonArabicNoise =
    noArabic &&
    length <= 80 &&
    (length <= 24 || digits >= 3 || symbols >= 2 || Boolean(latinWords));

  const verdicts = [
    shortNonArabicNoise,
    length <= 2 && noArabic,
    digits >= 4 && arabicWords <= 3,
    digits >= 6 && digits > arabicChars,
    symbols >= 3 && arabicWords <= 4,
    placeholders >= 2 && arabicWords <= 4,
  ];
  return verdicts.some(Boolean);
}
|
|
/**
 * Count the character classes in a line that the noise heuristics rely on.
 *
 * @param {string} line - Line to inspect.
 * @returns {{arabicWords: number, arabicChars: number, digits: number,
 *   symbols: number, placeholders: number, latinWords: number}}
 *   Counts of Arabic-script runs and their total length, ASCII/Arabic-Indic/
 *   Persian digits, listed symbol characters, `?`/U+FFFD placeholders, and
 *   Latin runs of three or more letters.
 */
function lineNoiseMetrics(line) {
  const matchesOf = (pattern) => line.match(pattern) || [];

  const arabicRuns = matchesOf(/[\u0600-\u06ff\ufb50-\ufdff\ufe70-\ufeff]+/g);
  let arabicChars = 0;
  for (const run of arabicRuns) arabicChars += run.length;

  return {
    arabicWords: arabicRuns.length,
    arabicChars,
    digits: matchesOf(/[0-9\u0660-\u0669\u06f0-\u06f9]/g).length,
    symbols: matchesOf(/[!@#$%^&*_+=<>|~`]/g).length,
    placeholders: matchesOf(/[?\ufffd]/g).length,
    latinWords: matchesOf(/[A-Za-z]{3,}/g).length,
  };
}
|
|
/**
 * Score how readable a piece of text is as Arabic speech input.
 *
 * The text is first passed through `prepareTextForSpeech`. The score rewards
 * length, Arabic words and hits on a small list of common Arabic words, and
 * penalizes placeholders, very short lines, repeated lines, Latin words,
 * digits, one-letter Arabic words and fragment-like lines.
 *
 * @param {string} text - Text to score.
 * @returns {object} The score plus the prepared text and the individual
 *   measurements (`arabicWords` and `latinWords` are match arrays, the rest
 *   are numbers).
 */
function scoreTextQuality(text) {
  const speechText = prepareTextForSpeech(text);
  const matchesOf = (pattern) => speechText.match(pattern) || [];

  const arabicWords = matchesOf(/[\u0600-\u06ff\ufb50-\ufdff\ufe70-\ufeff]+/g);
  const placeholderCount = matchesOf(/[?\ufffd]/g).length;
  const latinWords = matchesOf(/[A-Za-z]{3,}/g);
  const digitNoise = matchesOf(/[0-9\u0660-\u0669\u06f0-\u06f9]/g).length;

  const lines = speechText
    .split(/\n/)
    .map((line) => line.trim())
    .filter(Boolean);

  // A fragment line carries at most two short Arabic words and little else.
  const fragmentLines = lines.filter((line) => {
    const lineMetrics = lineNoiseMetrics(line);
    return lineMetrics.arabicWords <= 2 && lineMetrics.arabicChars <= 18 && line.length <= 28;
  }).length;

  const singleArabicWords = arabicWords.filter((word) => word.length === 1).length;
  const shortLines = lines.filter((line) => line.length <= 3).length;
  const repeatedLines = lines.length - new Set(lines).size;

  const commonWords = new Set(["في", "من", "على", "هذا", "هذه", "التي", "الذي", "كان", "إلى", "الى", "عن", "مع", "هو", "هي"]);
  const commonHits = arabicWords.filter((word) => commonWords.has(word)).length;

  // Term order is kept stable so floating-point results do not drift.
  const score =
    speechText.length * 0.05 +
    arabicWords.length * 3 +
    commonHits * 18 -
    placeholderCount * 25 -
    shortLines * 8 -
    repeatedLines * 6 -
    latinWords.length * 4 -
    digitNoise * 3 -
    singleArabicWords * 6 -
    fragmentLines * 14;

  return {
    score,
    speechText,
    arabicWords,
    placeholderCount,
    latinWords,
    digitNoise,
    singleArabicWords,
    singleArabicWordRatio: singleArabicWords / Math.max(arabicWords.length, 1),
    fragmentLines,
    fragmentLineRatio: fragmentLines / Math.max(lines.length, 1),
  };
}
|
|
/**
 * Classify text as "good", "warning" or "poor" for text-to-speech.
 *
 * Collects human-readable reasons from the measurements returned by
 * `scoreTextQuality`. Four of the reasons are blocking and make the text
 * "poor"; any other reason yields "warning"; no reasons yields "good".
 *
 * @param {string} text - Source text; only used to derive the default
 *   `speechText`.
 * @param {string} [speechText] - Prepared speech text; defaults to
 *   `prepareTextForSpeech(text)`.
 * @returns {{quality: string, readyForTts: boolean, reasons: string[],
 *   score: number, singleArabicWordRatio: number, fragmentLineRatio: number}}
 *   The verdict, with the score rounded to 2 decimals and ratios to 4.
 */
function assessTextQuality(text, speechText = prepareTextForSpeech(text)) {
  const scored = scoreTextQuality(speechText);
  const arabicWordCount = scored.arabicWords.length;
  const latinWordCount = scored.latinWords.length;
  const placeholderRatio = scored.placeholderCount / Math.max(speechText.length, 1);

  const reasons = [];
  const note = (applies, reason) => {
    if (applies) reasons.push(reason);
  };

  note(speechText.length < 20, "too little readable text after cleanup");
  note(arabicWordCount < 5, "too few Arabic words");
  if (placeholderRatio >= 0.2) {
    reasons.push("too many unreadable placeholder characters");
  } else if (placeholderRatio > 0) {
    reasons.push("some unreadable placeholder characters remain");
  }
  note(scored.digitNoise >= Math.max(20, arabicWordCount), "digit-heavy OCR noise remains");
  note(
    scored.singleArabicWordRatio >= 0.1 && arabicWordCount >= 25,
    "many one-letter Arabic OCR fragments remain",
  );
  note(
    scored.fragmentLineRatio >= 0.25 && speechText.split(/\n/).length >= 8,
    "many low-information OCR lines remain",
  );
  note(latinWordCount >= 3 && latinWordCount >= arabicWordCount, "non-Arabic OCR text dominates");

  const blockingReasons = new Set([
    "too little readable text after cleanup",
    "too few Arabic words",
    "too many unreadable placeholder characters",
    "non-Arabic OCR text dominates",
  ]);
  let quality = "good";
  if (reasons.some((reason) => blockingReasons.has(reason))) {
    quality = "poor";
  } else if (reasons.length) {
    quality = "warning";
  }

  const roundTo = (value, factor) => Math.round(value * factor) / factor;
  return {
    quality,
    readyForTts: quality !== "poor",
    reasons,
    score: roundTo(scored.score, 100),
    singleArabicWordRatio: roundTo(scored.singleArabicWordRatio, 10000),
    fragmentLineRatio: roundTo(scored.fragmentLineRatio, 10000),
  };
}
|
|
/**
 * Format a quality verdict for display.
 *
 * @param {{quality?: string, score?: number}|null|undefined} quality - Verdict.
 * @returns {string} "-" when there is no verdict; otherwise the label from
 *   `qualityLabels` (or the raw quality value) followed by the rounded score
 *   when the score is truthy.
 */
function formatQuality(quality) {
  const level = quality?.quality;
  if (!level) return "-";
  const label = qualityLabels[level] || level;
  const roundedScore = quality.score ? Math.round(quality.score) : "";
  return `${label} ${roundedScore}`.trim();
}
|
|
/**
 * Split text into pieces of at most `size` characters, preferring to break
 * at a space.
 *
 * A space is used as the break point only when it lies at or beyond roughly
 * 45% of `size`; otherwise the text is cut hard at `size` so pieces do not
 * become needlessly short. Each piece is trimmed and empty pieces are
 * skipped.
 *
 * @param {string} text - Text to split.
 * @param {number} size - Maximum piece length in UTF-16 code units. Values
 *   below 1 (or non-numeric values) disable splitting and return the trimmed
 *   text as a single piece.
 * @returns {string[]} The pieces, in order; empty when the text is blank.
 */
function splitLongTextAtWordBoundaries(text, size) {
  const pieces = [];
  let remaining = text.trim();
  // A limit below one character can never consume any text, which previously
  // made the loop below spin forever. Treat it as "do not split".
  if (!(size >= 1)) return remaining ? [remaining] : [];
  const earliestSplit = Math.max(1, Math.floor(size * 0.45));
  while (remaining.length > size) {
    let splitAt = remaining.lastIndexOf(" ", size);
    // No usable space (or it is too early): fall back to a hard cut.
    if (splitAt < earliestSplit) splitAt = size;
    const piece = remaining.slice(0, splitAt).trim();
    if (piece) pieces.push(piece);
    remaining = remaining.slice(splitAt).trim();
  }
  if (remaining) pieces.push(remaining);
  return pieces;
}
|
|
/**
 * Split text into chunks of roughly `size` characters for speech synthesis.
 *
 * The text is prepared with `prepareTextForSpeech`, then packed paragraph by
 * paragraph. A paragraph longer than `size` is split at sentence punctuation
 * (Latin and Arabic), and any sentence still too long is split with
 * `splitLongTextAtWordBoundaries`.
 *
 * @param {string} text - Text to chunk.
 * @param {number} size - Target maximum chunk length.
 * @returns {string[]} Non-empty chunks in reading order.
 */
function chunkText(text, size) {
  const speechText = prepareTextForSpeech(text);
  const paragraphs = speechText
    .split(/\n{2,}/)
    .map((part) => part.trim())
    .filter(Boolean);

  const chunks = [];
  let buffer = "";
  const emitBuffer = () => {
    if (buffer) chunks.push(buffer);
  };

  for (const paragraph of paragraphs) {
    if (buffer.length + paragraph.length + 2 <= size) {
      // Paragraph still fits alongside what is buffered.
      buffer = `${buffer}\n\n${paragraph}`.trim();
    } else if (paragraph.length <= size) {
      emitBuffer();
      buffer = paragraph;
    } else {
      emitBuffer();
      buffer = "";
      const sentences = paragraph.split(/(?<=[.!\u061f?\u060c\u061b])\s+/);
      for (const sentence of sentences) {
        if (buffer.length + sentence.length + 1 <= size) {
          buffer = `${buffer} ${sentence}`.trim();
          continue;
        }
        emitBuffer();
        const sentenceParts = splitLongTextAtWordBoundaries(sentence, size);
        // All but the last part are complete chunks; the last one stays
        // buffered so following text can be appended to it.
        const tail = sentenceParts.pop();
        chunks.push(...sentenceParts);
        buffer = tail || "";
      }
    }
  }
  emitBuffer();
  return chunks;
}
|
|
/**
 * Concatenate several WAV files into a single WAV blob.
 *
 * A single buffer is wrapped as-is. Otherwise every buffer is parsed with
 * `parseWav`, the `fmt ` chunk of the first file is reused, and the audio
 * data of all files is written back-to-back under one `data` chunk.
 * NOTE(review): the files are assumed to share the same audio format; this
 * is not checked here.
 *
 * @param {ArrayBuffer[]} buffers - WAV files in playback order.
 * @returns {Blob} Blob of type "audio/wav".
 * @throws {Error} When `buffers` is empty, or when `parseWav` rejects a file.
 */
function mergeWavBuffers(buffers) {
  if (!buffers.length) {
    // Previously surfaced as an opaque TypeError on `wavs[0].fmt`.
    throw new Error("No audio segments were provided to merge.");
  }
  if (buffers.length === 1) return new Blob([buffers[0]], { type: "audio/wav" });

  const wavs = buffers.map((buffer) => parseWav(buffer));
  const fmt = wavs[0].fmt;
  const dataLength = wavs.reduce((total, wav) => total + wav.data.length, 0);
  // RIFF chunks are word-aligned: an odd-sized chunk is followed by one pad
  // byte that is not counted in the chunk size (parseWav skips it on read).
  const padLength = dataLength % 2;
  const output = new ArrayBuffer(12 + 8 + fmt.length + 8 + dataLength + padLength);
  const view = new DataView(output);
  const bytes = new Uint8Array(output);
  let offset = 0;

  offset = writeString(bytes, offset, "RIFF");
  view.setUint32(offset, output.byteLength - 8, true);
  offset += 4;
  offset = writeString(bytes, offset, "WAVE");
  offset = writeString(bytes, offset, "fmt ");
  view.setUint32(offset, fmt.length, true);
  offset += 4;
  bytes.set(fmt, offset);
  offset += fmt.length;
  offset = writeString(bytes, offset, "data");
  view.setUint32(offset, dataLength, true);
  offset += 4;
  for (const wav of wavs) {
    bytes.set(wav.data, offset);
    offset += wav.data.length;
  }
  // The pad byte, if any, is already zero in the freshly allocated buffer.
  return new Blob([output], { type: "audio/wav" });
}
|
|
/**
 * Extract the `fmt ` and `data` chunks from a RIFF/WAVE file.
 *
 * Chunks are walked from byte 12 onward; other chunk types are skipped and
 * the pad byte after an odd-sized chunk is honoured. If a chunk id occurs
 * more than once, the last occurrence wins.
 *
 * @param {ArrayBuffer} buffer - Complete WAV file contents.
 * @returns {{fmt: Uint8Array, data: Uint8Array}} Copies of the two chunks.
 * @throws {Error} When the RIFF/WAVE header is missing or a required chunk
 *   is absent.
 */
function parseWav(buffer) {
  const view = new DataView(buffer);
  const bytes = new Uint8Array(buffer);

  const isRiffWave = readString(bytes, 0, 4) === "RIFF" && readString(bytes, 8, 4) === "WAVE";
  if (!isRiffWave) {
    throw new Error("Cloud voice returned an audio format that cannot be merged.");
  }

  let fmt = null;
  let data = null;
  let cursor = 12;
  while (cursor + 8 <= bytes.length) {
    const chunkId = readString(bytes, cursor, 4);
    const chunkLength = view.getUint32(cursor + 4, true);
    const bodyStart = cursor + 8;
    const bodyEnd = bodyStart + chunkLength;
    switch (chunkId) {
      case "fmt ":
        fmt = bytes.slice(bodyStart, bodyEnd);
        break;
      case "data":
        data = bytes.slice(bodyStart, bodyEnd);
        break;
      default:
        break;
    }
    // Odd-sized chunks are followed by one pad byte.
    cursor = bodyEnd + (chunkLength % 2);
  }

  if (!fmt || !data) throw new Error("Cloud voice returned invalid WAV audio.");
  return { fmt, data };
}
|
|
/**
 * Decode a run of bytes as a string, one character per byte value.
 *
 * @param {Uint8Array} bytes - Source bytes.
 * @param {number} offset - Index of the first byte to read.
 * @param {number} length - Number of bytes to read; reads stop at the end of
 *   `bytes`.
 * @returns {string} The decoded characters.
 */
function readString(bytes, offset, length) {
  let decoded = "";
  for (const code of bytes.slice(offset, offset + length)) {
    decoded += String.fromCharCode(code);
  }
  return decoded;
}
|
|
/**
 * Write the character codes of `value` into `bytes` starting at `offset`.
 *
 * @param {Uint8Array} bytes - Destination; modified in place.
 * @param {number} offset - Index of the first byte to write.
 * @param {string} value - Text to write, one code unit per element.
 * @returns {number} The offset just past the written text.
 */
function writeString(bytes, offset, value) {
  let cursor = offset;
  for (const unit of value.split("")) {
    bytes[cursor] = unit.charCodeAt(0);
    cursor += 1;
  }
  return offset + value.length;
}
|
|
/**
 * Show generated audio in the player and offer it for download.
 *
 * Any browser read-aloud fallback is reset, the previously shown object URL
 * is revoked, and a new object URL for `blob` is wired to the player and the
 * download link. The download name is the source name without a trailing
 * ".pdf" (or "arabic-pdf" when nothing is left) plus ".wav".
 *
 * @param {Blob} blob - Audio to present.
 * @param {string} sourceName - Name of the source PDF.
 * @returns {void}
 */
function showAudio(blob, sourceName) {
  resetBrowserSpeechFallback();
  if (currentAudioUrl) {
    URL.revokeObjectURL(currentAudioUrl);
  }
  const freshUrl = URL.createObjectURL(blob);
  currentAudioUrl = freshUrl;
  audioPlayer.src = freshUrl;
  downloadLink.href = freshUrl;
  const baseName = sourceName.replace(/\.pdf$/i, "") || "arabic-pdf";
  downloadLink.download = `${baseName}.wav`;
  audioPanel.classList.remove("hidden");
}
|
|
/**
 * Check whether the browser exposes the speech-synthesis API.
 *
 * @returns {boolean} True when `window.speechSynthesis` exists and
 *   `window.SpeechSynthesisUtterance` is a function.
 */
function canUseBrowserSpeech() {
  if (!("speechSynthesis" in window)) return false;
  return typeof window.SpeechSynthesisUtterance === "function";
}
|
|
/**
 * Stop any browser read-aloud and return the fallback panel to its initial,
 * hidden state.
 *
 * @returns {void}
 */
function resetBrowserSpeechFallback() {
  const speechAvailable = canUseBrowserSpeech();
  if (speechAvailable) {
    window.speechSynthesis.cancel();
  }
  browserSpeechText = "";
  browserSpeechSourceName = "";
  browserSpeechPanel.classList.add("hidden");
  browserSpeechMessage.textContent = "Browser read-aloud is ready.";
}
|
|
/**
 * Switch the result area to the browser read-aloud fallback.
 *
 * Stores the text and source name for later playback, hides and clears the
 * audio player and download link, updates the summary values and job state,
 * reveals the fallback panel with `message`, and marks every stage as done.
 *
 * @param {string} text - Text to read aloud in the browser.
 * @param {string} sourceName - Name of the source PDF.
 * @param {string} message - Message shown in the fallback panel.
 * @returns {void}
 */
function showBrowserSpeechFallback(text, sourceName, message) {
  browserSpeechText = text;
  browserSpeechSourceName = sourceName;

  // No downloadable audio exists in this mode, so clear the player and link.
  audioPanel.classList.add("hidden");
  audioPlayer.removeAttribute("src");
  for (const attribute of ["href", "download"]) {
    downloadLink.removeAttribute(attribute);
  }

  audioFormatValue.textContent = "Browser only";
  engineValue.textContent = "browser voice";
  setJobState("speaking", "Listen only");
  jobMessage.textContent = "Browser read-aloud is ready.";
  browserSpeechMessage.textContent = message;
  browserSpeechPanel.classList.remove("hidden");

  const finishedSteps = defaultStageSteps.map((step) => ({ ...step, state: "done" }));
  updateStage({
    id: "ready",
    label: "Listen",
    phase: "Browser read-aloud ready",
    detail: "Downloadable audio needs the Vercel worker. Browser read-aloud is ready for listening.",
    progress: 100,
    step: 5,
    totalSteps: 5,
    steps: finishedSteps,
  });
}
|
|
/**
 * Pick an Arabic voice from the browser's speech-synthesis voices.
 *
 * A voice whose language tag starts with "ar" is preferred; otherwise the
 * first voice whose name or language mentions "arabic" or "ar-"/"ar_" is
 * used.
 *
 * @returns {SpeechSynthesisVoice|null} The chosen voice, or null when speech
 *   is unavailable or no voice matches.
 */
function getBrowserArabicVoice() {
  if (!canUseBrowserSpeech()) return null;
  const availableVoices = window.speechSynthesis.getVoices();

  const byLanguageTag = availableVoices.find((voice) => voice.lang?.toLowerCase().startsWith("ar"));
  if (byLanguageTag) return byLanguageTag;

  const byDescription = availableVoices.find((voice) =>
    /arabic|ar[-_]/i.test(`${voice.name} ${voice.lang}`),
  );
  return byDescription || null;
}
|
|
/**
 * Read the stored fallback text aloud with the browser's speech synthesis.
 *
 * The text is split into chunks of up to 700 characters and spoken one
 * utterance at a time; after each utterance ends the stage display is
 * updated and the next chunk is queued. Does nothing when there is no stored
 * text, speech is unavailable, or chunking yields nothing.
 *
 * @returns {void}
 */
function speakBrowserSpeechText() {
  if (!browserSpeechText || !canUseBrowserSpeech()) return;
  const parts = chunkText(browserSpeechText, 700);
  if (!parts.length) return;

  const synth = window.speechSynthesis;
  const voice = getBrowserArabicVoice();
  const rate = Number(speedSelect.value) || 1;
  let spokenCount = 0;
  synth.cancel();

  // Publishes read-aloud progress for the number of parts spoken so far.
  const reportProgress = (detail, progress) => {
    updateStage({
      id: "voice",
      label: "Listening",
      phase: "Browser read-aloud",
      detail,
      progress,
      step: 5,
      totalSteps: 5,
      itemProgress: stageItem("Read-aloud part", spokenCount, parts.length),
      steps: defaultStageSteps.map((step) => ({ ...step, state: "done" })),
    });
  };

  const speakNext = () => {
    if (spokenCount >= parts.length) {
      jobMessage.textContent = "Browser read-aloud finished.";
      return;
    }
    const utterance = new window.SpeechSynthesisUtterance(parts[spokenCount]);
    utterance.lang = voice?.lang || "ar";
    utterance.voice = voice;
    // Keep the rate inside the range this feature allows.
    utterance.rate = Math.max(0.6, Math.min(1.35, rate));
    utterance.onend = () => {
      spokenCount += 1;
      const upcomingPart = Math.min(spokenCount + 1, parts.length);
      reportProgress(
        spokenCount >= parts.length
          ? "Browser read-aloud finished."
          : `Reading part ${upcomingPart} of ${parts.length}.`,
        rangeProgress(progressRanges.voiceStart, 100, spokenCount, parts.length),
      );
      speakNext();
    };
    utterance.onerror = () => {
      jobMessage.textContent = "Browser read-aloud stopped.";
    };
    jobMessage.textContent = `Reading ${browserSpeechSourceName || "PDF"} in the browser.`;
    synth.speak(utterance);
  };

  reportProgress(`Reading part 1 of ${parts.length}.`, progressRanges.voiceStart);
  speakNext();
}
|
|
/**
 * Show the selected file's name and size (in whole megabytes, rounded up) in
 * `fileName`, or a placeholder when no file is selected.
 *
 * @param {File|null|undefined} file - The chosen file, if any.
 * @returns {void}
 */
function describeFile(file) {
  if (!file) {
    fileName.textContent = "No file selected";
    return;
  }
  const megabytes = Math.ceil(file.size / 1024 / 1024);
  fileName.textContent = `${file.name} (${megabytes} MB)`;
}
|
|
/**
 * Explain the currently selected OCR mode in `engineNotice`.
 *
 * For a recognized mode the matching description is shown and the "warning"
 * class is removed. The two comparison modes also append a note about which
 * Arabic OCR models are installed. An unrecognized mode leaves the notice
 * untouched.
 *
 * @returns {void}
 */
function describeOcrMode() {
  const installedText = installedArabicOcrModels.length
    ? ` Installed now: ${installedArabicOcrModels.join(", ")}.`
    : " Install QARI, Tawkeed, or KATIB on the worker for the best Arabic-trained OCR.";

  // A Map (not a plain object) so values such as "constructor" never match.
  const descriptions = new Map([
    ["arabic-max", `Maximum Arabic OCR selected. It tries the most engines and keeps the cleanest text, but it is slower. Use Quick test first.${installedText}`],
    ["arabic", `Arabic OCR comparison selected. It compares installed OCR paths and can be much slower than the recommended Tesseract setting.${installedText}`],
    ["qari-ocr", "QARI Arabic books selected. Use this on a short sample or strong worker; it is trained for Arabic books, Islamic texts, manuscripts, and layout-aware Arabic transcription."],
    ["tawkeed-ocr", "Tawkeed Arabic OCR selected. Use this on a short sample or worker; it is Arabic-first and trained for documents, handwriting, and scene text."],
    ["katib-ocr", "KATIB Arabic OCR selected. Use this on a short sample; it is a smaller Arabic-trained model for printed and handwritten Arabic."],
    ["arabic-qwen-ocr", "Arabic-Qwen OCR selected. Use this on a short sample or worker; it is a 0.9B Arabic-trained OCR model."],
    ["arabic-glm-ocr", "Arabic-GLM OCR selected. Use this on a short sample or strong worker; it is a recent Arabic-trained OCR model for books and scanned documents."],
    ["baseer-ocr", "Baseer Arabic OCR selected. Use this on a short sample or worker for complex Arabic document layouts."],
    ["best", "Best scan test selected. Use this on a short sample, then run the winning engine for the full book."],
    ["paddleocr", "Rank 3 PaddleOCR Arabic selected. It works, but the 5-page benchmark produced more fragmented text than Tesseract."],
    ["tesseract", "Rank 1 Tesseract Arabic selected. This produced the best readable text in the 5-page OCR benchmark."],
    ["tesseract-fast", "Rank 2 Tesseract Arabic selected. This was the faster readable runner-up in the 5-page OCR benchmark."],
    ["paddleocr-vl", "PaddleOCR-VL selected. Use this only on a short sample or strong worker; it is much heavier than normal Arabic OCR."],
    ["surya", "Surya heavy OCR selected. Use this only on a worker or powerful computer for difficult scanned PDFs."],
  ]);

  const selectedMode = ocrModeSelect.value;
  if (!descriptions.has(selectedMode)) return;
  engineNotice.textContent = descriptions.get(selectedMode);
  engineNotice.classList.remove("warning");
}
|
|
/**
 * Rebuild the options of `voiceSelect` for the current mode.
 *
 * The cloud catalog is used in cloud mode without a remote worker, the local
 * catalog otherwise; an empty catalog falls back to one built-in voice. The
 * previous selection is kept when still offered, else the catalog default is
 * selected when offered.
 *
 * @returns {void}
 */
function renderVoiceOptions() {
  const useCloudVoices = cloudMode && !remoteWorkerMode;
  const catalogVoices = useCloudVoices ? voiceCatalog.cloud : voiceCatalog.local;
  let options = catalogVoices;
  if (!catalogVoices.length) {
    options = useCloudVoices
      ? [{ id: "mms-ara", label: "Arabic Standard" }]
      : [{ id: "espeak-ar", label: "Local Arabic" }];
  }
  // Capture the selection before the options are cleared.
  const previousChoice = voiceSelect.value || voiceCatalog.default;

  voiceSelect.innerHTML = "";
  options.forEach((voice) => {
    const option = document.createElement("option");
    option.value = voice.id;
    option.textContent = voice.label;
    voiceSelect.append(option);
  });

  const isOffered = (voiceId) => options.some((voice) => voice.id === voiceId);
  if (isOffered(previousChoice)) {
    voiceSelect.value = previousChoice;
  } else if (isOffered(voiceCatalog.default)) {
    voiceSelect.value = voiceCatalog.default;
  }
}
|
|
/**
 * Reset the job display for a newly started job.
 *
 * Sets the title, state and message, shows the first stage as active at 2%
 * progress, clears the quality hint and summary values, hides the audio
 * player and download link, and resets the browser read-aloud fallback.
 * Labels differ between in-browser processing (cloud mode without a remote
 * worker) and server uploads.
 *
 * @param {string} name - Job title to display.
 * @param {string} [message="Uploading PDF."] - Initial status message.
 * @returns {void}
 */
function resetJob(name, message = "Uploading PDF.") {
  jobTitle.textContent = name;
  const inBrowser = cloudMode && !remoteWorkerMode;
  setJobState("queued", inBrowser ? "Reading" : "Uploading");
  jobMessage.textContent = message;

  const initialSteps = defaultStageSteps.map((step, index) => ({
    ...step,
    state: index === 0 ? "active" : "pending",
  }));
  updateStage({
    id: "upload",
    label: inBrowser ? "Text scan" : "Upload",
    phase: inBrowser ? "Checking text layer" : "Uploading PDF",
    detail: message,
    progress: 2,
    steps: initialSteps,
  });

  showQualityHint(null);
  pagesValue.textContent = "0";
  charactersValue.textContent = "0";
  engineValue.textContent = "-";
  extractionValue.textContent = "-";
  qualityValue.textContent = "-";
  audioFormatValue.textContent = "-";

  audioPanel.classList.add("hidden");
  audioPlayer.removeAttribute("src");
  for (const attribute of ["href", "download"]) {
    downloadLink.removeAttribute(attribute);
  }
  resetBrowserSpeechFallback();
  updateProgress(2);
}
|
|
/**
 * Show an overall progress percentage in the progress bar, the meter's
 * `aria-valuenow` (when a meter exists) and the percent label.
 *
 * @param {number|string} value - Progress; non-numeric values count as 0 and
 *   the rounded result is clamped to 0..100.
 * @returns {void}
 */
function updateProgress(value) {
  const rounded = Math.round(Number(value) || 0);
  const percent = Math.min(100, Math.max(0, rounded));
  const percentText = `${percent}%`;
  progressBar.style.width = percentText;
  progressMeter?.setAttribute("aria-valuenow", String(percent));
  progressPercent.textContent = percentText;
}
|
|
/**
 * Build the stage list with states relative to the active step.
 *
 * Steps before the active one are "done", the active one is "active" and the
 * rest are "pending". When `activeId` matches no step, every step is
 * "pending".
 *
 * @param {string} activeId - Id of the step currently running.
 * @returns {object[]} Copies of `defaultStageSteps` with a `state` added.
 */
function stepStates(activeId) {
  const activeIndex = defaultStageSteps.findIndex((step) => step.id === activeId);
  const stateAt = (index) => {
    if (activeIndex < 0) return "pending";
    if (index < activeIndex) return "done";
    return index === activeIndex ? "active" : "pending";
  };
  return defaultStageSteps.map((step, index) => ({ ...step, state: stateAt(index) }));
}
|
|
/**
 * Build the per-item progress descriptor shown under the current stage.
 *
 * @param {string} label - What is being counted.
 * @param {number} current - Items completed; may be fractional. Clamped to
 *   0..total.
 * @param {number} total - Total items; values below 1 or non-numeric become 1.
 * @returns {{label: string, current: number, total: number, percent: number}}
 *   The sanitized counts and the rounded percentage.
 */
function stageItem(label, current, total) {
  const totalCount = Math.max(1, Number(total) || 1);
  const requested = Number(current) || 0;
  const doneCount = Math.max(0, Math.min(totalCount, requested));
  const percent = Math.round((doneCount / totalCount) * 100);
  return { label, current: doneCount, total: totalCount, percent };
}
|
|
/**
 * Format a progress count for display: whole numbers as-is, anything else
 * with one decimal place. Non-numeric input is shown as "0".
 *
 * @param {number|string} value - Count to format.
 * @returns {string} The formatted count.
 */
function formatProgressCount(value) {
  const count = Number(value) || 0;
  return Number.isInteger(count) ? String(count) : count.toFixed(1);
}
|
|
/**
 * Render a stage description into the progress area.
 *
 * Updates the overall progress bar, the stage label/phase/detail texts, the
 * optional per-item progress row, the background-work hint, and the list of
 * step indicators. Missing fields fall back to generic wording, and a
 * missing or empty `steps` falls back to `defaultStageSteps`.
 *
 * @param {object} [stage] - Stage description. Fields read here: `progress`,
 *   `label`, `phase`, `detail`, `step`, `totalSteps`, `overallLabel`,
 *   `itemProgress`, `steps`.
 * @returns {void}
 */
function updateStage(stage) {
  const progress = stage?.progress ?? 0;
  updateProgress(progress);

  const hasStepCount = Boolean(stage?.step && stage?.totalSteps);
  const stepText = hasStepCount ? `Step ${stage.step} of ${stage.totalSteps}` : "Current step";
  const stepSuffix = hasStepCount ? ` (${stage.step} of ${stage.totalSteps})` : "";
  const labelWithStep = stage?.label ? `${stage.label}${stepSuffix}` : "Working";
  const phase = stage?.phase || "Current step";
  const overallLabel = stage?.overallLabel || "Overall progress";

  stageLabel.textContent = labelWithStep;
  stagePhaseLabel.textContent = `${stepText} - ${overallLabel} ${Math.round(Number(progress) || 0)}%`;
  stageDetailTitle.textContent = phase === "Current step" ? labelWithStep : phase;
  stageDetailText.textContent = stage?.detail || jobMessage.textContent || "Working on the PDF.";

  // Per-item row: shown only when the stage reports a non-zero item total.
  const itemProgress = stage?.itemProgress;
  const showItemRow = Boolean(itemProgress && itemProgress.total);
  const itemPercent = showItemRow
    ? Math.max(0, Math.min(100, Math.round(Number(itemProgress.percent) || 0)))
    : 0;
  stageItemProgress.classList.toggle("hidden", !showItemRow);
  stageItemLabel.textContent = showItemRow ? itemProgress.label || "Progress" : "Progress";
  stageItemValue.textContent = showItemRow
    ? `${formatProgressCount(itemProgress.current)} of ${formatProgressCount(itemProgress.total)} (${itemPercent}%)`
    : "0 of 0";
  stageItemBar.style.width = `${itemPercent}%`;

  const isServerJob = !cloudMode || remoteWorkerMode;
  const backgroundText = isServerJob
    ? "The server keeps working even if you are not watching this screen."
    : "Keep this tab open while browser OCR is reading scanned pages.";
  stageMetaText.textContent = `Updated just now. ${backgroundText}`;

  const steps = stage?.steps?.length ? stage.steps : defaultStageSteps;
  stageList.innerHTML = "";
  steps.forEach((step) => {
    const row = document.createElement("li");
    row.className = step.state || "pending";

    const dot = document.createElement("span");
    dot.className = "stage-dot";
    dot.setAttribute("aria-hidden", "true");

    const caption = document.createElement("span");
    caption.textContent = step.label;

    row.append(dot, caption);
    stageList.append(row);
  });
}
|
|
/**
 * Show the job status label and flag the terminal states on `jobState`.
 *
 * @param {string} status - Job status; "complete" and "failed" each toggle a
 *   class of the same name.
 * @param {string} [label] - Text to show; defaults to the entry in
 *   `statusLabels`, or the status itself.
 * @returns {void}
 */
function setJobState(status, label = statusLabels[status] || status) {
  jobState.textContent = label;
  for (const terminalState of ["complete", "failed"]) {
    jobState.classList.toggle(terminalState, status === terminalState);
  }
}
|
|
/**
 * Start polling a job: record it as the active job, replace any existing
 * poll timer with one that polls every 750 ms, and poll once immediately.
 *
 * @param {string} jobId - Id of the job to poll.
 * @returns {void}
 */
function startPolling(jobId) {
  activeJobId = jobId;
  clearInterval(pollTimer);
  const pollOnce = () => pollJob(jobId);
  pollTimer = setInterval(pollOnce, 750);
  pollOnce();
}
|
|
/**
 * Fetch the current state of a job and render it.
 *
 * Updates the title, state, message, quality hint, summary values and stage
 * display. When the job is complete or failed, polling stops, the create
 * button is re-enabled and the history is reloaded; a completed job also
 * gets its audio wired to the player and download link. A request failure
 * stops polling and shows the error.
 *
 * Polls overlap (they are fired on a timer without waiting for the previous
 * response), so a response can arrive after another job has become active.
 * Such stale responses and errors are ignored; otherwise they would
 * overwrite the new job's display and clear its poll timer.
 *
 * @param {string} jobId - Id of the job to fetch.
 * @returns {Promise<void>}
 */
async function pollJob(jobId) {
  // Only treat the response as stale when a *different* job is active; a
  // direct call while no job is active still renders as before.
  const supersededByAnotherJob = () =>
    activeJobId != null && String(activeJobId) !== String(jobId);
  const stopPolling = () => {
    clearInterval(pollTimer);
    pollTimer = null;
    activeJobId = null;
    createButton.disabled = false;
  };

  try {
    const job = await apiFetch(`/api/jobs/${jobId}`).then(readJson);
    if (supersededByAnotherJob()) return;

    jobTitle.textContent = job.filename || "Arabic PDF";
    setJobState(job.status);
    jobMessage.textContent = job.error || job.message;
    showQualityHint({
      quality: job.textQuality,
      reasons: job.qualityReasons || [],
      readyForTts: job.textQuality && job.textQuality !== "poor",
    });
    pagesValue.textContent = formatPages(job);
    charactersValue.textContent = (job.characters || 0).toLocaleString();
    engineValue.textContent = job.engine || "-";
    extractionValue.textContent = job.extraction || "-";
    qualityValue.textContent = job.textQuality
      ? `${qualityLabels[job.textQuality] || job.textQuality} ${job.qualityScore ? Math.round(job.qualityScore) : ""}`.trim()
      : "-";
    audioFormatValue.textContent = job.audioFormat
      ? `${job.audioFormat.toUpperCase()} ${formatBytes(job.audioBytes)}`
      : "-";
    updateStage(job.stage || { progress: job.progress || 0, label: statusLabels[job.status] || job.status });

    if (job.status === "complete") {
      stopPolling();
      audioPlayer.src = apiUrl(job.audioUrl);
      downloadLink.href = apiUrl(job.downloadUrl);
      audioPanel.classList.remove("hidden");
      loadHistory();
    }
    if (job.status === "failed") {
      stopPolling();
      loadHistory();
    }
  } catch (error) {
    // A failure that belongs to a superseded job must not stop the new one.
    if (supersededByAnotherJob()) return;
    stopPolling();
    setError(uploadError, error.message);
  }
}
|
|
/**
 * Format a byte count with a B/KB/MB/GB unit.
 *
 * Bytes and values of 10 or more in their unit are shown without decimals;
 * smaller values get one decimal place.
 *
 * @param {number|string} value - Byte count.
 * @returns {string} The formatted size, or "" for a falsy value.
 */
function formatBytes(value) {
  if (!value) return "";
  const units = ["B", "KB", "MB", "GB"];
  const lastUnit = units.length - 1;
  let amount = Number(value);
  let unitIndex = 0;
  for (; amount >= 1024 && unitIndex < lastUnit; unitIndex += 1) {
    amount /= 1024;
  }
  const decimals = amount >= 10 || unitIndex === 0 ? 0 : 1;
  return `${amount.toFixed(decimals)} ${units[unitIndex]}`;
}
|
|
/**
 * Format the page count of a job.
 *
 * @param {{pages?: number, totalPages?: number}} job - Job data.
 * @returns {string} "processed / total" when the document has more pages
 *   than were processed, otherwise just the processed count.
 */
function formatPages(job) {
  const processed = job.pages || 0;
  const overall = job.totalPages || processed;
  const isPartial = overall && overall > processed;
  return isPartial ? `${processed} / ${overall}` : `${processed}`;
}
|
|
/**
 * Show or hide the text-quality hint.
 *
 * The hint is cleared and hidden when there is no verdict or the verdict is
 * "good". Otherwise it lists the reasons and suggests a next step, with
 * stronger wording and the "poor" class for a "poor" verdict.
 *
 * @param {{quality?: string, reasons?: string[]}|null} quality - Verdict.
 * @returns {void}
 */
function showQualityHint(quality) {
  const level = quality?.quality;
  if (!level || level === "good") {
    qualityHint.textContent = "";
    qualityHint.classList.add("hidden");
    qualityHint.classList.remove("poor");
    return;
  }

  const isPoor = level === "poor";
  const reasons = quality.reasons?.length ? ` ${quality.reasons.join("; ")}.` : "";
  let action = "Listen to a short sample before running the full book. If it sounds wrong, try 1. Tesseract Arabic - Best readable, Best scan test, or another OCR mode.";
  if (isPoor) {
    action = "Try 1. Tesseract Arabic - Best readable, Best scan test, or another OCR mode before creating audio.";
  }
  qualityHint.textContent = `Text needs checking.${reasons} ${action}`;
  qualityHint.classList.remove("hidden");
  qualityHint.classList.toggle("poor", isPoor);
}
|
|
// Start-up: fill the voice selector for the current mode as soon as the
// script loads.
renderVoiceOptions();
// checkSession is defined outside this section; presumably it decides
// whether to show the login panel or the workspace. TODO confirm.
checkSession();
|
|