const loginPanel = document.querySelector("#loginPanel"); const workspace = document.querySelector("#workspace"); const loginForm = document.querySelector("#loginForm"); const codeInput = document.querySelector("#codeInput"); const loginError = document.querySelector("#loginError"); const logoutButton = document.querySelector("#logoutButton"); const uploadForm = document.querySelector("#uploadForm"); const pdfInput = document.querySelector("#pdfInput"); const fileName = document.querySelector("#fileName"); const createButton = document.querySelector("#createButton"); const voiceSelect = document.querySelector("#voiceSelect"); const speedSelect = document.querySelector("#speedSelect"); const ocrModeSelect = document.querySelector("#ocrModeSelect"); const pageLimitSelect = document.querySelector("#pageLimitSelect"); const uploadError = document.querySelector("#uploadError"); const engineNotice = document.querySelector("#engineNotice"); const jobTitle = document.querySelector("#jobTitle"); const jobState = document.querySelector("#jobState"); const progressBar = document.querySelector("#progressBar"); const progressMeter = document.querySelector(".meter"); const progressPercent = document.querySelector("#progressPercent"); const stageLabel = document.querySelector("#stageLabel"); const stagePhaseLabel = document.querySelector("#stagePhaseLabel"); const stageDetailTitle = document.querySelector("#stageDetailTitle"); const stageDetailText = document.querySelector("#stageDetailText"); const stageItemProgress = document.querySelector("#stageItemProgress"); const stageItemLabel = document.querySelector("#stageItemLabel"); const stageItemValue = document.querySelector("#stageItemValue"); const stageItemBar = document.querySelector("#stageItemBar"); const stageMetaText = document.querySelector("#stageMetaText"); const stageList = document.querySelector("#stageList"); const jobMessage = document.querySelector("#jobMessage"); const qualityHint = document.querySelector("#qualityHint"); const pagesValue = document.querySelector("#pagesValue"); const charactersValue = document.querySelector("#charactersValue"); const engineValue = document.querySelector("#engineValue"); const extractionValue = document.querySelector("#extractionValue"); const qualityValue = document.querySelector("#qualityValue"); const audioFormatValue = document.querySelector("#audioFormatValue"); const audioPanel = document.querySelector("#audioPanel"); const audioPlayer = document.querySelector("#audioPlayer"); const downloadLink = document.querySelector("#downloadLink"); const browserSpeechPanel = document.querySelector("#browserSpeechPanel"); const browserSpeechMessage = document.querySelector("#browserSpeechMessage"); const browserSpeakButton = document.querySelector("#browserSpeakButton"); const browserStopButton = document.querySelector("#browserStopButton"); const historyPanel = document.querySelector("#historyPanel"); const historyList = document.querySelector("#historyList"); let pollTimer = null; let maxUploadMb = null; let cloudMode = false; let hostedShellMode = false; let remoteWorkerMode = false; let workerBaseUrl = ""; let cloudTtsReady = false; let cloudTtsMaxChars = 900; let workerUnavailableDiagnostic = null; let currentAudioUrl = null; const defaultVoiceCatalog = { default: "silma-local", cloud: [ { id: "mms-ara", label: "Arabic Standard" }, { id: "silma-tts", label: "SILMA Arabic" }, ], local: [ { id: "silma-local", label: "1. SILMA Arabic - Most natural" }, { id: "espeak-ar-clear", label: "2. Local Arabic Clear - Fast fallback" }, { id: "espeak-ar", label: "3. Local Arabic - Standard fallback" }, { id: "espeak-ar-male", label: "Local Arabic Low" }, ], }; let voiceCatalog = defaultVoiceCatalog; let ocrWorker = null; let pdfjsLib = null; let tesseractCreateWorker = null; let activeJobId = null; let pendingUnlockCode = ""; let sessionUnlockCode = ""; let installedArabicOcrModels = []; let browserOcrProgress = { page: 0, total: 0 }; let browserSpeechText = ""; let browserSpeechSourceName = ""; const ocrModeLabels = { tesseract: "1. Tesseract Arabic - Best readable", "tesseract-fast": "2. Tesseract Arabic - Faster readable", paddleocr: "3. PaddleOCR Arabic - Faster fallback", "arabic-max": "Maximum Arabic OCR - slower", arabic: "Arabic OCR comparison - slower", "qari-ocr": "QARI Arabic books (best)", "tawkeed-ocr": "Tawkeed Arabic OCR", "katib-ocr": "KATIB Arabic OCR (lighter)", "arabic-qwen-ocr": "Arabic-Qwen OCR", "arabic-glm-ocr": "Arabic-GLM OCR v2", "baseer-ocr": "Baseer Arabic OCR", "paddleocr-vl": "PaddleOCR-VL heavy", best: "Best scan test", surya: "Surya heavy OCR", easyocr: "General Arabic OCR", auto: "Auto fallback", }; const qualityLabels = { good: "Good", warning: "Check", poor: "Poor", }; const pageLimitLabels = { 0: "Full book", 5: "Quick test", 10: "Longer test", }; const speedLabels = { 0.9: "Slower", 1: "Normal", 1.15: "Faster", }; const mixedPdfOcrMissingPageRatio = 0.15; const statusLabels = { queued: "Queued", reading: "Reading", speaking: "Creating", complete: "Ready", failed: "Failed", }; const defaultStageSteps = [ { id: "upload", label: "Upload" }, { id: "text", label: "Text scan" }, { id: "ocr", label: "Arabic OCR" }, { id: "voice", label: "Voice" }, { id: "ready", label: "Ready" }, ]; const progressRanges = { textStart: 5, textEnd: 18, ocrStart: 18, ocrRenderEnd: 30, ocrEnd: 72, voiceStart: 72, voiceEnd: 98, }; function rangeProgress(start, end, current, total) { const safeTotal = Math.max(1, Number(total) || 1); const safeCurrent = Math.max(0, Math.min(safeTotal, Number(current) || 0)); return Math.round(start + (safeCurrent / safeTotal) * (end - start)); } function setAuthenticated(authenticated) { loginPanel.classList.toggle("hidden", authenticated); workspace.classList.toggle("hidden", !authenticated); logoutButton.classList.toggle("hidden", !authenticated); if (authenticated) { uploadError.textContent = ""; loadHealth().then(loadHistory); } } function setError(target, message) { target.textContent = message || ""; } function formatDiagnosticMessage(diagnostic, fallback = "") { const message = diagnostic?.message || fallback || ""; const nextSteps = Array.isArray(diagnostic?.nextSteps) ? diagnostic.nextSteps.filter(Boolean).slice(0, 3) : []; if (!nextSteps.length) return message; return `${message} Next: ${nextSteps.join(" ")}`; } async function readJson(response) { const payload = await response.json().catch(() => ({})); if (!response.ok) { const error = new Error(payload.detail || "Request failed"); error.status = response.status; throw error; } return payload; } function describeNetworkError(error) { if (error?.name === "TypeError") { if (remoteWorkerMode && workerBaseUrl) { return ( "The OCR/TTS worker could not be reached from this browser. The Space may be waking up, or the browser blocked the cross-site upload. " + "Wait a minute, refresh, unlock with the code again, and retry." ); } return "The site could not be reached. Check the connection and try again."; } return error?.message || "Request failed"; } function apiUrl(path) { if (!remoteWorkerMode || path.startsWith("http")) return path; return `${workerBaseUrl}${path}`; } function apiFetch(path, options = {}) { const credentials = remoteWorkerMode ? "include" : "same-origin"; return fetch(apiUrl(path), { credentials, ...options }); } async function checkSession() { const payload = await fetch("/api/session").then(readJson); setAuthenticated(payload.authenticated); } async function loadHealth() { try { let payload = await fetch("/api/health").then(readJson); let engines = payload.engines; hostedShellMode = engines.deployment?.platform === "vercel"; workerBaseUrl = engines.deployment?.workerBaseUrl || ""; remoteWorkerMode = Boolean(hostedShellMode && workerBaseUrl); workerUnavailableDiagnostic = null; let workerDiagnostic = null; if (remoteWorkerMode) { cloudMode = false; cloudTtsReady = false; maxUploadMb = payload.maxUploadMb || 512; voiceCatalog = defaultVoiceCatalog; renderVoiceOptions(); engineNotice.textContent = "The OCR/TTS worker is connected. You can upload now while voice details finish checking."; engineNotice.classList.remove("warning"); } if (remoteWorkerMode) { await loginRemoteWorker(); try { payload = await apiFetch("/api/health").then(readJson); } catch (error) { if (error.status === 401) { await requireWorkerUnlock(); return; } workerDiagnostic = await loadWorkerDiagnostic(error); workerUnavailableDiagnostic = workerDiagnostic; remoteWorkerMode = false; } if (remoteWorkerMode) { engines = payload.engines; } } else if (hostedShellMode && !workerBaseUrl) { workerDiagnostic = await loadWorkerDiagnostic({ message: engines.deployment?.nextAction || "" }); workerUnavailableDiagnostic = workerDiagnostic; } cloudMode = hostedShellMode && !remoteWorkerMode && !workerBaseUrl; cloudTtsReady = Boolean(cloudMode && engines.cloudTts?.available); cloudTtsMaxChars = engines.cloudTts?.maxChunkChars || 900; voiceCatalog = engines.voices || voiceCatalog || defaultVoiceCatalog; installedArabicOcrModels = engines.ocr?.arabicTrainedStack?.installed || []; renderVoiceOptions(); if (!cloudMode || remoteWorkerMode) { setSelectValue(ocrModeSelect, engines.ocr?.preferred); } maxUploadMb = cloudMode ? (cloudTtsReady ? engines.cloudTts?.maxPdfMb || 512 : null) : payload.maxUploadMb; ocrModeSelect.disabled = Boolean(cloudMode && !remoteWorkerMode); const deploymentStatus = engines.deployment || {}; if (workerDiagnostic) { deploymentStatus.workerDiagnostic = workerDiagnostic; } const deploymentAction = deploymentStatus.nextAction || ""; if (remoteWorkerMode) { const label = engines.preferred === "silma" ? "SILMA Arabic voice" : "Arabic worker"; engineNotice.textContent = deploymentStatus.productionReady === false ? deploymentAction || "The worker is connected, but Vercel production settings still need attention." : `${label} is ready on the OCR/TTS worker. Uploads up to ${payload.maxUploadMb} MB are accepted.`; engineNotice.classList.toggle("warning", deploymentStatus.productionReady === false); return; } if (cloudMode && cloudTtsReady) { engineNotice.textContent = formatDiagnosticMessage(deploymentStatus.workerDiagnostic) || "Temporary cloud voice test mode is enabled. For reliable downloadable audio on Vercel, add WORKER_BASE_URL and redeploy."; engineNotice.classList.add("warning"); return; } if (hostedShellMode && workerBaseUrl && !remoteWorkerMode) { engineNotice.textContent = formatDiagnosticMessage(workerDiagnostic) || "The OCR/TTS worker is configured but not reachable yet. Check the Hugging Face Space URL, CORS_ORIGINS, and Space status, then redeploy."; engineNotice.classList.add("warning"); return; } if (cloudMode) { engineNotice.textContent = formatDiagnosticMessage(deploymentStatus.workerDiagnostic) || deploymentAction || "Downloadable Vercel audio needs the OCR/TTS worker. Add WORKER_BASE_URL in Vercel, then redeploy."; engineNotice.classList.add("warning"); return; } if (engines.readyForArabic) { const label = engines.preferred === "piper" ? "Neural Arabic voice" : "Arabic voice"; const installedArabicStack = engines.ocr?.arabicTrainedStack?.installed || installedArabicOcrModels; const installedArabicText = installedArabicStack.length ? ` Installed Arabic OCR: ${installedArabicStack.join(", ")}.` : ""; const ocrLabel = engines.ocr?.preferred === "arabic-max" ? "Maximum Arabic OCR is ready, but slower" : engines.ocr?.preferred === "arabic" ? "Arabic OCR comparison is ready, but slower" : engines.ocr?.preferred === "qari-ocr" ? "QARI Arabic book OCR is ready" : engines.ocr?.preferred === "tawkeed-ocr" ? "Tawkeed Arabic OCR is ready" : engines.ocr?.preferred === "katib-ocr" ? "KATIB Arabic OCR is ready" : engines.ocr?.preferred === "arabic-qwen-ocr" ? "Arabic-Qwen OCR is ready" : engines.ocr?.preferred === "arabic-glm-ocr" ? "Arabic-GLM OCR is ready" : engines.ocr?.preferred === "baseer-ocr" ? "Baseer Arabic OCR is ready" : engines.ocr?.preferred === "paddleocr" ? "PaddleOCR Arabic is ready, but less readable" : engines.ocr?.preferred === "paddleocr-vl" ? "PaddleOCR-VL heavy OCR is ready" : engines.ocr?.preferred === "surya" ? "Surya heavy OCR is ready" : engines.ocr?.preferred === "tesseract" ? "Rank 1 readable Tesseract Arabic OCR is ready" : engines.ocr?.preferred === "tesseract-fast" ? "Rank 2 faster Tesseract Arabic OCR is ready" : engines.ocr?.preferred === "best" ? "Best Arabic OCR test mode is ready" : engines.ocr?.preferred ? `${ocrModeLabels[engines.ocr.preferred] || "Arabic OCR"} is ready` : "Arabic OCR is not ready"; engineNotice.textContent = `${label} is ready. ${ocrLabel}. Uploads up to ${payload.maxUploadMb} MB are accepted.${installedArabicText}`; engineNotice.classList.remove("warning"); return; } engineNotice.textContent = `A better Arabic voice is not installed yet. Upload limit: ${payload.maxUploadMb} MB.`; engineNotice.classList.add("warning"); } catch (error) { engineNotice.textContent = "Unable to check the voice."; engineNotice.classList.add("warning"); } } async function loadWorkerDiagnostic(error) { try { const diagnostic = await fetch("/api/worker-diagnostics").then(readJson); if (diagnostic?.message) return diagnostic; } catch (_diagnosticError) { // Keep the original worker error visible when the diagnostic endpoint cannot run. } return { status: "browser-worker-error", reachable: false, workerBaseUrl, message: `The OCR/TTS worker could not be reached from this browser. ${error.message}`, }; } async function requireWorkerUnlock() { pendingUnlockCode = ""; sessionUnlockCode = ""; remoteWorkerMode = false; cloudMode = false; await fetch("/api/logout", { method: "POST" }).catch(() => null); setAuthenticated(false); setError(loginError, "Enter the code once more to unlock the OCR worker."); engineNotice.textContent = "The OCR worker needs to be unlocked."; engineNotice.classList.add("warning"); } async function loginRemoteWorker() { const code = pendingUnlockCode || sessionUnlockCode; if (!remoteWorkerMode || !code) return false; const formData = new FormData(); formData.append("code", code); try { await fetch(`${workerBaseUrl}/api/login`, { method: "POST", body: formData, credentials: "include", }).then(readJson); pendingUnlockCode = ""; return true; } catch (_error) { return false; } } async function ensureWorkerSessionForUpload() { if (!remoteWorkerMode) return true; try { const session = await apiFetch("/api/session").then(readJson); if (session.authenticated) return true; } catch (_error) { // Try a fresh login below before surfacing an upload-specific network message. } if (await loginRemoteWorker()) return true; try { const session = await apiFetch("/api/session").then(readJson); return Boolean(session.authenticated); } catch (_error) { return false; } } async function loadHistory() { if (cloudMode && !remoteWorkerMode) { historyPanel.classList.add("hidden"); return; } try { const payload = await apiFetch("/api/jobs").then(readJson); const jobs = payload.jobs || []; renderHistory(jobs); resumeActiveJob(jobs); } catch (error) { historyPanel.classList.add("hidden"); } } function renderHistory(jobs) { const visibleJobs = jobs.filter((job) => job.filename); historyPanel.classList.toggle("hidden", visibleJobs.length === 0); historyList.innerHTML = ""; for (const job of visibleJobs) { const item = document.createElement("li"); const details = document.createElement("div"); const name = document.createElement("strong"); const meta = document.createElement("span"); name.textContent = job.filename; meta.textContent = `${statusLabels[job.status] || job.status} - ${formatPages(job)}`; if (job.pageLimit) { meta.textContent += ` - ${pageLimitLabels[job.pageLimit] || `${job.pageLimit} pages`}`; } if (job.ttsSpeed && job.ttsSpeed !== 1) { meta.textContent += ` - ${speedLabels[job.ttsSpeed] || `${job.ttsSpeed}x`}`; } if (job.ocrEngine && job.extraction && job.extraction !== "embedded") { meta.textContent += ` - ${ocrModeLabels[job.ocrEngine] || job.ocrEngine}`; } if (job.textQuality) { meta.textContent += ` - ${qualityLabels[job.textQuality] || job.textQuality}`; } if (job.audioFormat) { meta.textContent += ` - ${job.audioFormat.toUpperCase()} ${formatBytes(job.audioBytes)}`; } details.append(name, meta); item.append(details); const actions = document.createElement("div"); actions.className = "history-actions"; if (["queued", "reading", "speaking"].includes(job.status)) { const watchButton = document.createElement("button"); watchButton.type = "button"; watchButton.className = "link-button"; watchButton.textContent = "Watch"; watchButton.addEventListener("click", () => startPolling(job.id)); actions.append(watchButton); } if (["failed", "complete"].includes(job.status)) { const settingsButton = document.createElement("button"); settingsButton.type = "button"; settingsButton.className = "link-button"; settingsButton.textContent = "Use settings"; settingsButton.addEventListener("click", () => applyJobSettings(job)); actions.append(settingsButton); } if (job.downloadUrl) { const link = document.createElement("a"); link.href = apiUrl(job.downloadUrl); link.textContent = "Download"; actions.append(link); } if (actions.children.length) item.append(actions); historyList.append(item); } } function resumeActiveJob(jobs) { if (activeJobId || pollTimer) return; const activeJob = jobs.find((job) => ["queued", "reading", "speaking"].includes(job.status)); if (activeJob) startPolling(activeJob.id); } function applyJobSettings(job) { setSelectValue(voiceSelect, job.voiceId); setSelectValue(speedSelect, String(job.ttsSpeed || 1)); setSelectValue(ocrModeSelect, job.ocrEngine); setSelectValue(pageLimitSelect, String(job.pageLimit || 0)); describeOcrMode(); setError(uploadError, `Settings loaded for ${job.filename || "that job"}. Choose the PDF again, then create audio.`); pdfInput.focus(); } function setSelectValue(select, value) { if (value === undefined || value === null || value === "") return; const text = String(value); if ([...select.options].some((option) => option.value === text)) { select.value = text; } } loginForm.addEventListener("submit", async (event) => { event.preventDefault(); setError(loginError, ""); const formData = new FormData(); pendingUnlockCode = codeInput.value.trim(); sessionUnlockCode = pendingUnlockCode; formData.append("code", pendingUnlockCode); try { await fetch("/api/login", { method: "POST", body: formData }).then(readJson); codeInput.value = ""; setAuthenticated(true); } catch (error) { setError(loginError, error.message); } }); logoutButton.addEventListener("click", async () => { if (remoteWorkerMode) { await apiFetch("/api/logout", { method: "POST" }).catch(() => null); } await fetch("/api/logout", { method: "POST" }).then(readJson); pendingUnlockCode = ""; sessionUnlockCode = ""; setAuthenticated(false); }); pdfInput.addEventListener("change", () => { describeFile(pdfInput.files[0]); }); ocrModeSelect.addEventListener("change", () => { describeOcrMode(); }); browserSpeakButton.addEventListener("click", () => { speakBrowserSpeechText(); }); browserStopButton.addEventListener("click", () => { if (canUseBrowserSpeech()) window.speechSynthesis.cancel(); jobMessage.textContent = "Browser read-aloud stopped."; }); uploadForm.addEventListener("submit", async (event) => { event.preventDefault(); setError(uploadError, ""); const file = pdfInput.files[0]; if (!file) { setError(uploadError, "Choose a PDF first."); return; } if (!file.name.toLowerCase().endsWith(".pdf")) { setError(uploadError, "Choose a PDF file."); return; } if (maxUploadMb && file.size > maxUploadMb * 1024 * 1024) { setError(uploadError, `This PDF is larger than ${maxUploadMb} MB.`); return; } if (hostedShellMode && workerBaseUrl && !remoteWorkerMode) { setError( uploadError, formatDiagnosticMessage(workerUnavailableDiagnostic) || "The OCR/TTS worker is configured but not reachable. Open the Hugging Face Space, confirm it is running, set CORS_ORIGINS to this Vercel URL, then redeploy.", ); return; } if (cloudMode && !remoteWorkerMode) { setError( uploadError, formatDiagnosticMessage( workerUnavailableDiagnostic, "This Vercel site needs WORKER_BASE_URL before it can create downloadable Arabic audio. Deploy the Hugging Face Space worker, set Vercel WORKER_BASE_URL to that https://*.hf.space URL, remove the temporary Hugging Face cloud TTS variables, then redeploy.", ), ); updateStage({ id: "upload", label: "Worker needed", phase: "Vercel setup needed", detail: "Downloadable audio on Vercel is created by the OCR/TTS worker, not the temporary cloud voice fallback.", progress: 0, steps: defaultStageSteps.map((step) => ({ ...step, state: "pending" })), }); return; } await createLocalAudio(file); }); uploadForm.addEventListener("dragover", (event) => { event.preventDefault(); uploadForm.classList.add("is-dragging"); }); uploadForm.addEventListener("dragleave", () => { uploadForm.classList.remove("is-dragging"); }); uploadForm.addEventListener("drop", (event) => { event.preventDefault(); uploadForm.classList.remove("is-dragging"); const file = event.dataTransfer.files[0]; if (!file) return; const transfer = new DataTransfer(); transfer.items.add(file); pdfInput.files = transfer.files; describeFile(file); }); async function createLocalAudio(file) { const formData = new FormData(); formData.append("pdf", file); formData.append("voice_id", voiceSelect.value); formData.append("tts_speed", speedSelect.value); formData.append("ocr_engine", ocrModeSelect.value); formData.append("page_limit", pageLimitSelect.value); resetJob(file.name, "Uploading PDF."); createButton.disabled = true; try { if (remoteWorkerMode && !(await ensureWorkerSessionForUpload())) { throw new Error("Enter the code again so this browser can upload directly to the OCR/TTS worker."); } const payload = await apiFetch("/api/jobs", { method: "POST", body: formData }).then(readJson); startPolling(payload.jobId); } catch (error) { setError(uploadError, describeNetworkError(error)); updateProgress(0); setJobState("failed"); createButton.disabled = false; } } async function createCloudAudio(file) { const browserSpeechAvailable = canUseBrowserSpeech(); if (!cloudTtsReady && !browserSpeechAvailable) { setError( uploadError, formatDiagnosticMessage( workerUnavailableDiagnostic, "Downloadable Vercel audio needs WORKER_BASE_URL. Browser read-aloud can be used for a short temporary test.", ), ); return; } resetJob(file.name, "Reading PDF in this browser."); createButton.disabled = true; let speechTextForFallback = ""; try { const extracted = await extractPdfText(file, getSelectedPageLimit()); const speechText = prepareTextForSpeech(extracted.text); speechTextForFallback = speechText; const chunks = chunkText(speechText, cloudTtsMaxChars); const quality = assessTextQuality(extracted.text, speechText); pagesValue.textContent = formatPages({ pages: extracted.pages, totalPages: extracted.totalPages }); charactersValue.textContent = speechText.length.toLocaleString(); engineValue.textContent = "cloud"; extractionValue.textContent = extracted.extraction; qualityValue.textContent = formatQuality(quality); showQualityHint(quality); if (!chunks.length || !quality.readyForTts) { const reason = quality.reasons.length ? ` ${quality.reasons.join("; ")}.` : ""; throw new Error(`Text quality is poor, so audio was not created.${reason}`); } if (!cloudTtsReady) { showBrowserSpeechFallback( speechText, file.name, formatDiagnosticMessage(workerUnavailableDiagnostic, "Downloadable Vercel audio needs WORKER_BASE_URL."), ); return; } audioFormatValue.textContent = "WAV"; setJobState("speaking", "Creating"); updateStage({ id: "voice", label: "Voice", phase: "Creating voice", detail: "Creating Arabic audio from the cleaned text.", progress: progressRanges.voiceStart, step: 4, totalSteps: 5, itemProgress: stageItem("Audio part", 0, chunks.length), steps: stepStates("voice"), }); const audioBuffers = []; for (let index = 0; index < chunks.length; index += 1) { const label = `Creating audio part ${index + 1} of ${chunks.length}`; jobMessage.textContent = label; updateStage({ id: "voice", label: "Voice", phase: "Creating audio parts", detail: label, progress: rangeProgress(progressRanges.voiceStart, progressRanges.voiceEnd, index + 1, chunks.length), step: 4, totalSteps: 5, itemProgress: stageItem("Audio part", index + 1, chunks.length), steps: stepStates("voice"), }); const response = await fetch("/api/cloud-tts", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ text: chunks[index], voiceId: voiceSelect.value }), }); if (!response.ok) { const payload = await response.json().catch(() => ({})); throw new Error(payload.detail || "Cloud voice failed."); } audioBuffers.push(await response.arrayBuffer()); } const audioBlob = mergeWavBuffers(audioBuffers); showAudio(audioBlob, file.name); setJobState("complete"); jobMessage.textContent = "Audio is ready."; updateStage({ id: "ready", label: "Ready", phase: "Audio ready", detail: "Audio is ready to play or download.", progress: 100, step: 5, totalSteps: 5, steps: defaultStageSteps.map((step) => ({ ...step, state: "done" })), }); } catch (error) { if (speechTextForFallback && canUseBrowserSpeech()) { const fallbackMessage = formatDiagnosticMessage( workerUnavailableDiagnostic, `${error.message} Browser read-aloud is available now, but downloadable Vercel audio needs WORKER_BASE_URL.`, ); showBrowserSpeechFallback( speechTextForFallback, file.name, fallbackMessage, ); setError(uploadError, fallbackMessage); return; } setError(uploadError, error.message); setJobState("failed"); jobMessage.textContent = "Processing failed."; updateStage({ id: "ocr", label: "Stopped", phase: "Processing stopped", detail: "Processing stopped before audio could be created.", progress: 0, steps: defaultStageSteps.map((step, index) => ({ ...step, state: index === 2 ? "failed" : index < 2 ? "done" : "pending", })), }); } finally { createButton.disabled = false; } } async function extractPdfText(file, pageLimit = 0) { const pdfjs = await getPdfJs(); const data = await file.arrayBuffer(); const document = await pdfjs.getDocument({ data }).promise; const pageTexts = []; const pagesToProcess = effectivePageCount(document.numPages, pageLimit); for (let pageNumber = 1; pageNumber <= pagesToProcess; pageNumber += 1) { const page = await document.getPage(pageNumber); const content = await page.getTextContent(); const pageText = cleanText(content.items.map((item) => item.str || "").join(" ")); pageTexts.push(pageText); pagesValue.textContent = pageNumber; jobMessage.textContent = `Reading page ${pageNumber} of ${pagesToProcess}`; updateStage({ id: "text", label: "Text scan", phase: "Checking text layer", detail: `Checking the PDF text layer: page ${pageNumber} of ${pagesToProcess}.`, progress: rangeProgress(progressRanges.textStart, progressRanges.textEnd, pageNumber, pagesToProcess), step: 2, totalSteps: 5, itemProgress: stageItem("PDF page", pageNumber, pagesToProcess), steps: stepStates("text"), }); } const embeddedText = cleanText(pageTexts.filter(Boolean).join("\n\n")); if (embeddedText.length >= 20 && !shouldOcrMixedPdf(pageTexts)) { return { pages: pagesToProcess, totalPages: document.numPages, text: embeddedText, extraction: "browser" }; } if (embeddedText.length >= 20) { const ratio = embeddedTextMissingPageRatio(pageTexts); jobMessage.textContent = `Embedded text is incomplete on ${Math.round(ratio * 100)}% of pages. Starting Arabic OCR.`; } else { jobMessage.textContent = "No text layer found. Starting Arabic OCR."; } const ocrText = await ocrPdfText(document, pagesToProcess); return { pages: pagesToProcess, totalPages: document.numPages, text: cleanText(ocrText), extraction: "ocr" }; } function embeddedTextMissingPageRatio(pageTexts) { if (!pageTexts.length) return 1; const missingPages = pageTexts.filter((text) => text.length < 20).length; return missingPages / pageTexts.length; } function shouldOcrMixedPdf(pageTexts) { if (!pageTexts.length) return true; if (!pageTexts.some((text) => text.length >= 20)) return true; return embeddedTextMissingPageRatio(pageTexts) > mixedPdfOcrMissingPageRatio; } async function ocrPdfText(document, pagesToProcess) { const worker = await getOcrWorker(); const pieces = []; for (let pageNumber = 1; pageNumber <= pagesToProcess; pageNumber += 1) { browserOcrProgress = { page: pageNumber, total: pagesToProcess }; jobMessage.textContent = `OCR reading page ${pageNumber} of ${pagesToProcess}`; updateStage({ id: "ocr", label: "Arabic OCR", phase: "Rendering scanned pages", detail: `Preparing scanned page ${pageNumber} of ${pagesToProcess}.`, progress: rangeProgress(progressRanges.ocrStart, progressRanges.ocrEnd, pageNumber - 1, pagesToProcess), step: 3, totalSteps: 5, itemProgress: stageItem("Scanned page", pageNumber - 1, pagesToProcess), steps: stepStates("ocr"), }); const imageUrl = await renderPageForOcr(document, pageNumber); const result = await withTesseractNoiseFilter(() => worker.recognize(imageUrl)); URL.revokeObjectURL(imageUrl); if (result.data.text.trim()) pieces.push(result.data.text); updateStage({ id: "ocr", label: "Arabic OCR", phase: "Reading scanned pages", detail: `Finished scanned page ${pageNumber} of ${pagesToProcess}.`, progress: rangeProgress(progressRanges.ocrStart, progressRanges.ocrEnd, pageNumber, pagesToProcess), step: 3, totalSteps: 5, itemProgress: stageItem("Scanned page", pageNumber, pagesToProcess), steps: stepStates("ocr"), }); } return pieces.join("\n\n"); } function effectivePageCount(totalPages, pageLimit) { return pageLimit > 0 ? Math.max(0, Math.min(totalPages, pageLimit)) : totalPages; } function getSelectedPageLimit() { return Number.parseInt(pageLimitSelect.value, 10) || 0; } async function getOcrWorker() { if (ocrWorker) return ocrWorker; jobMessage.textContent = "Loading Arabic OCR. This can take a minute the first time."; updateStage({ id: "ocr", label: "Arabic OCR", phase: "Loading Arabic OCR", detail: "Loading Arabic OCR files. This can take a minute the first time.", progress: progressRanges.ocrStart, step: 3, totalSteps: 5, steps: stepStates("ocr"), }); const createWorker = await getTesseractCreateWorker(); ocrWorker = await createWorker("ara", 1, { langPath: "https://tessdata.projectnaptha.com/4.0.0", workerPath: "https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/worker.min.js", corePath: "https://cdn.jsdelivr.net/npm/tesseract.js-core@5", logger: (event) => { if (event.status === "recognizing text" && event.progress) { const percent = Math.round(event.progress * 100); const page = browserOcrProgress.page || 1; const total = browserOcrProgress.total || 1; jobMessage.textContent = `OCR reading page ${page} of ${total}: ${percent}%`; updateStage({ id: "ocr", label: "Arabic OCR", phase: "Reading scanned pages", detail: `Reading scanned page ${page} of ${total}: ${percent}%.`, progress: rangeProgress(progressRanges.ocrStart, progressRanges.ocrEnd, page - 1 + event.progress, total), step: 3, totalSteps: 5, itemProgress: stageItem("Scanned page", page - 1 + event.progress, total), steps: stepStates("ocr"), }); } }, }); if (typeof ocrWorker.setParameters === "function") { await ocrWorker.setParameters({ tessedit_pageseg_mode: "6", preserve_interword_spaces: "1", }); } return ocrWorker; } async function withTesseractNoiseFilter(callback) { const originalWarn = console.warn; const originalError = console.error; const isKnownNoise = (message) => message.includes("Image too small to scale") || message.includes("Line cannot be recognized"); const filter = (original) => (...args) => { const message = args.map((arg) => String(arg)).join(" "); if (isKnownNoise(message)) return; original(...args); }; console.warn = filter(originalWarn); console.error = filter(originalError); try { return await callback(); } finally { console.warn = originalWarn; console.error = originalError; } } async function getPdfJs() { if (pdfjsLib) return pdfjsLib; pdfjsLib = await import("https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.10.38/pdf.min.mjs"); pdfjsLib.GlobalWorkerOptions.workerSrc = "https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.10.38/pdf.worker.min.mjs"; return pdfjsLib; } async function getTesseractCreateWorker() { if (tesseractCreateWorker) return tesseractCreateWorker; const tesseract = await import( "https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.esm.min.js" ); tesseractCreateWorker = tesseract.createWorker || tesseract.default?.createWorker; if (!tesseractCreateWorker) { throw new Error("Arabic OCR could not load. Try the local version for scanned PDFs."); } return tesseractCreateWorker; } async function renderPageForOcr(document, pageNumber) { const page = await document.getPage(pageNumber); const viewport = page.getViewport({ scale: 2 }); const canvas = documentCanvas(viewport.width, viewport.height); const context = canvas.getContext("2d"); await page.render({ canvasContext: context, viewport }).promise; return await canvasToObjectUrl(canvas); } function documentCanvas(width, height) { const canvas = document.createElement("canvas"); canvas.width = Math.ceil(width); canvas.height = Math.ceil(height); return canvas; } function canvasToObjectUrl(canvas) { return new Promise((resolve, reject) => { canvas.toBlob((blob) => { if (!blob) { reject(new Error("Could not render PDF page for OCR.")); return; } resolve(URL.createObjectURL(blob)); }, "image/png"); }); } function cleanText(text) { return text .normalize("NFKC") .replace(/[\u200e\u200f]/g, " ") .replace(/[\t\r\f\v]+/g, " ") .replace(/\n{3,}/g, "\n\n") .replace(/[ ]{2,}/g, " ") .trim(); } function prepareTextForSpeech(text) { const cleaned = cleanText(text); const rawLines = cleaned.split(/\n/).map((line) => line.trim()); const counts = new Map(); for (const line of rawLines) { if (!line) continue; counts.set(line, (counts.get(line) || 0) + 1); } const repeatedLines = new Set( [...counts.entries()].filter(([line, count]) => count >= 3 && line.length <= 48).map(([line]) => line), ); const output = []; let previousLine = ""; let blankPending = false; for (const line of rawLines) { if (!line) { blankPending = output.length > 0; continue; } if (line === previousLine) continue; previousLine = line; if (shouldDropSpeechLine(line, repeatedLines)) continue; if (blankPending && output.length && output[output.length - 1] !== "") output.push(""); output.push(line); blankPending = false; } return output.join("\n").replace(/\n{3,}/g, "\n\n").trim(); } function shouldDropSpeechLine(line, repeatedLines) { const compact = line.trim(); if (!compact) return true; if (/^[\s\-–—_.:|/\\()[\]{}]*(?:[0-9٠-٩۰-۹]+|[ivxlcdmIVXLCDM]+)[\s\-–—_.:|/\\()[\]{}]*$/.test(compact)) { return true; } const metrics = lineNoiseMetrics(compact); if (repeatedLines.has(compact)) return true; if ( !metrics.arabicWords && compact.length <= 80 && (compact.length <= 24 || metrics.digits >= 3 || metrics.symbols >= 2 || metrics.latinWords) ) { return true; } if (compact.length <= 2 && !metrics.arabicWords) return true; if (metrics.digits >= 4 && metrics.arabicWords <= 3) return true; if (metrics.digits >= 6 && metrics.digits > metrics.arabicChars) return true; if (metrics.symbols >= 3 && metrics.arabicWords <= 4) return true; if (metrics.placeholders >= 2 && metrics.arabicWords <= 4) return true; return false; } function lineNoiseMetrics(line) { const arabicWords = line.match(/[\u0600-\u06ff\ufb50-\ufdff\ufe70-\ufeff]+/g) || []; const digits = line.match(/[0-9\u0660-\u0669\u06f0-\u06f9]/g) || []; const symbols = line.match(/[!@#$%^&*_+=<>|~`]/g) || []; const placeholders = line.match(/[?\ufffd]/g) || []; const latinWords = line.match(/[A-Za-z]{3,}/g) || []; return { arabicWords: arabicWords.length, arabicChars: arabicWords.reduce((total, word) => total + word.length, 0), digits: digits.length, symbols: symbols.length, placeholders: placeholders.length, latinWords: latinWords.length, }; } function scoreTextQuality(text) { const speechText = prepareTextForSpeech(text); const arabicWords = speechText.match(/[\u0600-\u06ff\ufb50-\ufdff\ufe70-\ufeff]+/g) || []; const placeholderCount = (speechText.match(/[?\ufffd]/g) || []).length; const latinWords = speechText.match(/[A-Za-z]{3,}/g) || []; const digitNoise = (speechText.match(/[0-9\u0660-\u0669\u06f0-\u06f9]/g) || []).length; const lines = speechText.split(/\n/).map((line) => line.trim()).filter(Boolean); const metrics = lines.map((line) => lineNoiseMetrics(line)); const fragmentLines = metrics.filter((item, index) => ( item.arabicWords <= 2 && item.arabicChars <= 18 && lines[index].length <= 28 )).length; const singleArabicWords = arabicWords.filter((word) => word.length === 1).length; const singleArabicWordRatio = singleArabicWords / Math.max(arabicWords.length, 1); const fragmentLineRatio = fragmentLines / Math.max(lines.length, 1); const shortLines = lines.filter((line) => line.length <= 3).length; const repeatedLines = lines.length - new Set(lines).size; const commonWords = new Set(["في", "من", "على", "هذا", "هذه", "التي", "الذي", "كان", "إلى", "الى", "عن", "مع", "هو", "هي"]); const commonHits = arabicWords.filter((word) => commonWords.has(word)).length; const score = speechText.length * 0.05 + arabicWords.length * 3 + commonHits * 18 - placeholderCount * 25 - shortLines * 8 - repeatedLines * 6 - latinWords.length * 4 - digitNoise * 3 - singleArabicWords * 6 - fragmentLines * 14; return { score, speechText, arabicWords, placeholderCount, latinWords, digitNoise, singleArabicWords, singleArabicWordRatio, fragmentLines, fragmentLineRatio, }; } function assessTextQuality(text, speechText = prepareTextForSpeech(text)) { const scored = scoreTextQuality(speechText); const placeholderRatio = scored.placeholderCount / Math.max(speechText.length, 1); const reasons = []; if (speechText.length < 20) reasons.push("too little readable text after cleanup"); if (scored.arabicWords.length < 5) reasons.push("too few Arabic words"); if (placeholderRatio >= 0.2) reasons.push("too many unreadable placeholder characters"); else if (placeholderRatio > 0) reasons.push("some unreadable placeholder characters remain"); if (scored.digitNoise >= Math.max(20, scored.arabicWords.length)) reasons.push("digit-heavy OCR noise remains"); if (scored.singleArabicWordRatio >= 0.1 && scored.arabicWords.length >= 25) { reasons.push("many one-letter Arabic OCR fragments remain"); } if (scored.fragmentLineRatio >= 0.25 && speechText.split(/\n/).length >= 8) { reasons.push("many low-information OCR lines remain"); } if (scored.latinWords.length >= 3 && scored.latinWords.length >= scored.arabicWords.length) { reasons.push("non-Arabic OCR text dominates"); } const blockingReasons = new Set([ "too little readable text after cleanup", "too few Arabic words", "too many unreadable placeholder characters", "non-Arabic OCR text dominates", ]); const quality = reasons.some((reason) => blockingReasons.has(reason)) ? "poor" : reasons.length ? "warning" : "good"; return { quality, readyForTts: quality !== "poor", reasons, score: Math.round(scored.score * 100) / 100, singleArabicWordRatio: Math.round(scored.singleArabicWordRatio * 10000) / 10000, fragmentLineRatio: Math.round(scored.fragmentLineRatio * 10000) / 10000, }; } function formatQuality(quality) { if (!quality?.quality) return "-"; const label = qualityLabels[quality.quality] || quality.quality; return `${label} ${quality.score ? Math.round(quality.score) : ""}`.trim(); } function splitLongTextAtWordBoundaries(text, size) { const pieces = []; let remaining = text.trim(); while (remaining.length > size) { let splitAt = remaining.lastIndexOf(" ", size); if (splitAt < Math.max(1, Math.floor(size * 0.45))) splitAt = size; const piece = remaining.slice(0, splitAt).trim(); if (piece) pieces.push(piece); remaining = remaining.slice(splitAt).trim(); } if (remaining) pieces.push(remaining); return pieces; } function chunkText(text, size) { text = prepareTextForSpeech(text); const paragraphs = text.split(/\n{2,}/).map((part) => part.trim()).filter(Boolean); const chunks = []; let current = ""; for (const paragraph of paragraphs) { if ((current.length + paragraph.length + 2) <= size) { current = `${current}\n\n${paragraph}`.trim(); continue; } if (current) chunks.push(current); if (paragraph.length <= size) { current = paragraph; continue; } const sentences = paragraph.split(/(?<=[.!\u061f?\u060c\u061b])\s+/); current = ""; for (const sentence of sentences) { let remaining = sentence; if ((current.length + remaining.length + 1) <= size) { current = `${current} ${remaining}`.trim(); } else { if (current) chunks.push(current); const sentenceParts = splitLongTextAtWordBoundaries(remaining, size); chunks.push(...sentenceParts.slice(0, -1)); current = sentenceParts[sentenceParts.length - 1] || ""; } } } if (current) chunks.push(current); return chunks; } function mergeWavBuffers(buffers) { if (buffers.length === 1) return new Blob([buffers[0]], { type: "audio/wav" }); const wavs = buffers.map(parseWav); const fmt = wavs[0].fmt; const dataLength = wavs.reduce((total, wav) => total + wav.data.length, 0); const output = new ArrayBuffer(12 + 8 + fmt.length + 8 + dataLength); const view = new DataView(output); const bytes = new Uint8Array(output); let offset = 0; offset = writeString(bytes, offset, "RIFF"); view.setUint32(offset, output.byteLength - 8, true); offset += 4; offset = writeString(bytes, offset, "WAVE"); offset = writeString(bytes, offset, "fmt "); view.setUint32(offset, fmt.length, true); offset += 4; bytes.set(fmt, offset); offset += fmt.length; offset = writeString(bytes, offset, "data"); view.setUint32(offset, dataLength, true); offset += 4; for (const wav of wavs) { bytes.set(wav.data, offset); offset += wav.data.length; } return new Blob([output], { type: "audio/wav" }); } function parseWav(buffer) { const view = new DataView(buffer); const bytes = new Uint8Array(buffer); if (readString(bytes, 0, 4) !== "RIFF" || readString(bytes, 8, 4) !== "WAVE") { throw new Error("Cloud voice returned an audio format that cannot be merged."); } let offset = 12; let fmt = null; let data = null; while (offset + 8 <= bytes.length) { const id = readString(bytes, offset, 4); const length = view.getUint32(offset + 4, true); const start = offset + 8; const end = start + length; if (id === "fmt ") fmt = bytes.slice(start, end); if (id === "data") data = bytes.slice(start, end); offset = end + (length % 2); } if (!fmt || !data) throw new Error("Cloud voice returned invalid WAV audio."); return { fmt, data }; } function readString(bytes, offset, length) { return String.fromCharCode(...bytes.slice(offset, offset + length)); } function writeString(bytes, offset, value) { for (let index = 0; index < value.length; index += 1) { bytes[offset + index] = value.charCodeAt(index); } return offset + value.length; } function showAudio(blob, sourceName) { resetBrowserSpeechFallback(); if (currentAudioUrl) URL.revokeObjectURL(currentAudioUrl); currentAudioUrl = URL.createObjectURL(blob); audioPlayer.src = currentAudioUrl; downloadLink.href = currentAudioUrl; downloadLink.download = `${sourceName.replace(/\.pdf$/i, "") || "arabic-pdf"}.wav`; audioPanel.classList.remove("hidden"); } function canUseBrowserSpeech() { return "speechSynthesis" in window && typeof window.SpeechSynthesisUtterance === "function"; } function resetBrowserSpeechFallback() { if (canUseBrowserSpeech()) window.speechSynthesis.cancel(); browserSpeechText = ""; browserSpeechSourceName = ""; browserSpeechPanel.classList.add("hidden"); browserSpeechMessage.textContent = "Browser read-aloud is ready."; } function showBrowserSpeechFallback(text, sourceName, message) { browserSpeechText = text; browserSpeechSourceName = sourceName; audioPanel.classList.add("hidden"); audioPlayer.removeAttribute("src"); downloadLink.removeAttribute("href"); downloadLink.removeAttribute("download"); audioFormatValue.textContent = "Browser only"; engineValue.textContent = "browser voice"; setJobState("speaking", "Listen only"); jobMessage.textContent = "Browser read-aloud is ready."; browserSpeechMessage.textContent = message; browserSpeechPanel.classList.remove("hidden"); updateStage({ id: "ready", label: "Listen", phase: "Browser read-aloud ready", detail: "Downloadable audio needs the Vercel worker. Browser read-aloud is ready for listening.", progress: 100, step: 5, totalSteps: 5, steps: defaultStageSteps.map((step) => ({ ...step, state: "done" })), }); } function getBrowserArabicVoice() { if (!canUseBrowserSpeech()) return null; const voices = window.speechSynthesis.getVoices(); return ( voices.find((voice) => voice.lang?.toLowerCase().startsWith("ar")) || voices.find((voice) => /arabic|ar[-_]/i.test(`${voice.name} ${voice.lang}`)) || null ); } function speakBrowserSpeechText() { if (!browserSpeechText || !canUseBrowserSpeech()) return; const chunks = chunkText(browserSpeechText, 700); if (!chunks.length) return; const synth = window.speechSynthesis; const voice = getBrowserArabicVoice(); const rate = Number(speedSelect.value) || 1; let index = 0; synth.cancel(); const speakNext = () => { if (index >= chunks.length) { jobMessage.textContent = "Browser read-aloud finished."; return; } const utterance = new window.SpeechSynthesisUtterance(chunks[index]); utterance.lang = voice?.lang || "ar"; utterance.voice = voice; utterance.rate = Math.max(0.6, Math.min(1.35, rate)); utterance.onend = () => { index += 1; const nextPart = Math.min(index + 1, chunks.length); updateStage({ id: "voice", label: "Listening", phase: "Browser read-aloud", detail: index >= chunks.length ? "Browser read-aloud finished." : `Reading part ${nextPart} of ${chunks.length}.`, progress: rangeProgress(progressRanges.voiceStart, 100, index, chunks.length), step: 5, totalSteps: 5, itemProgress: stageItem("Read-aloud part", index, chunks.length), steps: defaultStageSteps.map((step) => ({ ...step, state: "done" })), }); speakNext(); }; utterance.onerror = () => { jobMessage.textContent = "Browser read-aloud stopped."; }; jobMessage.textContent = `Reading ${browserSpeechSourceName || "PDF"} in the browser.`; synth.speak(utterance); }; updateStage({ id: "voice", label: "Listening", phase: "Browser read-aloud", detail: `Reading part 1 of ${chunks.length}.`, progress: progressRanges.voiceStart, step: 5, totalSteps: 5, itemProgress: stageItem("Read-aloud part", 0, chunks.length), steps: defaultStageSteps.map((step) => ({ ...step, state: "done" })), }); speakNext(); } function describeFile(file) { fileName.textContent = file ? `${file.name} (${Math.ceil(file.size / 1024 / 1024)} MB)` : "No file selected"; } function describeOcrMode() { const installedText = installedArabicOcrModels.length ? ` Installed now: ${installedArabicOcrModels.join(", ")}.` : " Install QARI, Tawkeed, or KATIB on the worker for the best Arabic-trained OCR."; if (ocrModeSelect.value === "arabic-max") { engineNotice.textContent = `Maximum Arabic OCR selected. It tries the most engines and keeps the cleanest text, but it is slower. Use Quick test first.${installedText}`; engineNotice.classList.remove("warning"); } else if (ocrModeSelect.value === "arabic") { engineNotice.textContent = `Arabic OCR comparison selected. It compares installed OCR paths and can be much slower than the recommended Tesseract setting.${installedText}`; engineNotice.classList.remove("warning"); } else if (ocrModeSelect.value === "qari-ocr") { engineNotice.textContent = "QARI Arabic books selected. Use this on a short sample or strong worker; it is trained for Arabic books, Islamic texts, manuscripts, and layout-aware Arabic transcription."; engineNotice.classList.remove("warning"); } else if (ocrModeSelect.value === "tawkeed-ocr") { engineNotice.textContent = "Tawkeed Arabic OCR selected. Use this on a short sample or worker; it is Arabic-first and trained for documents, handwriting, and scene text."; engineNotice.classList.remove("warning"); } else if (ocrModeSelect.value === "katib-ocr") { engineNotice.textContent = "KATIB Arabic OCR selected. Use this on a short sample; it is a smaller Arabic-trained model for printed and handwritten Arabic."; engineNotice.classList.remove("warning"); } else if (ocrModeSelect.value === "arabic-qwen-ocr") { engineNotice.textContent = "Arabic-Qwen OCR selected. Use this on a short sample or worker; it is a 0.9B Arabic-trained OCR model."; engineNotice.classList.remove("warning"); } else if (ocrModeSelect.value === "arabic-glm-ocr") { engineNotice.textContent = "Arabic-GLM OCR selected. Use this on a short sample or strong worker; it is a recent Arabic-trained OCR model for books and scanned documents."; engineNotice.classList.remove("warning"); } else if (ocrModeSelect.value === "baseer-ocr") { engineNotice.textContent = "Baseer Arabic OCR selected. Use this on a short sample or worker for complex Arabic document layouts."; engineNotice.classList.remove("warning"); } else if (ocrModeSelect.value === "best") { engineNotice.textContent = "Best scan test selected. Use this on a short sample, then run the winning engine for the full book."; engineNotice.classList.remove("warning"); } else if (ocrModeSelect.value === "paddleocr") { engineNotice.textContent = "Rank 3 PaddleOCR Arabic selected. It works, but the 5-page benchmark produced more fragmented text than Tesseract."; engineNotice.classList.remove("warning"); } else if (ocrModeSelect.value === "tesseract") { engineNotice.textContent = "Rank 1 Tesseract Arabic selected. This produced the best readable text in the 5-page OCR benchmark."; engineNotice.classList.remove("warning"); } else if (ocrModeSelect.value === "tesseract-fast") { engineNotice.textContent = "Rank 2 Tesseract Arabic selected. This was the faster readable runner-up in the 5-page OCR benchmark."; engineNotice.classList.remove("warning"); } else if (ocrModeSelect.value === "paddleocr-vl") { engineNotice.textContent = "PaddleOCR-VL selected. Use this only on a short sample or strong worker; it is much heavier than normal Arabic OCR."; engineNotice.classList.remove("warning"); } else if (ocrModeSelect.value === "surya") { engineNotice.textContent = "Surya heavy OCR selected. Use this only on a worker or powerful computer for difficult scanned PDFs."; engineNotice.classList.remove("warning"); } } function renderVoiceOptions() { const voices = cloudMode && !remoteWorkerMode ? voiceCatalog.cloud : voiceCatalog.local; const fallback = cloudMode && !remoteWorkerMode ? [{ id: "mms-ara", label: "Arabic Standard" }] : [{ id: "espeak-ar", label: "Local Arabic" }]; const options = voices.length ? voices : fallback; const current = voiceSelect.value || voiceCatalog.default; voiceSelect.innerHTML = ""; for (const voice of options) { const option = document.createElement("option"); option.value = voice.id; option.textContent = voice.label; voiceSelect.append(option); } if (options.some((voice) => voice.id === current)) { voiceSelect.value = current; } else if (options.some((voice) => voice.id === voiceCatalog.default)) { voiceSelect.value = voiceCatalog.default; } } function resetJob(name, message = "Uploading PDF.") { jobTitle.textContent = name; setJobState("queued", cloudMode && !remoteWorkerMode ? "Reading" : "Uploading"); jobMessage.textContent = message; updateStage({ id: "upload", label: cloudMode && !remoteWorkerMode ? "Text scan" : "Upload", phase: cloudMode && !remoteWorkerMode ? "Checking text layer" : "Uploading PDF", detail: message, progress: 2, steps: defaultStageSteps.map((step, index) => ({ ...step, state: index === 0 ? "active" : "pending", })), }); showQualityHint(null); pagesValue.textContent = "0"; charactersValue.textContent = "0"; engineValue.textContent = "-"; extractionValue.textContent = "-"; qualityValue.textContent = "-"; audioFormatValue.textContent = "-"; audioPanel.classList.add("hidden"); audioPlayer.removeAttribute("src"); downloadLink.removeAttribute("href"); downloadLink.removeAttribute("download"); resetBrowserSpeechFallback(); updateProgress(2); } function updateProgress(value) { const safeValue = Math.max(0, Math.min(100, Math.round(Number(value) || 0))); progressBar.style.width = `${safeValue}%`; progressMeter?.setAttribute("aria-valuenow", String(safeValue)); progressPercent.textContent = `${safeValue}%`; } function stepStates(activeId) { const activeIndex = defaultStageSteps.findIndex((step) => step.id === activeId); return defaultStageSteps.map((step, index) => ({ ...step, state: activeIndex < 0 ? "pending" : index < activeIndex ? "done" : index === activeIndex ? "active" : "pending", })); } function stageItem(label, current, total) { const safeTotal = Math.max(1, Number(total) || 1); const safeCurrent = Math.max(0, Math.min(safeTotal, Number(current) || 0)); return { label, current: safeCurrent, total: safeTotal, percent: Math.round((safeCurrent / safeTotal) * 100), }; } function formatProgressCount(value) { const number = Number(value) || 0; if (Number.isInteger(number)) return String(number); return number.toFixed(1); } function updateStage(stage) { const progress = stage?.progress ?? 0; updateProgress(progress); const label = stage?.label || "Working"; const phase = stage?.phase || "Current step"; const stepText = stage?.step && stage?.totalSteps ? `Step ${stage.step} of ${stage.totalSteps}` : "Current step"; const overallLabel = stage?.overallLabel || "Overall progress"; const labelWithStep = stage?.label ? `${label}${stage.step && stage.totalSteps ? ` (${stage.step} of ${stage.totalSteps})` : ""}` : "Working"; stageLabel.textContent = labelWithStep; stagePhaseLabel.textContent = `${stepText} - ${overallLabel} ${Math.round(Number(progress) || 0)}%`; stageDetailTitle.textContent = phase === "Current step" ? labelWithStep : phase; stageDetailText.textContent = stage?.detail || jobMessage.textContent || "Working on the PDF."; const itemProgress = stage?.itemProgress; if (itemProgress && itemProgress.total) { const percent = Math.max(0, Math.min(100, Math.round(Number(itemProgress.percent) || 0))); stageItemProgress.classList.remove("hidden"); stageItemLabel.textContent = itemProgress.label || "Progress"; stageItemValue.textContent = `${formatProgressCount(itemProgress.current)} of ${formatProgressCount(itemProgress.total)} (${percent}%)`; stageItemBar.style.width = `${percent}%`; } else { stageItemProgress.classList.add("hidden"); stageItemLabel.textContent = "Progress"; stageItemValue.textContent = "0 of 0"; stageItemBar.style.width = "0%"; } const isServerJob = !cloudMode || remoteWorkerMode; const backgroundText = isServerJob ? "The server keeps working even if you are not watching this screen." : "Keep this tab open while browser OCR is reading scanned pages."; stageMetaText.textContent = `Updated just now. ${backgroundText}`; const steps = stage?.steps?.length ? stage.steps : defaultStageSteps; stageList.innerHTML = ""; for (const step of steps) { const item = document.createElement("li"); item.className = step.state || "pending"; const dot = document.createElement("span"); dot.className = "stage-dot"; dot.setAttribute("aria-hidden", "true"); const text = document.createElement("span"); text.textContent = step.label; item.append(dot, text); stageList.append(item); } } function setJobState(status, label = statusLabels[status] || status) { jobState.textContent = label; jobState.classList.toggle("complete", status === "complete"); jobState.classList.toggle("failed", status === "failed"); } function startPolling(jobId) { activeJobId = jobId; clearInterval(pollTimer); pollTimer = setInterval(() => pollJob(jobId), 750); pollJob(jobId); } async function pollJob(jobId) { try { const job = await apiFetch(`/api/jobs/${jobId}`).then(readJson); jobTitle.textContent = job.filename || "Arabic PDF"; setJobState(job.status); jobMessage.textContent = job.error || job.message; showQualityHint({ quality: job.textQuality, reasons: job.qualityReasons || [], readyForTts: job.textQuality && job.textQuality !== "poor", }); pagesValue.textContent = formatPages(job); charactersValue.textContent = (job.characters || 0).toLocaleString(); engineValue.textContent = job.engine || "-"; extractionValue.textContent = job.extraction || "-"; qualityValue.textContent = job.textQuality ? `${qualityLabels[job.textQuality] || job.textQuality} ${job.qualityScore ? Math.round(job.qualityScore) : ""}`.trim() : "-"; audioFormatValue.textContent = job.audioFormat ? `${job.audioFormat.toUpperCase()} ${formatBytes(job.audioBytes)}` : "-"; updateStage(job.stage || { progress: job.progress || 0, label: statusLabels[job.status] || job.status }); if (job.status === "complete") { clearInterval(pollTimer); pollTimer = null; activeJobId = null; createButton.disabled = false; audioPlayer.src = apiUrl(job.audioUrl); downloadLink.href = apiUrl(job.downloadUrl); audioPanel.classList.remove("hidden"); loadHistory(); } if (job.status === "failed") { clearInterval(pollTimer); pollTimer = null; activeJobId = null; createButton.disabled = false; loadHistory(); } } catch (error) { clearInterval(pollTimer); pollTimer = null; activeJobId = null; createButton.disabled = false; setError(uploadError, error.message); } } function formatBytes(value) { if (!value) return ""; const units = ["B", "KB", "MB", "GB"]; let size = Number(value); let unit = 0; while (size >= 1024 && unit < units.length - 1) { size /= 1024; unit += 1; } return `${size >= 10 || unit === 0 ? size.toFixed(0) : size.toFixed(1)} ${units[unit]}`; } function formatPages(job) { const pages = job.pages || 0; const total = job.totalPages || pages; if (total && total > pages) return `${pages} / ${total}`; return `${pages}`; } function showQualityHint(quality) { if (!quality || !quality.quality || quality.quality === "good") { qualityHint.textContent = ""; qualityHint.classList.add("hidden"); qualityHint.classList.remove("poor"); return; } const reasons = quality.reasons?.length ? ` ${quality.reasons.join("; ")}.` : ""; const action = quality.quality === "poor" ? "Try 1. Tesseract Arabic - Best readable, Best scan test, or another OCR mode before creating audio." : "Listen to a short sample before running the full book. If it sounds wrong, try 1. Tesseract Arabic - Best readable, Best scan test, or another OCR mode."; qualityHint.textContent = `Text needs checking.${reasons} ${action}`; qualityHint.classList.remove("hidden"); qualityHint.classList.toggle("poor", quality.quality === "poor"); } renderVoiceOptions(); checkSession();