// Syncre's picture
// Deploy Arabic Audio Reader worker
// 985cdbe verified
// Cached references to the page elements this script reads and updates.
// Each lookup runs once at load time; all use element ids except the
// progress meter, which is selected by its ".meter" class.

// Login panel, main workspace, and session controls.
const loginPanel = document.querySelector("#loginPanel");
const workspace = document.querySelector("#workspace");
const loginForm = document.querySelector("#loginForm");
const codeInput = document.querySelector("#codeInput");
const loginError = document.querySelector("#loginError");
const logoutButton = document.querySelector("#logoutButton");
// Upload form, its file input, and the option selects sent with each job.
const uploadForm = document.querySelector("#uploadForm");
const pdfInput = document.querySelector("#pdfInput");
const fileName = document.querySelector("#fileName");
const createButton = document.querySelector("#createButton");
const voiceSelect = document.querySelector("#voiceSelect");
const speedSelect = document.querySelector("#speedSelect");
const ocrModeSelect = document.querySelector("#ocrModeSelect");
const pageLimitSelect = document.querySelector("#pageLimitSelect");
const uploadError = document.querySelector("#uploadError");
// Status line that loadHealth fills with the engine/worker state.
const engineNotice = document.querySelector("#engineNotice");
// Job header, overall progress, and per-stage display elements.
const jobTitle = document.querySelector("#jobTitle");
const jobState = document.querySelector("#jobState");
const progressBar = document.querySelector("#progressBar");
const progressMeter = document.querySelector(".meter");
const progressPercent = document.querySelector("#progressPercent");
const stageLabel = document.querySelector("#stageLabel");
const stagePhaseLabel = document.querySelector("#stagePhaseLabel");
const stageDetailTitle = document.querySelector("#stageDetailTitle");
const stageDetailText = document.querySelector("#stageDetailText");
const stageItemProgress = document.querySelector("#stageItemProgress");
const stageItemLabel = document.querySelector("#stageItemLabel");
const stageItemValue = document.querySelector("#stageItemValue");
const stageItemBar = document.querySelector("#stageItemBar");
const stageMetaText = document.querySelector("#stageMetaText");
const stageList = document.querySelector("#stageList");
const jobMessage = document.querySelector("#jobMessage");
const qualityHint = document.querySelector("#qualityHint");
// Job statistics cells (pages, characters, engine, extraction, quality, format).
const pagesValue = document.querySelector("#pagesValue");
const charactersValue = document.querySelector("#charactersValue");
const engineValue = document.querySelector("#engineValue");
const extractionValue = document.querySelector("#extractionValue");
const qualityValue = document.querySelector("#qualityValue");
const audioFormatValue = document.querySelector("#audioFormatValue");
// Finished-audio player and download link.
const audioPanel = document.querySelector("#audioPanel");
const audioPlayer = document.querySelector("#audioPlayer");
const downloadLink = document.querySelector("#downloadLink");
// Browser read-aloud fallback controls.
const browserSpeechPanel = document.querySelector("#browserSpeechPanel");
const browserSpeechMessage = document.querySelector("#browserSpeechMessage");
const browserSpeakButton = document.querySelector("#browserSpeakButton");
const browserStopButton = document.querySelector("#browserStopButton");
// Previous-jobs list rendered by renderHistory.
const historyPanel = document.querySelector("#historyPanel");
const historyList = document.querySelector("#historyList");
// Mutable module-level state shared by the functions below.

// Checked by resumeActiveJob so a second poll is not started; assigned outside this view.
let pollTimer = null;
// Upload size limit in MB; set by loadHealth, null means "no limit known" and skips the size check.
let maxUploadMb = null;
// Deployment flags, all derived in loadHealth from the /api/health payload:
// hostedShellMode is true when the platform is "vercel"; remoteWorkerMode is true
// when that shell also has a worker base URL; cloudMode is the hosted shell with no worker URL.
let cloudMode = false;
let hostedShellMode = false;
let remoteWorkerMode = false;
let workerBaseUrl = "";
// Cloud TTS availability and its per-request character limit (900 unless the server reports another).
let cloudTtsReady = false;
let cloudTtsMaxChars = 900;
// Last diagnostic explaining why the worker cannot be used; shown by the upload handler.
let workerUnavailableDiagnostic = null;
// NOTE(review): presumably the object URL of the audio currently shown — not used in this view.
let currentAudioUrl = null;
// Voice catalog used until (or unless) the health payload supplies its own `voices`.
const defaultVoiceCatalog = {
default: "silma-local",
cloud: [
{ id: "mms-ara", label: "Arabic Standard" },
{ id: "silma-tts", label: "SILMA Arabic" },
],
local: [
{ id: "silma-local", label: "1. SILMA Arabic - Most natural" },
{ id: "espeak-ar-clear", label: "2. Local Arabic Clear - Fast fallback" },
{ id: "espeak-ar", label: "3. Local Arabic - Standard fallback" },
{ id: "espeak-ar-male", label: "Local Arabic Low" },
],
};
let voiceCatalog = defaultVoiceCatalog;
// Lazily created singletons: the Tesseract worker, the pdf.js module, and Tesseract's createWorker.
let ocrWorker = null;
let pdfjsLib = null;
let tesseractCreateWorker = null;
// Checked by resumeActiveJob alongside pollTimer; assigned outside this view.
let activeJobId = null;
// Unlock code typed at login. pendingUnlockCode is cleared after a successful worker login;
// sessionUnlockCode is kept so the worker login can be retried, until logout or a forced re-unlock.
let pendingUnlockCode = "";
let sessionUnlockCode = "";
// OCR model names reported as installed by the health payload.
let installedArabicOcrModels = [];
// Page currently being recognized; written by ocrPdfText and read by the Tesseract logger.
let browserOcrProgress = { page: 0, total: 0 };
// NOTE(review): presumably the text and source name for browser read-aloud — not used in this view.
let browserSpeechText = "";
let browserSpeechSourceName = "";
// Display labels keyed by OCR engine id; used for the history list and the engine notice.
const ocrModeLabels = {
tesseract: "1. Tesseract Arabic - Best readable",
"tesseract-fast": "2. Tesseract Arabic - Faster readable",
paddleocr: "3. PaddleOCR Arabic - Faster fallback",
"arabic-max": "Maximum Arabic OCR - slower",
arabic: "Arabic OCR comparison - slower",
"qari-ocr": "QARI Arabic books (best)",
"tawkeed-ocr": "Tawkeed Arabic OCR",
"katib-ocr": "KATIB Arabic OCR (lighter)",
"arabic-qwen-ocr": "Arabic-Qwen OCR",
"arabic-glm-ocr": "Arabic-GLM OCR v2",
"baseer-ocr": "Baseer Arabic OCR",
"paddleocr-vl": "PaddleOCR-VL heavy",
best: "Best scan test",
surya: "Surya heavy OCR",
easyocr: "General Arabic OCR",
auto: "Auto fallback",
};
// Display labels for a job's textQuality value.
const qualityLabels = {
good: "Good",
warning: "Check",
poor: "Poor",
};
// Display labels for known page limits; renderHistory falls back to "<n> pages" for other values.
const pageLimitLabels = {
0: "Full book",
5: "Quick test",
10: "Longer test",
};
// Display labels for known TTS speeds; renderHistory falls back to "<n>x" for other values.
const speedLabels = {
0.9: "Slower",
1: "Normal",
1.15: "Faster",
};
// shouldOcrMixedPdf requests OCR when more than this fraction of pages has under 20 characters of embedded text.
const mixedPdfOcrMissingPageRatio = 0.15;
// Display labels for job status values.
const statusLabels = {
queued: "Queued",
reading: "Reading",
speaking: "Creating",
complete: "Ready",
failed: "Failed",
};
// The five pipeline steps, in order; callers copy this list and attach a `state` to each entry.
const defaultStageSteps = [
{ id: "upload", label: "Upload" },
{ id: "text", label: "Text scan" },
{ id: "ocr", label: "Arabic OCR" },
{ id: "voice", label: "Voice" },
{ id: "ready", label: "Ready" },
];
// Overall-progress percentages marking where each stage starts and ends; passed to rangeProgress.
// ocrRenderEnd is not referenced in the portion of the file visible here.
const progressRanges = {
textStart: 5,
textEnd: 18,
ocrStart: 18,
ocrRenderEnd: 30,
ocrEnd: 72,
voiceStart: 72,
voiceEnd: 98,
};
/**
 * Map a position inside one stage onto the overall progress scale.
 *
 * @param {number} start - Overall progress value at the beginning of the stage.
 * @param {number} end - Overall progress value at the end of the stage.
 * @param {number} current - Items finished so far; clamped to [0, total], non-numeric counts as 0.
 * @param {number} total - Item count for the stage; anything below 1 or non-numeric counts as 1.
 * @returns {number} The interpolated progress, rounded to the nearest integer.
 */
function rangeProgress(start, end, current, total) {
  const denominator = Math.max(1, Number(total) || 1);
  const numerator = Math.min(denominator, Math.max(0, Number(current) || 0));
  const fraction = numerator / denominator;
  return Math.round(start + fraction * (end - start));
}
/**
 * Switch the page between the login view and the workspace view.
 * When signing in, also clears the upload error and refreshes engine health,
 * followed by the job history.
 *
 * @param {boolean} authenticated - Whether the user is signed in.
 */
function setAuthenticated(authenticated) {
  const signedOut = !authenticated;
  loginPanel.classList.toggle("hidden", authenticated);
  workspace.classList.toggle("hidden", signedOut);
  logoutButton.classList.toggle("hidden", signedOut);
  if (signedOut) return;

  uploadError.textContent = "";
  // History is loaded only after health has settled, because health decides which backend to ask.
  loadHealth().then(loadHistory);
}
/**
 * Show an error message in the given element, or clear it.
 *
 * @param {{textContent: string}} target - Element that displays the message.
 * @param {string} [message] - Text to show; any falsy value clears the element.
 */
function setError(target, message) {
  target.textContent = message ? message : "";
}
/**
 * Turn a worker diagnostic into one line of user-facing text.
 *
 * @param {{message?: string, nextSteps?: string[]}|null|undefined} diagnostic - Diagnostic record, if any.
 * @param {string} [fallback=""] - Text used when the diagnostic has no message.
 * @returns {string} The message, followed by " Next: " and up to three non-empty
 *   next steps joined with spaces when the diagnostic provides any.
 */
function formatDiagnosticMessage(diagnostic, fallback = "") {
  const headline = diagnostic?.message || fallback || "";
  const rawSteps = diagnostic?.nextSteps;
  if (!Array.isArray(rawSteps)) return headline;

  const steps = rawSteps.filter(Boolean).slice(0, 3);
  if (steps.length === 0) return headline;
  return `${headline} Next: ${steps.join(" ")}`;
}
/**
 * Parse a fetch Response as JSON and turn HTTP failures into thrown errors.
 *
 * A body that is not valid JSON, or whose JSON value is `null`, is treated as an
 * empty object. Without the null guard, reading `detail` from a null payload
 * would raise a TypeError, which describeNetworkError reports as a network failure.
 *
 * @param {Response} response - The response to read.
 * @returns {Promise<any>} The parsed payload (an empty object when the body is missing or null).
 * @throws {Error} When `response.ok` is false. The message is the payload's `detail`
 *   or "Request failed", and `status` carries the HTTP status code.
 */
async function readJson(response) {
  const parsed = await response.json().catch(() => null);
  const payload = parsed ?? {};
  if (!response.ok) {
    const error = new Error(payload.detail || "Request failed");
    error.status = response.status;
    throw error;
  }
  return payload;
}
/**
 * Convert a failed request into a message suitable for the user.
 *
 * A TypeError is treated as a connectivity failure and gets a fixed
 * explanation, worded for the remote worker when one is in use. Any other
 * error is reported with its own message.
 *
 * @param {Error|null|undefined} error - The error raised by the request.
 * @returns {string} User-facing description.
 */
function describeNetworkError(error) {
  if (error?.name !== "TypeError") {
    return error?.message || "Request failed";
  }
  if (remoteWorkerMode && workerBaseUrl) {
    return (
      "The OCR/TTS worker could not be reached from this browser. The Space may be waking up, or the browser blocked the cross-site upload. " +
      "Wait a minute, refresh, unlock with the code again, and retry."
    );
  }
  return "The site could not be reached. Check the connection and try again.";
}
/**
 * Resolve an API path against the backend currently in use.
 *
 * @param {string} path - A site-relative path or a URL.
 * @returns {string} The path prefixed with the worker base URL when the remote
 *   worker is active and the path does not already start with "http";
 *   otherwise the path unchanged.
 */
function apiUrl(path) {
  const viaWorker = remoteWorkerMode && !path.startsWith("http");
  return viaWorker ? `${workerBaseUrl}${path}` : path;
}
/**
 * Issue a fetch against the active backend.
 *
 * Credentials default to "include" for the remote worker and "same-origin"
 * otherwise; a `credentials` entry in `options` overrides that default.
 *
 * @param {string} path - Path or URL, resolved through apiUrl.
 * @param {RequestInit} [options={}] - Extra fetch options.
 * @returns {Promise<Response>} The pending fetch.
 */
function apiFetch(path, options = {}) {
  const requestInit = {
    credentials: remoteWorkerMode ? "include" : "same-origin",
    ...options,
  };
  return fetch(apiUrl(path), requestInit);
}
/**
 * Ask the site whether a session exists and update the page to match.
 * Always queries the site itself ("/api/session"), never the remote worker.
 *
 * @returns {Promise<void>} Rejects if the request fails or returns a non-OK status.
 */
async function checkSession() {
  const session = await fetch("/api/session").then(readJson);
  const isSignedIn = session.authenticated;
  setAuthenticated(isSignedIn);
}
/**
 * Query engine health, derive the deployment mode flags, and describe the result in engineNotice.
 *
 * Reads /api/health from the site first. If the site reports a Vercel deployment with a worker
 * base URL, it logs in to the worker and re-reads health from there. It then updates the
 * module-level mode flags, voice catalog, upload limit and OCR select, and writes one notice
 * message, chosen by the first matching case below.
 *
 * Does not reject: any error raised inside the body is replaced by a generic warning notice.
 */
async function loadHealth() {
try {
// Step 1: health as seen by the site itself (the hosted shell or the local server).
let payload = await fetch("/api/health").then(readJson);
let engines = payload.engines;
hostedShellMode = engines.deployment?.platform === "vercel";
workerBaseUrl = engines.deployment?.workerBaseUrl || "";
remoteWorkerMode = Boolean(hostedShellMode && workerBaseUrl);
workerUnavailableDiagnostic = null;
let workerDiagnostic = null;
if (remoteWorkerMode) {
// Optimistic defaults so the form is usable while the worker health check is still running.
cloudMode = false;
cloudTtsReady = false;
maxUploadMb = payload.maxUploadMb || 512;
voiceCatalog = defaultVoiceCatalog;
renderVoiceOptions();
engineNotice.textContent = "The OCR/TTS worker is connected. You can upload now while voice details finish checking.";
engineNotice.classList.remove("warning");
}
if (remoteWorkerMode) {
// Step 2: authenticate against the worker, then prefer its health payload over the shell's.
await loginRemoteWorker();
try {
payload = await apiFetch("/api/health").then(readJson);
} catch (error) {
if (error.status === 401) {
// The worker rejected the session: force the user back to the login panel.
await requireWorkerUnlock();
return;
}
// Any other failure: record why, and fall back to treating the worker as unavailable.
workerDiagnostic = await loadWorkerDiagnostic(error);
workerUnavailableDiagnostic = workerDiagnostic;
remoteWorkerMode = false;
}
if (remoteWorkerMode) {
engines = payload.engines;
}
} else if (hostedShellMode && !workerBaseUrl) {
// Hosted shell without a configured worker: collect a diagnostic to explain what is missing.
workerDiagnostic = await loadWorkerDiagnostic({ message: engines.deployment?.nextAction || "" });
workerUnavailableDiagnostic = workerDiagnostic;
}
// Step 3: derive the remaining state from whichever `engines` object was kept.
cloudMode = hostedShellMode && !remoteWorkerMode && !workerBaseUrl;
cloudTtsReady = Boolean(cloudMode && engines.cloudTts?.available);
cloudTtsMaxChars = engines.cloudTts?.maxChunkChars || 900;
voiceCatalog = engines.voices || voiceCatalog || defaultVoiceCatalog;
installedArabicOcrModels = engines.ocr?.arabicTrainedStack?.installed || [];
renderVoiceOptions();
if (!cloudMode || remoteWorkerMode) {
setSelectValue(ocrModeSelect, engines.ocr?.preferred);
}
// In cloud mode the limit comes from cloud TTS (or stays unknown); otherwise from the health payload.
maxUploadMb = cloudMode ? (cloudTtsReady ? engines.cloudTts?.maxPdfMb || 512 : null) : payload.maxUploadMb;
ocrModeSelect.disabled = Boolean(cloudMode && !remoteWorkerMode);
const deploymentStatus = engines.deployment || {};
if (workerDiagnostic) {
deploymentStatus.workerDiagnostic = workerDiagnostic;
}
const deploymentAction = deploymentStatus.nextAction || "";
// Step 4: pick the notice. Each case returns, so only the first match is shown.
// Case: remote worker is reachable.
if (remoteWorkerMode) {
const label = engines.preferred === "silma" ? "SILMA Arabic voice" : "Arabic worker";
engineNotice.textContent =
deploymentStatus.productionReady === false
? deploymentAction || "The worker is connected, but Vercel production settings still need attention."
: `${label} is ready on the OCR/TTS worker. Uploads up to ${payload.maxUploadMb} MB are accepted.`;
engineNotice.classList.toggle("warning", deploymentStatus.productionReady === false);
return;
}
// Case: hosted shell with no worker URL, but temporary cloud TTS is available.
if (cloudMode && cloudTtsReady) {
engineNotice.textContent =
formatDiagnosticMessage(deploymentStatus.workerDiagnostic) ||
"Temporary cloud voice test mode is enabled. For reliable downloadable audio on Vercel, add WORKER_BASE_URL and redeploy.";
engineNotice.classList.add("warning");
return;
}
// Case: a worker URL is configured but the worker health check failed above.
if (hostedShellMode && workerBaseUrl && !remoteWorkerMode) {
engineNotice.textContent =
formatDiagnosticMessage(workerDiagnostic) ||
"The OCR/TTS worker is configured but not reachable yet. Check the Hugging Face Space URL, CORS_ORIGINS, and Space status, then redeploy.";
engineNotice.classList.add("warning");
return;
}
// Case: hosted shell with neither a worker nor cloud TTS.
if (cloudMode) {
engineNotice.textContent =
formatDiagnosticMessage(deploymentStatus.workerDiagnostic) ||
deploymentAction ||
"Downloadable Vercel audio needs the OCR/TTS worker. Add WORKER_BASE_URL in Vercel, then redeploy.";
engineNotice.classList.add("warning");
return;
}
// Case: not a hosted shell, and the engines report they are ready for Arabic.
if (engines.readyForArabic) {
const label = engines.preferred === "piper" ? "Neural Arabic voice" : "Arabic voice";
const installedArabicStack = engines.ocr?.arabicTrainedStack?.installed || installedArabicOcrModels;
const installedArabicText = installedArabicStack.length
? ` Installed Arabic OCR: ${installedArabicStack.join(", ")}.`
: "";
// Engine-specific wording for the preferred OCR engine; other ids fall back to their
// ocrModeLabels entry, and a missing preferred engine is reported as not ready.
const ocrLabel =
engines.ocr?.preferred === "arabic-max"
? "Maximum Arabic OCR is ready, but slower"
: engines.ocr?.preferred === "arabic"
? "Arabic OCR comparison is ready, but slower"
: engines.ocr?.preferred === "qari-ocr"
? "QARI Arabic book OCR is ready"
: engines.ocr?.preferred === "tawkeed-ocr"
? "Tawkeed Arabic OCR is ready"
: engines.ocr?.preferred === "katib-ocr"
? "KATIB Arabic OCR is ready"
: engines.ocr?.preferred === "arabic-qwen-ocr"
? "Arabic-Qwen OCR is ready"
: engines.ocr?.preferred === "arabic-glm-ocr"
? "Arabic-GLM OCR is ready"
: engines.ocr?.preferred === "baseer-ocr"
? "Baseer Arabic OCR is ready"
: engines.ocr?.preferred === "paddleocr"
? "PaddleOCR Arabic is ready, but less readable"
: engines.ocr?.preferred === "paddleocr-vl"
? "PaddleOCR-VL heavy OCR is ready"
: engines.ocr?.preferred === "surya"
? "Surya heavy OCR is ready"
: engines.ocr?.preferred === "tesseract"
? "Rank 1 readable Tesseract Arabic OCR is ready"
: engines.ocr?.preferred === "tesseract-fast"
? "Rank 2 faster Tesseract Arabic OCR is ready"
: engines.ocr?.preferred === "best"
? "Best Arabic OCR test mode is ready"
: engines.ocr?.preferred
? `${ocrModeLabels[engines.ocr.preferred] || "Arabic OCR"} is ready`
: "Arabic OCR is not ready";
engineNotice.textContent =
`${label} is ready. ${ocrLabel}. Uploads up to ${payload.maxUploadMb} MB are accepted.${installedArabicText}`;
engineNotice.classList.remove("warning");
return;
}
// Case: nothing above matched, so no suitable Arabic voice was reported.
engineNotice.textContent = `A better Arabic voice is not installed yet. Upload limit: ${payload.maxUploadMb} MB.`;
engineNotice.classList.add("warning");
} catch (error) {
// Covers the whole body, including a missing `engines` object in the payload.
engineNotice.textContent = "Unable to check the voice.";
engineNotice.classList.add("warning");
}
}
/**
 * Obtain an explanation for why the OCR/TTS worker is unavailable.
 *
 * Asks the site's diagnostics endpoint first. If that call fails or returns no
 * message, builds a browser-side report from the original error instead.
 *
 * @param {{message: string}} error - The failure that triggered the diagnostic.
 * @returns {Promise<object>} The server diagnostic, or the browser-side fallback report.
 */
async function loadWorkerDiagnostic(error) {
  let serverReport = null;
  try {
    serverReport = await fetch("/api/worker-diagnostics").then(readJson);
  } catch (_diagnosticError) {
    // Keep the original worker error visible when the diagnostic endpoint cannot run.
    serverReport = null;
  }
  if (serverReport?.message) return serverReport;

  return {
    status: "browser-worker-error",
    reachable: false,
    workerBaseUrl,
    message: `The OCR/TTS worker could not be reached from this browser. ${error.message}`,
  };
}
/**
 * Force a fresh login after the worker rejected the current session.
 *
 * Forgets the stored unlock codes, leaves remote-worker and cloud mode, logs out
 * of the site (ignoring a failed request), and shows the login panel with a
 * prompt to enter the code again.
 *
 * @returns {Promise<void>}
 */
async function requireWorkerUnlock() {
  pendingUnlockCode = sessionUnlockCode = "";
  remoteWorkerMode = cloudMode = false;

  const logoutRequest = fetch("/api/logout", { method: "POST" });
  await logoutRequest.catch(() => null);

  setAuthenticated(false);
  setError(loginError, "Enter the code once more to unlock the OCR worker.");
  engineNotice.textContent = "The OCR worker needs to be unlocked.";
  engineNotice.classList.add("warning");
}
/**
 * Log this browser in to the remote worker with the stored unlock code.
 *
 * Uses the pending code when there is one, otherwise the session code. The
 * pending code is cleared once the worker accepts the login.
 *
 * @returns {Promise<boolean>} True when the worker accepted the login; false when
 *   remote-worker mode is off, no code is stored, or the request failed.
 */
async function loginRemoteWorker() {
  const unlockCode = pendingUnlockCode || sessionUnlockCode;
  if (!(remoteWorkerMode && unlockCode)) return false;

  const body = new FormData();
  body.append("code", unlockCode);
  try {
    const response = await fetch(`${workerBaseUrl}/api/login`, {
      method: "POST",
      body,
      credentials: "include",
    });
    await readJson(response);
  } catch (_error) {
    return false;
  }
  pendingUnlockCode = "";
  return true;
}
/**
 * Make sure the worker has an authenticated session before an upload is sent.
 *
 * Order of attempts: check the existing session, try a fresh worker login,
 * then check the session one more time.
 *
 * @returns {Promise<boolean>} True when uploads can proceed (always true outside
 *   remote-worker mode); false when no session could be established.
 */
async function ensureWorkerSessionForUpload() {
  if (!remoteWorkerMode) return true;

  // A failed session request counts as "not authenticated" so the next attempt can run.
  const hasWorkerSession = async () => {
    try {
      const session = await apiFetch("/api/session").then(readJson);
      return Boolean(session.authenticated);
    } catch (_error) {
      return false;
    }
  };

  if (await hasWorkerSession()) return true;
  // Try a fresh login before surfacing an upload-specific network message.
  if (await loginRemoteWorker()) return true;
  return hasWorkerSession();
}
/**
 * Fetch the job list, render it, and resume watching any job still in progress.
 *
 * The history panel is hidden when cloud mode has no worker, and also when the
 * request or the rendering fails.
 *
 * @returns {Promise<void>}
 */
async function loadHistory() {
  const hideHistory = () => historyPanel.classList.add("hidden");

  const historyUnavailable = cloudMode && !remoteWorkerMode;
  if (historyUnavailable) {
    hideHistory();
    return;
  }

  try {
    const response = await apiFetch("/api/jobs");
    const payload = await readJson(response);
    const jobs = payload.jobs || [];
    renderHistory(jobs);
    resumeActiveJob(jobs);
  } catch (error) {
    hideHistory();
  }
}
/**
 * Rebuild the history list from the given jobs.
 *
 * Jobs without a filename are skipped, and the panel is hidden when nothing is
 * left. Each entry shows the filename, a " - "-separated summary line, and
 * whichever of the Watch / Use settings / Download actions apply to the job.
 *
 * @param {object[]} jobs - Job records as returned by the jobs endpoint.
 */
function renderHistory(jobs) {
  // Summary line: status and pages always, then every optional attribute the job carries.
  const describeJob = (job) => {
    const parts = [`${statusLabels[job.status] || job.status}`, `${formatPages(job)}`];
    if (job.pageLimit) {
      parts.push(`${pageLimitLabels[job.pageLimit] || `${job.pageLimit} pages`}`);
    }
    if (job.ttsSpeed && job.ttsSpeed !== 1) {
      parts.push(`${speedLabels[job.ttsSpeed] || `${job.ttsSpeed}x`}`);
    }
    if (job.ocrEngine && job.extraction && job.extraction !== "embedded") {
      parts.push(`${ocrModeLabels[job.ocrEngine] || job.ocrEngine}`);
    }
    if (job.textQuality) {
      parts.push(`${qualityLabels[job.textQuality] || job.textQuality}`);
    }
    if (job.audioFormat) {
      parts.push(`${job.audioFormat.toUpperCase()} ${formatBytes(job.audioBytes)}`);
    }
    return parts.join(" - ");
  };

  const makeActionButton = (caption, onClick) => {
    const button = document.createElement("button");
    button.type = "button";
    button.className = "link-button";
    button.textContent = caption;
    button.addEventListener("click", onClick);
    return button;
  };

  const namedJobs = jobs.filter((job) => job.filename);
  historyPanel.classList.toggle("hidden", namedJobs.length === 0);
  historyList.innerHTML = "";

  for (const job of namedJobs) {
    const entry = document.createElement("li");

    const summary = document.createElement("div");
    const title = document.createElement("strong");
    const subtitle = document.createElement("span");
    title.textContent = job.filename;
    subtitle.textContent = describeJob(job);
    summary.append(title, subtitle);
    entry.append(summary);

    const actionBar = document.createElement("div");
    actionBar.className = "history-actions";
    const stillRunning = ["queued", "reading", "speaking"].includes(job.status);
    const finished = ["failed", "complete"].includes(job.status);
    if (stillRunning) {
      actionBar.append(makeActionButton("Watch", () => startPolling(job.id)));
    }
    if (finished) {
      actionBar.append(makeActionButton("Use settings", () => applyJobSettings(job)));
    }
    if (job.downloadUrl) {
      const download = document.createElement("a");
      download.href = apiUrl(job.downloadUrl);
      download.textContent = "Download";
      actionBar.append(download);
    }
    // Skip the action container entirely when the job offers nothing to click.
    if (actionBar.children.length) entry.append(actionBar);

    historyList.append(entry);
  }
}
/**
 * Start watching the first unfinished job, unless one is already being watched.
 *
 * @param {Array<{id: *, status: string}>} jobs - Job records to search, in list order.
 */
function resumeActiveJob(jobs) {
  const alreadyWatching = Boolean(activeJobId || pollTimer);
  if (alreadyWatching) return;

  const unfinishedStates = new Set(["queued", "reading", "speaking"]);
  const candidate = jobs.find((job) => unfinishedStates.has(job.status));
  if (candidate) {
    startPolling(candidate.id);
  }
}
/**
 * Copy the voice, speed, OCR engine and page limit of an earlier job into the
 * upload form, then ask the user to pick the PDF again.
 *
 * @param {object} job - A job record from the history list.
 */
function applyJobSettings(job) {
  const selections = [
    [voiceSelect, job.voiceId],
    [speedSelect, String(job.ttsSpeed || 1)],
    [ocrModeSelect, job.ocrEngine],
    [pageLimitSelect, String(job.pageLimit || 0)],
  ];
  for (const [select, value] of selections) {
    setSelectValue(select, value);
  }
  describeOcrMode();

  const sourceName = job.filename || "that job";
  setError(uploadError, `Settings loaded for ${sourceName}. Choose the PDF again, then create audio.`);
  pdfInput.focus();
}
/**
 * Select an option only if the dropdown actually offers it.
 *
 * @param {{options: Iterable<{value: string}>, value: string}} select - The select element.
 * @param {*} value - Desired value, compared as a string. `undefined`, `null`
 *   and "" leave the selection untouched, as does a value with no matching option.
 */
function setSelectValue(select, value) {
  const isBlank = value === undefined || value === null || value === "";
  if (isBlank) return;

  const wanted = String(value);
  for (const option of select.options) {
    if (option.value === wanted) {
      select.value = wanted;
      return;
    }
  }
}
// Login: send the typed code to the site. The code is remembered before the
// request so the remote worker login can reuse it once health has loaded.
loginForm.addEventListener("submit", async (event) => {
  event.preventDefault();
  setError(loginError, "");

  const enteredCode = codeInput.value.trim();
  pendingUnlockCode = enteredCode;
  sessionUnlockCode = enteredCode;

  const body = new FormData();
  body.append("code", enteredCode);
  try {
    const response = await fetch("/api/login", { method: "POST", body });
    await readJson(response);
    codeInput.value = "";
    setAuthenticated(true);
  } catch (error) {
    setError(loginError, error.message);
  }
});
// Logout: end the worker session (best effort) and the site session, then
// always forget the unlock codes and return to the login panel.
//
// A failed site logout request used to reject out of this handler, which left
// the unlock codes in memory and the workspace visible. The local cleanup now
// runs regardless, matching how requireWorkerUnlock treats the same request.
logoutButton.addEventListener("click", async () => {
  try {
    if (remoteWorkerMode) {
      await apiFetch("/api/logout", { method: "POST" }).catch(() => null);
    }
    await fetch("/api/logout", { method: "POST" }).then(readJson);
  } catch (error) {
    // The server session may still exist; report it, but still sign out locally.
    console.warn("Logout request failed.", error);
  } finally {
    pendingUnlockCode = "";
    sessionUnlockCode = "";
    setAuthenticated(false);
  }
});
// Show details for the newly chosen file.
pdfInput.addEventListener("change", () => {
  const chosenFile = pdfInput.files[0];
  describeFile(chosenFile);
});

// Refresh the OCR mode description whenever the selection changes.
ocrModeSelect.addEventListener("change", () => {
  describeOcrMode();
});

// Start browser read-aloud of the prepared text.
browserSpeakButton.addEventListener("click", () => {
  speakBrowserSpeechText();
});

// Stop browser read-aloud. The status message is updated even when speech is unsupported.
browserStopButton.addEventListener("click", () => {
  const speechSupported = canUseBrowserSpeech();
  if (speechSupported) {
    window.speechSynthesis.cancel();
  }
  jobMessage.textContent = "Browser read-aloud stopped.";
});
// Upload: validate the chosen file, explain why the upload cannot proceed when
// the worker is missing or unreachable, and otherwise hand the PDF to createLocalAudio.
uploadForm.addEventListener("submit", async (event) => {
  event.preventDefault();
  setError(uploadError, "");

  const selectedPdf = pdfInput.files[0];
  if (!selectedPdf) {
    setError(uploadError, "Choose a PDF first.");
    return;
  }

  const hasPdfExtension = selectedPdf.name.toLowerCase().endsWith(".pdf");
  if (!hasPdfExtension) {
    setError(uploadError, "Choose a PDF file.");
    return;
  }

  // An unknown limit (null/0) disables the size check.
  const byteLimit = maxUploadMb ? maxUploadMb * 1024 * 1024 : Infinity;
  if (selectedPdf.size > byteLimit) {
    setError(uploadError, `This PDF is larger than ${maxUploadMb} MB.`);
    return;
  }

  // A worker URL is configured, but the health check could not reach the worker.
  const workerConfiguredButDown = hostedShellMode && workerBaseUrl && !remoteWorkerMode;
  if (workerConfiguredButDown) {
    const explanation =
      formatDiagnosticMessage(workerUnavailableDiagnostic) ||
      "The OCR/TTS worker is configured but not reachable. Open the Hugging Face Space, confirm it is running, set CORS_ORIGINS to this Vercel URL, then redeploy.";
    setError(uploadError, explanation);
    return;
  }

  // Hosted shell without any worker: downloadable audio cannot be produced here.
  if (cloudMode && !remoteWorkerMode) {
    const explanation = formatDiagnosticMessage(
      workerUnavailableDiagnostic,
      "This Vercel site needs WORKER_BASE_URL before it can create downloadable Arabic audio. Deploy the Hugging Face Space worker, set Vercel WORKER_BASE_URL to that https://*.hf.space URL, remove the temporary Hugging Face cloud TTS variables, then redeploy.",
    );
    setError(uploadError, explanation);
    const pendingSteps = defaultStageSteps.map((step) => ({ ...step, state: "pending" }));
    updateStage({
      id: "upload",
      label: "Worker needed",
      phase: "Vercel setup needed",
      detail: "Downloadable audio on Vercel is created by the OCR/TTS worker, not the temporary cloud voice fallback.",
      progress: 0,
      steps: pendingSteps,
    });
    return;
  }

  await createLocalAudio(selectedPdf);
});
// Drag-and-drop support for the upload form.

// preventDefault on dragover is what allows the later drop event to fire on the form.
uploadForm.addEventListener("dragover", (event) => {
  event.preventDefault();
  uploadForm.classList.add("is-dragging");
});

uploadForm.addEventListener("dragleave", () => {
  uploadForm.classList.remove("is-dragging");
});

// Only the first dropped file is used. It is copied into the file input through
// a DataTransfer so a later form submit finds it there.
uploadForm.addEventListener("drop", (event) => {
  event.preventDefault();
  uploadForm.classList.remove("is-dragging");

  const droppedFile = event.dataTransfer.files[0];
  if (!droppedFile) return;

  const carrier = new DataTransfer();
  carrier.items.add(droppedFile);
  pdfInput.files = carrier.files;
  describeFile(droppedFile);
});
/**
 * Upload a PDF with the selected options to the active backend and start
 * polling the job that is created.
 *
 * The create button is disabled for the duration of the upload and re-enabled
 * here only when the upload fails.
 *
 * @param {File} file - The PDF to convert.
 * @returns {Promise<void>} Failures are shown in the upload error area instead of being thrown.
 */
async function createLocalAudio(file) {
  // Read the form values before resetJob touches the page.
  const fields = [
    ["pdf", file],
    ["voice_id", voiceSelect.value],
    ["tts_speed", speedSelect.value],
    ["ocr_engine", ocrModeSelect.value],
    ["page_limit", pageLimitSelect.value],
  ];
  const body = new FormData();
  for (const [key, value] of fields) {
    body.append(key, value);
  }

  resetJob(file.name, "Uploading PDF.");
  createButton.disabled = true;

  try {
    if (remoteWorkerMode) {
      const workerSessionReady = await ensureWorkerSessionForUpload();
      if (!workerSessionReady) {
        throw new Error("Enter the code again so this browser can upload directly to the OCR/TTS worker.");
      }
    }
    const response = await apiFetch("/api/jobs", { method: "POST", body });
    const created = await readJson(response);
    startPolling(created.jobId);
  } catch (error) {
    setError(uploadError, describeNetworkError(error));
    updateProgress(0);
    setJobState("failed");
    createButton.disabled = false;
  }
}
/**
 * Create audio entirely from the browser: extract the PDF text here, then synthesize it
 * chunk by chunk through the site's /api/cloud-tts endpoint and merge the WAV parts.
 *
 * When cloud TTS is not ready, or synthesis fails after text was extracted, the function
 * offers browser read-aloud instead (if the browser supports it).
 *
 * NOTE(review): no caller is visible in this part of the file — the upload handler calls
 * createLocalAudio. Confirm whether this path is still reachable.
 *
 * @param {File} file - The PDF to read.
 * @returns {Promise<void>} Failures are reported in the page rather than thrown.
 */
async function createCloudAudio(file) {
const browserSpeechAvailable = canUseBrowserSpeech();
// Nothing can be produced without either cloud TTS or browser speech.
if (!cloudTtsReady && !browserSpeechAvailable) {
setError(
uploadError,
formatDiagnosticMessage(
workerUnavailableDiagnostic,
"Downloadable Vercel audio needs WORKER_BASE_URL. Browser read-aloud can be used for a short temporary test.",
),
);
return;
}
resetJob(file.name, "Reading PDF in this browser.");
createButton.disabled = true;
// Kept outside the try block so the catch handler can still offer read-aloud for text already extracted.
let speechTextForFallback = "";
try {
const extracted = await extractPdfText(file, getSelectedPageLimit());
const speechText = prepareTextForSpeech(extracted.text);
speechTextForFallback = speechText;
const chunks = chunkText(speechText, cloudTtsMaxChars);
const quality = assessTextQuality(extracted.text, speechText);
pagesValue.textContent = formatPages({ pages: extracted.pages, totalPages: extracted.totalPages });
charactersValue.textContent = speechText.length.toLocaleString();
engineValue.textContent = "cloud";
extractionValue.textContent = extracted.extraction;
qualityValue.textContent = formatQuality(quality);
showQualityHint(quality);
// Refuse to synthesize when there is no text or the quality check rejects it.
if (!chunks.length || !quality.readyForTts) {
const reason = quality.reasons.length ? ` ${quality.reasons.join("; ")}.` : "";
throw new Error(`Text quality is poor, so audio was not created.${reason}`);
}
// Without cloud TTS the only option left is browser read-aloud.
if (!cloudTtsReady) {
showBrowserSpeechFallback(
speechText,
file.name,
formatDiagnosticMessage(workerUnavailableDiagnostic, "Downloadable Vercel audio needs WORKER_BASE_URL."),
);
return;
}
audioFormatValue.textContent = "WAV";
setJobState("speaking", "Creating");
updateStage({
id: "voice",
label: "Voice",
phase: "Creating voice",
detail: "Creating Arabic audio from the cleaned text.",
progress: progressRanges.voiceStart,
step: 4,
totalSteps: 5,
itemProgress: stageItem("Audio part", 0, chunks.length),
steps: stepStates("voice"),
});
// Chunks are requested one at a time, in order, so the merged audio keeps the text order.
const audioBuffers = [];
for (let index = 0; index < chunks.length; index += 1) {
const label = `Creating audio part ${index + 1} of ${chunks.length}`;
jobMessage.textContent = label;
updateStage({
id: "voice",
label: "Voice",
phase: "Creating audio parts",
detail: label,
progress: rangeProgress(progressRanges.voiceStart, progressRanges.voiceEnd, index + 1, chunks.length),
step: 4,
totalSteps: 5,
itemProgress: stageItem("Audio part", index + 1, chunks.length),
steps: stepStates("voice"),
});
const response = await fetch("/api/cloud-tts", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ text: chunks[index], voiceId: voiceSelect.value }),
});
if (!response.ok) {
const payload = await response.json().catch(() => ({}));
throw new Error(payload.detail || "Cloud voice failed.");
}
audioBuffers.push(await response.arrayBuffer());
}
const audioBlob = mergeWavBuffers(audioBuffers);
showAudio(audioBlob, file.name);
setJobState("complete");
jobMessage.textContent = "Audio is ready.";
updateStage({
id: "ready",
label: "Ready",
phase: "Audio ready",
detail: "Audio is ready to play or download.",
progress: 100,
step: 5,
totalSteps: 5,
steps: defaultStageSteps.map((step) => ({ ...step, state: "done" })),
});
} catch (error) {
// If text was extracted before the failure, fall back to read-aloud instead of failing the job.
if (speechTextForFallback && canUseBrowserSpeech()) {
const fallbackMessage = formatDiagnosticMessage(
workerUnavailableDiagnostic,
`${error.message} Browser read-aloud is available now, but downloadable Vercel audio needs WORKER_BASE_URL.`,
);
showBrowserSpeechFallback(
speechTextForFallback,
file.name,
fallbackMessage,
);
setError(uploadError, fallbackMessage);
return;
}
setError(uploadError, error.message);
setJobState("failed");
jobMessage.textContent = "Processing failed.";
// The failure is always shown on the third step (index 2, "ocr"), wherever it actually occurred.
updateStage({
id: "ocr",
label: "Stopped",
phase: "Processing stopped",
detail: "Processing stopped before audio could be created.",
progress: 0,
steps: defaultStageSteps.map((step, index) => ({
...step,
state: index === 2 ? "failed" : index < 2 ? "done" : "pending",
})),
});
} finally {
// Runs on every exit path from the try/catch, including the early returns.
createButton.disabled = false;
}
}
/**
 * Extract text from a PDF in the browser, using OCR when the embedded text layer is
 * missing or too incomplete.
 *
 * @param {File} file - The PDF to read.
 * @param {number} [pageLimit=0] - Maximum number of pages to process; 0 means all pages.
 * @returns {Promise<{pages: number, totalPages: number, text: string, extraction: string}>}
 *   The processed page count, the document's page count, the cleaned text, and
 *   "browser" (embedded text) or "ocr" as the extraction method.
 */
async function extractPdfText(file, pageLimit = 0) {
  const pdfjs = await getPdfJs();
  const data = await file.arrayBuffer();
  const pdf = await pdfjs.getDocument({ data }).promise;
  const totalPages = pdf.numPages;
  const pagesToProcess = effectivePageCount(totalPages, pageLimit);

  const pageTexts = [];
  for (let pageNumber = 1; pageNumber <= pagesToProcess; pageNumber += 1) {
    const page = await pdf.getPage(pageNumber);
    const content = await page.getTextContent();
    const fragments = content.items.map((item) => item.str || "");
    pageTexts.push(cleanText(fragments.join(" ")));

    pagesValue.textContent = pageNumber;
    jobMessage.textContent = `Reading page ${pageNumber} of ${pagesToProcess}`;
    updateStage({
      id: "text",
      label: "Text scan",
      phase: "Checking text layer",
      detail: `Checking the PDF text layer: page ${pageNumber} of ${pagesToProcess}.`,
      progress: rangeProgress(progressRanges.textStart, progressRanges.textEnd, pageNumber, pagesToProcess),
      step: 2,
      totalSteps: 5,
      itemProgress: stageItem("PDF page", pageNumber, pagesToProcess),
      steps: stepStates("text"),
    });
  }

  const embeddedText = cleanText(pageTexts.filter(Boolean).join("\n\n"));
  const hasEmbeddedText = embeddedText.length >= 20;

  // The embedded text is good enough: skip OCR entirely.
  if (hasEmbeddedText && !shouldOcrMixedPdf(pageTexts)) {
    return { pages: pagesToProcess, totalPages, text: embeddedText, extraction: "browser" };
  }

  if (hasEmbeddedText) {
    const missingPercent = Math.round(embeddedTextMissingPageRatio(pageTexts) * 100);
    jobMessage.textContent = `Embedded text is incomplete on ${missingPercent}% of pages. Starting Arabic OCR.`;
  } else {
    jobMessage.textContent = "No text layer found. Starting Arabic OCR.";
  }

  const ocrText = await ocrPdfText(pdf, pagesToProcess);
  return { pages: pagesToProcess, totalPages, text: cleanText(ocrText), extraction: "ocr" };
}
/**
 * Fraction of pages whose embedded text is shorter than 20 characters.
 *
 * @param {string[]} pageTexts - Cleaned text of each processed page.
 * @returns {number} A value from 0 to 1; 1 when there are no pages at all.
 */
function embeddedTextMissingPageRatio(pageTexts) {
  const pageCount = pageTexts.length;
  if (pageCount === 0) return 1;

  let sparsePages = 0;
  for (const text of pageTexts) {
    if (text.length < 20) sparsePages += 1;
  }
  return sparsePages / pageCount;
}
/**
 * Decide whether a PDF's embedded text is too patchy to use without OCR.
 *
 * @param {string[]} pageTexts - Cleaned text of each processed page.
 * @returns {boolean} True when there are no pages, when every page has fewer than
 *   20 characters, or when the share of such pages exceeds mixedPdfOcrMissingPageRatio.
 */
function shouldOcrMixedPdf(pageTexts) {
  if (pageTexts.length === 0) return true;

  const everyPageSparse = pageTexts.every((text) => text.length < 20);
  if (everyPageSparse) return true;

  const missingShare = embeddedTextMissingPageRatio(pageTexts);
  return missingShare > mixedPdfOcrMissingPageRatio;
}
/**
 * Run Arabic OCR over the first `pagesToProcess` pages of a loaded PDF.
 *
 * Pages are handled one at a time: each is rendered to an image, recognized,
 * and reported through updateStage before and after recognition.
 *
 * The image's object URL is revoked in a `finally` block, so a recognition
 * failure no longer leaves the rendered page image allocated.
 *
 * @param {object} document - A loaded pdf.js document (shadows the global `document` in this function).
 * @param {number} pagesToProcess - Number of pages to read, starting at page 1.
 * @returns {Promise<string>} Text of all pages that produced any, joined by blank lines.
 * @throws Whatever rendering or recognition throws; the current page image is released first.
 */
async function ocrPdfText(document, pagesToProcess) {
  const worker = await getOcrWorker();
  const pieces = [];
  for (let pageNumber = 1; pageNumber <= pagesToProcess; pageNumber += 1) {
    // Shared with the Tesseract logger so its progress events name the right page.
    browserOcrProgress = { page: pageNumber, total: pagesToProcess };
    jobMessage.textContent = `OCR reading page ${pageNumber} of ${pagesToProcess}`;
    updateStage({
      id: "ocr",
      label: "Arabic OCR",
      phase: "Rendering scanned pages",
      detail: `Preparing scanned page ${pageNumber} of ${pagesToProcess}.`,
      progress: rangeProgress(progressRanges.ocrStart, progressRanges.ocrEnd, pageNumber - 1, pagesToProcess),
      step: 3,
      totalSteps: 5,
      itemProgress: stageItem("Scanned page", pageNumber - 1, pagesToProcess),
      steps: stepStates("ocr"),
    });

    const imageUrl = await renderPageForOcr(document, pageNumber);
    let result;
    try {
      result = await withTesseractNoiseFilter(() => worker.recognize(imageUrl));
    } finally {
      // Release the rendered page whether or not recognition succeeded.
      URL.revokeObjectURL(imageUrl);
    }
    // Pages with only whitespace are skipped; kept text is stored untrimmed.
    if (result.data.text.trim()) pieces.push(result.data.text);

    updateStage({
      id: "ocr",
      label: "Arabic OCR",
      phase: "Reading scanned pages",
      detail: `Finished scanned page ${pageNumber} of ${pagesToProcess}.`,
      progress: rangeProgress(progressRanges.ocrStart, progressRanges.ocrEnd, pageNumber, pagesToProcess),
      step: 3,
      totalSteps: 5,
      itemProgress: stageItem("Scanned page", pageNumber, pagesToProcess),
      steps: stepStates("ocr"),
    });
  }
  return pieces.join("\n\n");
}
/**
 * Number of pages to process for a document, given an optional limit.
 *
 * @param {number} totalPages - Pages in the document.
 * @param {number} pageLimit - Requested maximum; values that are not greater than 0 mean "no limit".
 * @returns {number} `totalPages` when there is no limit, otherwise the smaller of
 *   the two values, never below 0.
 */
function effectivePageCount(totalPages, pageLimit) {
  const hasLimit = pageLimit > 0;
  if (!hasLimit) return totalPages;
  return Math.max(0, Math.min(totalPages, pageLimit));
}
/**
 * Read the page limit chosen in the upload form.
 *
 * @returns {number} The selected value parsed as a base-10 integer, or 0 when it does not parse.
 */
function getSelectedPageLimit() {
  const parsed = Number.parseInt(pageLimitSelect.value, 10);
  if (!parsed) return 0;
  return parsed;
}
/**
 * Return the shared Tesseract worker for Arabic, creating it on first use.
 *
 * Creation announces the loading stage, creates the worker with a logger that
 * forwards recognition progress to the page, and sets the recognition
 * parameters when the worker supports `setParameters`.
 *
 * @returns {Promise<object>} The cached Tesseract worker.
 */
async function getOcrWorker() {
  if (ocrWorker) return ocrWorker;

  jobMessage.textContent = "Loading Arabic OCR. This can take a minute the first time.";
  updateStage({
    id: "ocr",
    label: "Arabic OCR",
    phase: "Loading Arabic OCR",
    detail: "Loading Arabic OCR files. This can take a minute the first time.",
    progress: progressRanges.ocrStart,
    step: 3,
    totalSteps: 5,
    steps: stepStates("ocr"),
  });

  // Turns Tesseract's per-page progress into overall progress. The page being read
  // comes from browserOcrProgress, which ocrPdfText updates before each page.
  const reportRecognitionProgress = (event) => {
    const isRecognizing = event.status === "recognizing text" && event.progress;
    if (!isRecognizing) return;

    const percent = Math.round(event.progress * 100);
    const page = browserOcrProgress.page || 1;
    const total = browserOcrProgress.total || 1;
    const pagesDone = page - 1 + event.progress;
    jobMessage.textContent = `OCR reading page ${page} of ${total}: ${percent}%`;
    updateStage({
      id: "ocr",
      label: "Arabic OCR",
      phase: "Reading scanned pages",
      detail: `Reading scanned page ${page} of ${total}: ${percent}%.`,
      progress: rangeProgress(progressRanges.ocrStart, progressRanges.ocrEnd, pagesDone, total),
      step: 3,
      totalSteps: 5,
      itemProgress: stageItem("Scanned page", pagesDone, total),
      steps: stepStates("ocr"),
    });
  };

  const createWorker = await getTesseractCreateWorker();
  ocrWorker = await createWorker("ara", 1, {
    langPath: "https://tessdata.projectnaptha.com/4.0.0",
    workerPath: "https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/worker.min.js",
    corePath: "https://cdn.jsdelivr.net/npm/tesseract.js-core@5",
    logger: reportRecognitionProgress,
  });

  const canConfigure = typeof ocrWorker.setParameters === "function";
  if (canConfigure) {
    await ocrWorker.setParameters({
      tessedit_pageseg_mode: "6",
      preserve_interword_spaces: "1",
    });
  }
  return ocrWorker;
}
/**
 * Run a callback while dropping two known Tesseract console messages.
 *
 * `console.warn` and `console.error` are wrapped for the duration of the call
 * and restored afterwards, whether the callback resolves or rejects. Messages
 * containing "Image too small to scale" or "Line cannot be recognized" are
 * dropped; everything else is passed through.
 *
 * @param {() => any} callback - Work to run with the filter installed.
 * @returns {Promise<any>} The callback's result.
 */
async function withTesseractNoiseFilter(callback) {
  const saved = { warn: console.warn, error: console.error };
  const noiseMarkers = ["Image too small to scale", "Line cannot be recognized"];

  const muteNoise = (passThrough) => (...args) => {
    const line = args.map((arg) => String(arg)).join(" ");
    const isNoise = noiseMarkers.some((marker) => line.includes(marker));
    if (!isNoise) passThrough(...args);
  };

  console.warn = muteNoise(saved.warn);
  console.error = muteNoise(saved.error);
  try {
    return await callback();
  } finally {
    console.warn = saved.warn;
    console.error = saved.error;
  }
}
/**
 * Returns the PDF.js module, importing it from the CDN on first use.
 *
 * The loaded module is cached in the file-level `pdfjsLib` variable and its
 * worker script URL is configured once, right after the import.
 *
 * @returns {Promise<object>} The PDF.js module namespace.
 */
async function getPdfJs() {
  if (!pdfjsLib) {
    pdfjsLib = await import("https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.10.38/pdf.min.mjs");
    const workerOptions = pdfjsLib.GlobalWorkerOptions;
    workerOptions.workerSrc = "https://cdnjs.cloudflare.com/ajax/libs/pdf.js/4.10.38/pdf.worker.min.mjs";
  }
  return pdfjsLib;
}
/**
 * Returns Tesseract.js' `createWorker` factory, importing the library from
 * the CDN on first use and caching the factory in `tesseractCreateWorker`.
 *
 * The factory is looked up as a named export first, then on the default export.
 *
 * @returns {Promise<Function>} The `createWorker` function.
 * @throws {Error} When the imported module exposes no `createWorker`.
 */
async function getTesseractCreateWorker() {
  if (!tesseractCreateWorker) {
    const loadedModule = await import(
      "https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.esm.min.js"
    );
    tesseractCreateWorker = loadedModule.createWorker || loadedModule.default?.createWorker;
    if (!tesseractCreateWorker) {
      throw new Error("Arabic OCR could not load. Try the local version for scanned PDFs.");
    }
  }
  return tesseractCreateWorker;
}
/**
 * Renders one PDF page at 2x scale and returns it as a PNG object URL.
 *
 * The caller owns the returned URL and should revoke it when the image is no
 * longer needed. The temporary canvas and the page's PDF.js resources are
 * released here once the PNG exists (or rendering fails), so a long
 * page-by-page OCR run does not keep every rendered bitmap alive.
 *
 * @param {object} document - A loaded PDF.js document (note: this parameter
 *   shadows the global `document` inside this function).
 * @param {number} pageNumber - 1-based page number passed to `getPage`.
 * @returns {Promise<string>} Object URL of the rendered PNG.
 */
async function renderPageForOcr(document, pageNumber) {
  const page = await document.getPage(pageNumber);
  const viewport = page.getViewport({ scale: 2 });
  const canvas = documentCanvas(viewport.width, viewport.height);
  try {
    const context = canvas.getContext("2d");
    await page.render({ canvasContext: context, viewport }).promise;
    return await canvasToObjectUrl(canvas);
  } finally {
    // Shrinking the canvas frees its backing store right away instead of
    // waiting for garbage collection; the blob already holds the pixels.
    canvas.width = 0;
    canvas.height = 0;
    page.cleanup?.();
  }
}
/**
 * Creates a detached `<canvas>` sized to hold the given dimensions.
 *
 * @param {number} width - Desired width; rounded up to a whole pixel.
 * @param {number} height - Desired height; rounded up to a whole pixel.
 * @returns {HTMLCanvasElement} The new canvas element.
 */
function documentCanvas(width, height) {
  return Object.assign(document.createElement("canvas"), {
    width: Math.ceil(width),
    height: Math.ceil(height),
  });
}
/**
 * Encodes a canvas as PNG and wraps the result in an object URL.
 *
 * @param {HTMLCanvasElement} canvas - Canvas to encode.
 * @returns {Promise<string>} Object URL for the PNG blob.
 *   Rejects when the canvas yields no blob.
 */
function canvasToObjectUrl(canvas) {
  return new Promise((resolve, reject) => {
    const handleBlob = (blob) => {
      if (blob) {
        resolve(URL.createObjectURL(blob));
      } else {
        reject(new Error("Could not render PDF page for OCR."));
      }
    };
    canvas.toBlob(handleBlob, "image/png");
  });
}
/**
 * Normalizes extracted text before further processing.
 *
 * Steps, in order: NFKC normalization, left-to-right/right-to-left marks
 * replaced by spaces, runs of tab/CR/form-feed/vertical-tab collapsed to one
 * space, three or more newlines reduced to two, repeated spaces collapsed,
 * and the result trimmed.
 *
 * @param {string} text - Raw text.
 * @returns {string} Cleaned text.
 */
function cleanText(text) {
  const normalized = text.normalize("NFKC");
  const withoutDirectionMarks = normalized.replace(/[\u200e\u200f]/g, " ");
  const withoutControlWhitespace = withoutDirectionMarks.replace(/[\t\r\f\v]+/g, " ");
  const withLimitedBlankLines = withoutControlWhitespace.replace(/\n{3,}/g, "\n\n");
  const withSingleSpaces = withLimitedBlankLines.replace(/[ ]{2,}/g, " ");
  return withSingleSpaces.trim();
}
/**
 * Produces the text that will actually be spoken.
 *
 * After `cleanText`, the text is processed line by line: short lines
 * (48 characters or fewer) that occur three or more times are treated as
 * repeated lines, consecutive duplicate lines are skipped, and lines rejected
 * by `shouldDropSpeechLine` are removed. A blank line in the input is kept as
 * a single paragraph break between surviving lines.
 *
 * @param {string} text - Raw extracted text.
 * @returns {string} Cleaned, de-duplicated text with at most one blank line
 *   between paragraphs.
 */
function prepareTextForSpeech(text) {
  const lines = cleanText(text)
    .split(/\n/)
    .map((entry) => entry.trim());

  const occurrences = new Map();
  lines.filter(Boolean).forEach((entry) => {
    occurrences.set(entry, (occurrences.get(entry) || 0) + 1);
  });

  const repeatedLines = new Set();
  for (const [entry, seen] of occurrences) {
    if (seen >= 3 && entry.length <= 48) {
      repeatedLines.add(entry);
    }
  }

  const kept = [];
  let lastSeen = "";
  let wantsBlank = false;
  for (const entry of lines) {
    if (entry === "") {
      // Only remember a paragraph break once something has been emitted.
      wantsBlank = kept.length > 0;
    } else if (entry !== lastSeen) {
      lastSeen = entry;
      if (!shouldDropSpeechLine(entry, repeatedLines)) {
        if (wantsBlank && kept.length > 0 && kept[kept.length - 1] !== "") {
          kept.push("");
        }
        kept.push(entry);
        wantsBlank = false;
      }
    }
  }

  return kept.join("\n").replace(/\n{3,}/g, "\n\n").trim();
}
/**
 * Decides whether a single line should be left out of the spoken text.
 *
 * A line is dropped when it is empty, is only a number or roman numeral
 * (optionally wrapped in punctuation), is one of the repeated lines, or its
 * `lineNoiseMetrics` counts look like noise rather than Arabic prose.
 *
 * @param {string} line - Candidate line.
 * @param {Set<string>} repeatedLines - Lines considered repeated.
 * @returns {boolean} True when the line should be dropped.
 */
function shouldDropSpeechLine(line, repeatedLines) {
  const trimmed = line.trim();
  if (trimmed === "") return true;

  const numberOnlyPattern = /^[\s\-–—_.:|/\\()[\]{}]*(?:[0-9٠-٩۰-۹]+|[ivxlcdmIVXLCDM]+)[\s\-–—_.:|/\\()[\]{}]*$/;
  if (numberOnlyPattern.test(trimmed)) return true;

  const { arabicWords, arabicChars, digits, symbols, placeholders, latinWords } = lineNoiseMetrics(trimmed);
  if (repeatedLines.has(trimmed)) return true;

  // Lines without any Arabic are dropped when short or visibly noisy.
  const looksLikeNonArabicNoise = trimmed.length <= 24 || digits >= 3 || symbols >= 2 || latinWords;
  if (!arabicWords && trimmed.length <= 80 && looksLikeNonArabicNoise) return true;
  if (trimmed.length <= 2 && !arabicWords) return true;

  // Lines with some Arabic are dropped only when noise outweighs the words.
  return (
    (digits >= 4 && arabicWords <= 3) ||
    (digits >= 6 && digits > arabicChars) ||
    (symbols >= 3 && arabicWords <= 4) ||
    (placeholders >= 2 && arabicWords <= 4)
  );
}
/**
 * Counts the character classes used to judge whether a line is OCR noise.
 *
 * Note that the Arabic ranges used for `arabicWords` also contain the
 * Arabic-Indic digits, so such digits are counted both as Arabic runs and as
 * digits.
 *
 * @param {string} line - Line to measure.
 * @returns {{arabicWords: number, arabicChars: number, digits: number,
 *   symbols: number, placeholders: number, latinWords: number}} Counts of
 *   Arabic runs, characters inside those runs, digits, listed symbols,
 *   `?`/U+FFFD placeholders, and Latin runs of three or more letters.
 */
function lineNoiseMetrics(line) {
  const matchesOf = (pattern) => line.match(pattern) || [];

  const arabicRuns = matchesOf(/[\u0600-\u06ff\ufb50-\ufdff\ufe70-\ufeff]+/g);
  let arabicChars = 0;
  for (const run of arabicRuns) {
    arabicChars += run.length;
  }

  return {
    arabicWords: arabicRuns.length,
    arabicChars,
    digits: matchesOf(/[0-9\u0660-\u0669\u06f0-\u06f9]/g).length,
    symbols: matchesOf(/[!@#$%^&*_+=<>|~`]/g).length,
    placeholders: matchesOf(/[?\ufffd]/g).length,
    latinWords: matchesOf(/[A-Za-z]{3,}/g).length,
  };
}
/**
 * Scores how usable a piece of extracted text is for Arabic speech.
 *
 * The text is first passed through `prepareTextForSpeech`. The score rewards
 * length, Arabic words and a few very common Arabic words, and penalizes
 * placeholder characters, very short lines, repeated lines, Latin words,
 * digits, one-letter Arabic words and fragment-like lines.
 *
 * @param {string} text - Text to evaluate.
 * @returns {object} The numeric `score`, the prepared `speechText`, and the
 *   counts and ratios the score was built from.
 */
function scoreTextQuality(text) {
  const speechText = prepareTextForSpeech(text);
  const findAll = (pattern) => speechText.match(pattern) || [];

  const arabicWords = findAll(/[\u0600-\u06ff\ufb50-\ufdff\ufe70-\ufeff]+/g);
  const placeholderCount = findAll(/[?\ufffd]/g).length;
  const latinWords = findAll(/[A-Za-z]{3,}/g);
  const digitNoise = findAll(/[0-9\u0660-\u0669\u06f0-\u06f9]/g).length;

  const lines = [];
  for (const rawLine of speechText.split(/\n/)) {
    const trimmed = rawLine.trim();
    if (trimmed) lines.push(trimmed);
  }

  // Per-line tallies: fragment-like lines and very short lines.
  let fragmentLines = 0;
  let shortLines = 0;
  for (const line of lines) {
    const lineMetrics = lineNoiseMetrics(line);
    const isFragment = lineMetrics.arabicWords <= 2 && lineMetrics.arabicChars <= 18 && line.length <= 28;
    if (isFragment) fragmentLines += 1;
    if (line.length <= 3) shortLines += 1;
  }

  // Per-word tallies: one-letter words and hits on very common words.
  const commonWords = new Set(["في", "من", "على", "هذا", "هذه", "التي", "الذي", "كان", "إلى", "الى", "عن", "مع", "هو", "هي"]);
  let singleArabicWords = 0;
  let commonHits = 0;
  for (const word of arabicWords) {
    if (word.length === 1) singleArabicWords += 1;
    if (commonWords.has(word)) commonHits += 1;
  }

  const repeatedLines = lines.length - new Set(lines).size;
  const singleArabicWordRatio = singleArabicWords / Math.max(arabicWords.length, 1);
  const fragmentLineRatio = fragmentLines / Math.max(lines.length, 1);

  const score =
    speechText.length * 0.05 +
    arabicWords.length * 3 +
    commonHits * 18 -
    placeholderCount * 25 -
    shortLines * 8 -
    repeatedLines * 6 -
    latinWords.length * 4 -
    digitNoise * 3 -
    singleArabicWords * 6 -
    fragmentLines * 14;

  return {
    score,
    speechText,
    arabicWords,
    placeholderCount,
    latinWords,
    digitNoise,
    singleArabicWords,
    singleArabicWordRatio,
    fragmentLines,
    fragmentLineRatio,
  };
}
/**
 * Turns the numeric text score into a quality verdict with reasons.
 *
 * Four reasons are blocking and yield "poor": too little text, too few Arabic
 * words, too many placeholder characters, and non-Arabic text dominating.
 * Any other reason yields "warning"; no reasons yields "good".
 *
 * @param {string} text - Raw text; only used to build the default `speechText`.
 * @param {string} [speechText] - Prepared speech text to assess.
 * @returns {{quality: string, readyForTts: boolean, reasons: string[],
 *   score: number, singleArabicWordRatio: number, fragmentLineRatio: number}}
 *   Verdict, reasons in check order, and rounded score/ratios.
 */
function assessTextQuality(text, speechText = prepareTextForSpeech(text)) {
  const scored = scoreTextQuality(speechText);
  const arabicWordCount = scored.arabicWords.length;
  const latinWordCount = scored.latinWords.length;
  const placeholderRatio = scored.placeholderCount / Math.max(speechText.length, 1);

  const reasons = [];
  let hasBlockingReason = false;
  const block = (reason) => {
    reasons.push(reason);
    hasBlockingReason = true;
  };
  const warn = (reason) => {
    reasons.push(reason);
  };

  if (speechText.length < 20) block("too little readable text after cleanup");
  if (arabicWordCount < 5) block("too few Arabic words");
  if (placeholderRatio >= 0.2) {
    block("too many unreadable placeholder characters");
  } else if (placeholderRatio > 0) {
    warn("some unreadable placeholder characters remain");
  }
  if (scored.digitNoise >= Math.max(20, arabicWordCount)) {
    warn("digit-heavy OCR noise remains");
  }
  if (scored.singleArabicWordRatio >= 0.1 && arabicWordCount >= 25) {
    warn("many one-letter Arabic OCR fragments remain");
  }
  if (scored.fragmentLineRatio >= 0.25 && speechText.split(/\n/).length >= 8) {
    warn("many low-information OCR lines remain");
  }
  if (latinWordCount >= 3 && latinWordCount >= arabicWordCount) {
    block("non-Arabic OCR text dominates");
  }

  let quality = "good";
  if (hasBlockingReason) {
    quality = "poor";
  } else if (reasons.length) {
    quality = "warning";
  }

  const roundTo = (value, factor) => Math.round(value * factor) / factor;
  return {
    quality,
    readyForTts: quality !== "poor",
    reasons,
    score: roundTo(scored.score, 100),
    singleArabicWordRatio: roundTo(scored.singleArabicWordRatio, 10000),
    fragmentLineRatio: roundTo(scored.fragmentLineRatio, 10000),
  };
}
/**
 * Formats a quality verdict for display, e.g. a label followed by the
 * rounded score.
 *
 * @param {{quality?: string, score?: number}|null|undefined} quality -
 *   Verdict object.
 * @returns {string} "-" when there is no verdict; otherwise the label from
 *   `qualityLabels` (or the raw verdict) plus the rounded score when the
 *   score is truthy.
 */
function formatQuality(quality) {
  const level = quality?.quality;
  if (!level) return "-";

  const label = qualityLabels[level] || level;
  const roundedScore = quality.score ? Math.round(quality.score) : "";
  return `${label} ${roundedScore}`.trim();
}
/**
 * Splits text into pieces of at most `size` characters, preferring to break
 * at a space.
 *
 * A space is used as the break point only when it lies at or beyond 45% of
 * `size`; otherwise the piece is cut hard at `size` so pieces do not become
 * tiny. Each piece is trimmed and empty pieces are skipped.
 *
 * @param {string} text - Text to split.
 * @param {number} size - Maximum piece length. A value that is not at least 1
 *   cannot make progress, so the trimmed text is returned as one piece.
 * @returns {string[]} The pieces in order; empty when the text is blank.
 */
function splitLongTextAtWordBoundaries(text, size) {
  let remaining = text.trim();

  // A size below 1 (or NaN) would never shorten `remaining` and would loop forever.
  if (!(size >= 1)) {
    return remaining ? [remaining] : [];
  }

  const pieces = [];
  const earliestSplit = Math.max(1, Math.floor(size * 0.45));
  while (remaining.length > size) {
    let splitAt = remaining.lastIndexOf(" ", size);
    if (splitAt < earliestSplit) splitAt = size;
    const piece = remaining.slice(0, splitAt).trim();
    if (piece) pieces.push(piece);
    remaining = remaining.slice(splitAt).trim();
  }
  if (remaining) pieces.push(remaining);
  return pieces;
}
/**
 * Splits text into chunks of roughly `size` characters for speech.
 *
 * The text is prepared with `prepareTextForSpeech`, then packed paragraph by
 * paragraph. A paragraph longer than `size` is split into sentences (after
 * Latin or Arabic punctuation followed by whitespace), and a sentence that
 * still does not fit is cut by `splitLongTextAtWordBoundaries`.
 *
 * @param {string} text - Text to chunk.
 * @param {number} size - Target maximum chunk length.
 * @returns {string[]} Chunks in reading order.
 */
function chunkText(text, size) {
  const speechText = prepareTextForSpeech(text);
  const paragraphs = speechText
    .split(/\n{2,}/)
    .map((part) => part.trim())
    .filter(Boolean);

  const chunks = [];
  let pending = "";
  const flushPending = () => {
    if (pending) chunks.push(pending);
  };

  for (const paragraph of paragraphs) {
    // The +2 accounts for the blank line that joins paragraphs.
    if (pending.length + paragraph.length + 2 <= size) {
      pending = `${pending}\n\n${paragraph}`.trim();
    } else {
      flushPending();
      if (paragraph.length <= size) {
        pending = paragraph;
      } else {
        pending = "";
        for (const sentence of paragraph.split(/(?<=[.!\u061f?\u060c\u061b])\s+/)) {
          if (pending.length + sentence.length + 1 <= size) {
            pending = `${pending} ${sentence}`.trim();
          } else {
            flushPending();
            const parts = splitLongTextAtWordBoundaries(sentence, size);
            // All but the last part are complete chunks; the last part stays
            // pending so following sentences can be appended to it.
            const tail = parts.pop();
            chunks.push(...parts);
            pending = tail || "";
          }
        }
      }
    }
  }

  flushPending();
  return chunks;
}
/**
 * Combines WAV files into one WAV blob by concatenating their audio data.
 *
 * A single buffer is returned as-is inside a blob. For several buffers, a new
 * RIFF/WAVE file is written with the `fmt ` chunk of the FIRST buffer followed
 * by one `data` chunk holding every buffer's samples in order. The format
 * chunks are not compared, so all inputs are expected to share one format.
 *
 * @param {ArrayBuffer[]} buffers - WAV files in playback order.
 * @returns {Blob} Blob of type "audio/wav".
 * @throws {Error} When `buffers` is empty, or when `parseWav` rejects a buffer.
 */
function mergeWavBuffers(buffers) {
  if (buffers.length === 0) {
    // Previously this fell through to `wavs[0].fmt` and failed with an opaque TypeError.
    throw new Error("Cloud voice returned no audio to merge.");
  }
  if (buffers.length === 1) return new Blob([buffers[0]], { type: "audio/wav" });

  const wavs = buffers.map(parseWav);
  const fmt = wavs[0].fmt;
  const dataLength = wavs.reduce((total, wav) => total + wav.data.length, 0);

  // Layout: RIFF header (12) + fmt chunk header (8) + fmt + data chunk header (8) + samples.
  const output = new ArrayBuffer(12 + 8 + fmt.length + 8 + dataLength);
  const view = new DataView(output);
  const bytes = new Uint8Array(output);

  let offset = 0;
  offset = writeString(bytes, offset, "RIFF");
  // The RIFF size field excludes the 8 bytes of "RIFF" and the size field itself.
  view.setUint32(offset, output.byteLength - 8, true);
  offset += 4;
  offset = writeString(bytes, offset, "WAVE");

  offset = writeString(bytes, offset, "fmt ");
  view.setUint32(offset, fmt.length, true);
  offset += 4;
  bytes.set(fmt, offset);
  offset += fmt.length;

  offset = writeString(bytes, offset, "data");
  view.setUint32(offset, dataLength, true);
  offset += 4;
  for (const wav of wavs) {
    bytes.set(wav.data, offset);
    offset += wav.data.length;
  }

  return new Blob([output], { type: "audio/wav" });
}
/**
 * Extracts the `fmt ` and `data` chunks from a RIFF/WAVE file.
 *
 * Chunks are walked from byte 12; each chunk's length is read little-endian
 * and odd-length chunks are followed by one pad byte. If a chunk id occurs
 * more than once, the last occurrence wins.
 *
 * @param {ArrayBuffer} buffer - Complete WAV file.
 * @returns {{fmt: Uint8Array, data: Uint8Array}} Copies of the two chunk bodies.
 * @throws {Error} When the RIFF/WAVE header is missing or either chunk is absent.
 */
function parseWav(buffer) {
  const view = new DataView(buffer);
  const bytes = new Uint8Array(buffer);

  const hasWaveHeader = readString(bytes, 0, 4) === "RIFF" && readString(bytes, 8, 4) === "WAVE";
  if (!hasWaveHeader) {
    throw new Error("Cloud voice returned an audio format that cannot be merged.");
  }

  const wanted = { "fmt ": null, data: null };
  let cursor = 12;
  while (cursor + 8 <= bytes.length) {
    const chunkId = readString(bytes, cursor, 4);
    const chunkLength = view.getUint32(cursor + 4, true);
    const bodyStart = cursor + 8;
    const bodyEnd = bodyStart + chunkLength;
    if (chunkId === "fmt " || chunkId === "data") {
      wanted[chunkId] = bytes.slice(bodyStart, bodyEnd);
    }
    // Chunks are word-aligned: skip the pad byte after an odd-length body.
    cursor = bodyEnd + (chunkLength % 2);
  }

  const fmt = wanted["fmt "];
  const data = wanted.data;
  if (!fmt || !data) throw new Error("Cloud voice returned invalid WAV audio.");
  return { fmt, data };
}
/**
 * Reads `length` bytes starting at `offset` as a string, one character per byte.
 *
 * @param {Uint8Array} bytes - Source bytes.
 * @param {number} offset - Index of the first byte.
 * @param {number} length - Number of bytes to read.
 * @returns {string} The decoded characters (shorter when the range runs past the end).
 */
function readString(bytes, offset, length) {
  let text = "";
  for (const code of bytes.slice(offset, offset + length)) {
    text += String.fromCharCode(code);
  }
  return text;
}
/**
 * Writes each UTF-16 code unit of `value` into `bytes`, starting at `offset`.
 *
 * @param {Uint8Array} bytes - Destination bytes (modified in place).
 * @param {number} offset - Index of the first byte to write.
 * @param {string} value - Text to write.
 * @returns {number} The offset just past the written text.
 */
function writeString(bytes, offset, value) {
  let cursor = offset;
  for (const unit of value.split("")) {
    bytes[cursor] = unit.charCodeAt(0);
    cursor += 1;
  }
  return cursor;
}
/**
 * Shows finished audio in the player and wires up the download link.
 *
 * The browser read-aloud fallback is reset first. Any previous object URL is
 * revoked before a new one is created for `blob`. The download name is the
 * source name without a trailing ".pdf" (or "arabic-pdf" when that leaves
 * nothing), with ".wav" appended.
 *
 * @param {Blob} blob - Audio to play and offer for download.
 * @param {string} sourceName - Name of the source PDF.
 */
function showAudio(blob, sourceName) {
  resetBrowserSpeechFallback();

  if (currentAudioUrl) {
    URL.revokeObjectURL(currentAudioUrl);
  }
  const objectUrl = URL.createObjectURL(blob);
  currentAudioUrl = objectUrl;

  audioPlayer.src = objectUrl;
  downloadLink.href = objectUrl;
  const baseName = sourceName.replace(/\.pdf$/i, "") || "arabic-pdf";
  downloadLink.download = `${baseName}.wav`;

  audioPanel.classList.remove("hidden");
}
/**
 * Reports whether the browser exposes the speech synthesis API.
 *
 * @returns {boolean} True when `window` has a `speechSynthesis` property and
 *   `window.SpeechSynthesisUtterance` is a function.
 */
function canUseBrowserSpeech() {
  if (!("speechSynthesis" in window)) {
    return false;
  }
  return typeof window.SpeechSynthesisUtterance === "function";
}
/**
 * Stops any browser read-aloud and hides its panel.
 *
 * Cancels queued speech when the speech API is available, clears the stored
 * text and source name, hides the read-aloud panel, and restores its default
 * message.
 */
function resetBrowserSpeechFallback() {
  if (canUseBrowserSpeech()) {
    window.speechSynthesis.cancel();
  }

  browserSpeechSourceName = "";
  browserSpeechText = "";

  browserSpeechMessage.textContent = "Browser read-aloud is ready.";
  browserSpeechPanel.classList.add("hidden");
}
/**
 * Switches the UI to listen-only browser read-aloud.
 *
 * Stores the text and source name for later playback, hides the audio panel
 * and clears its player/download attributes, marks the job as "Listen only",
 * shows the read-aloud panel with `message`, and reports the final stage with
 * every step marked done.
 *
 * @param {string} text - Text to be read aloud later.
 * @param {string} sourceName - Name of the source PDF.
 * @param {string} message - Explanation shown in the read-aloud panel.
 */
function showBrowserSpeechFallback(text, sourceName, message) {
  browserSpeechText = text;
  browserSpeechSourceName = sourceName;

  // No downloadable audio exists in this mode, so clear the player and link.
  audioPanel.classList.add("hidden");
  audioPlayer.removeAttribute("src");
  for (const attribute of ["href", "download"]) {
    downloadLink.removeAttribute(attribute);
  }

  audioFormatValue.textContent = "Browser only";
  engineValue.textContent = "browser voice";
  setJobState("speaking", "Listen only");
  jobMessage.textContent = "Browser read-aloud is ready.";

  browserSpeechMessage.textContent = message;
  browserSpeechPanel.classList.remove("hidden");

  const finishedSteps = defaultStageSteps.map((step) => ({ ...step, state: "done" }));
  updateStage({
    id: "ready",
    label: "Listen",
    phase: "Browser read-aloud ready",
    detail: "Downloadable audio needs the Vercel worker. Browser read-aloud is ready for listening.",
    progress: 100,
    step: 5,
    totalSteps: 5,
    steps: finishedSteps,
  });
}
/**
 * Picks an Arabic voice from the browser's speech synthesis voices.
 *
 * A voice whose language tag starts with "ar" is preferred; otherwise the
 * first voice whose name or language mentions "arabic" or "ar-"/"ar_" is used.
 *
 * @returns {SpeechSynthesisVoice|null} The chosen voice, or null when speech
 *   is unavailable or no voice matches.
 */
function getBrowserArabicVoice() {
  if (!canUseBrowserSpeech()) return null;

  const voices = window.speechSynthesis.getVoices();

  const byLanguageTag = voices.find((voice) => voice.lang?.toLowerCase().startsWith("ar"));
  if (byLanguageTag) return byLanguageTag;

  const byDescription = voices.find((voice) => /arabic|ar[-_]/i.test(`${voice.name} ${voice.lang}`));
  return byDescription || null;
}
/**
 * Reads the stored fallback text aloud with the browser's speech synthesis.
 *
 * The text is split with `chunkText(..., 700)` and spoken one chunk at a
 * time: each utterance's `onend` handler updates the stage display and then
 * starts the next chunk. Does nothing when there is no stored text, speech is
 * unavailable, or chunking yields nothing.
 */
function speakBrowserSpeechText() {
if (!browserSpeechText || !canUseBrowserSpeech()) return;
const chunks = chunkText(browserSpeechText, 700);
if (!chunks.length) return;
const synth = window.speechSynthesis;
const voice = getBrowserArabicVoice();
// Falls back to normal speed when the selected value is not a usable number.
const rate = Number(speedSelect.value) || 1;
// Number of chunks that have finished speaking; also the index of the next chunk.
let index = 0;
// Drop anything still queued from an earlier read-aloud before starting over.
synth.cancel();
// Speaks chunk `index`, or reports completion once every chunk has been spoken.
const speakNext = () => {
if (index >= chunks.length) {
jobMessage.textContent = "Browser read-aloud finished.";
return;
}
const utterance = new window.SpeechSynthesisUtterance(chunks[index]);
// Without a matching voice, only the language is set and `voice` stays null.
utterance.lang = voice?.lang || "ar";
utterance.voice = voice;
// Keep the selected speed within 0.6 to 1.35.
utterance.rate = Math.max(0.6, Math.min(1.35, rate));
utterance.onend = () => {
index += 1;
// 1-based number of the part that is about to be read, capped at the last part.
const nextPart = Math.min(index + 1, chunks.length);
updateStage({
id: "voice",
label: "Listening",
phase: "Browser read-aloud",
detail: index >= chunks.length ? "Browser read-aloud finished." : `Reading part ${nextPart} of ${chunks.length}.`,
progress: rangeProgress(progressRanges.voiceStart, 100, index, chunks.length),
step: 5,
totalSteps: 5,
itemProgress: stageItem("Read-aloud part", index, chunks.length),
steps: defaultStageSteps.map((step) => ({ ...step, state: "done" })),
});
speakNext();
};
// An error only updates the message; the remaining chunks are not started.
utterance.onerror = () => {
jobMessage.textContent = "Browser read-aloud stopped.";
};
jobMessage.textContent = `Reading ${browserSpeechSourceName || "PDF"} in the browser.`;
synth.speak(utterance);
};
// Show the initial stage (part 1, nothing finished yet) before speech begins.
updateStage({
id: "voice",
label: "Listening",
phase: "Browser read-aloud",
detail: `Reading part 1 of ${chunks.length}.`,
progress: progressRanges.voiceStart,
step: 5,
totalSteps: 5,
itemProgress: stageItem("Read-aloud part", 0, chunks.length),
steps: defaultStageSteps.map((step) => ({ ...step, state: "done" })),
});
speakNext();
}
/**
 * Shows the chosen file's name and size (in whole megabytes, rounded up).
 *
 * @param {{name: string, size: number}|null|undefined} file - Selected file,
 *   or a falsy value when nothing is selected.
 */
function describeFile(file) {
  if (!file) {
    fileName.textContent = "No file selected";
    return;
  }
  const megabytes = Math.ceil(file.size / 1024 / 1024);
  fileName.textContent = `${file.name} (${megabytes} MB)`;
}
/**
 * Updates the engine notice to describe the selected OCR mode.
 *
 * Known modes set the notice text and remove its "warning" class. An
 * unrecognized mode leaves the notice untouched. The two comparison modes
 * also mention which Arabic OCR models are installed.
 */
function describeOcrMode() {
  const installedText = installedArabicOcrModels.length
    ? ` Installed now: ${installedArabicOcrModels.join(", ")}.`
    : " Install QARI, Tawkeed, or KATIB on the worker for the best Arabic-trained OCR.";

  const noticesByMode = new Map([
    ["arabic-max", `Maximum Arabic OCR selected. It tries the most engines and keeps the cleanest text, but it is slower. Use Quick test first.${installedText}`],
    ["arabic", `Arabic OCR comparison selected. It compares installed OCR paths and can be much slower than the recommended Tesseract setting.${installedText}`],
    ["qari-ocr", "QARI Arabic books selected. Use this on a short sample or strong worker; it is trained for Arabic books, Islamic texts, manuscripts, and layout-aware Arabic transcription."],
    ["tawkeed-ocr", "Tawkeed Arabic OCR selected. Use this on a short sample or worker; it is Arabic-first and trained for documents, handwriting, and scene text."],
    ["katib-ocr", "KATIB Arabic OCR selected. Use this on a short sample; it is a smaller Arabic-trained model for printed and handwritten Arabic."],
    ["arabic-qwen-ocr", "Arabic-Qwen OCR selected. Use this on a short sample or worker; it is a 0.9B Arabic-trained OCR model."],
    ["arabic-glm-ocr", "Arabic-GLM OCR selected. Use this on a short sample or strong worker; it is a recent Arabic-trained OCR model for books and scanned documents."],
    ["baseer-ocr", "Baseer Arabic OCR selected. Use this on a short sample or worker for complex Arabic document layouts."],
    ["best", "Best scan test selected. Use this on a short sample, then run the winning engine for the full book."],
    ["paddleocr", "Rank 3 PaddleOCR Arabic selected. It works, but the 5-page benchmark produced more fragmented text than Tesseract."],
    ["tesseract", "Rank 1 Tesseract Arabic selected. This produced the best readable text in the 5-page OCR benchmark."],
    ["tesseract-fast", "Rank 2 Tesseract Arabic selected. This was the faster readable runner-up in the 5-page OCR benchmark."],
    ["paddleocr-vl", "PaddleOCR-VL selected. Use this only on a short sample or strong worker; it is much heavier than normal Arabic OCR."],
    ["surya", "Surya heavy OCR selected. Use this only on a worker or powerful computer for difficult scanned PDFs."],
  ]);

  const notice = noticesByMode.get(ocrModeSelect.value);
  if (notice === undefined) return;

  engineNotice.textContent = notice;
  engineNotice.classList.remove("warning");
}
/**
 * Rebuilds the voice picker for the current mode.
 *
 * Cloud voices are listed in cloud mode without a remote worker; otherwise
 * local voices are listed. An empty list is replaced by a single built-in
 * fallback voice. The previous selection is kept when still offered;
 * otherwise the catalog default is selected when offered.
 */
function renderVoiceOptions() {
  const usesCloudVoices = cloudMode && !remoteWorkerMode;
  const catalogVoices = usesCloudVoices ? voiceCatalog.cloud : voiceCatalog.local;
  const fallbackVoice = usesCloudVoices
    ? { id: "mms-ara", label: "Arabic Standard" }
    : { id: "espeak-ar", label: "Local Arabic" };
  const options = catalogVoices.length ? catalogVoices : [fallbackVoice];

  // Read the current choice before the options are cleared.
  const previousChoice = voiceSelect.value || voiceCatalog.default;

  voiceSelect.innerHTML = "";
  options.forEach(({ id, label }) => {
    const option = document.createElement("option");
    option.value = id;
    option.textContent = label;
    voiceSelect.append(option);
  });

  const isOffered = (voiceId) => options.some((voice) => voice.id === voiceId);
  if (isOffered(previousChoice)) {
    voiceSelect.value = previousChoice;
  } else if (isOffered(voiceCatalog.default)) {
    voiceSelect.value = voiceCatalog.default;
  }
}
/**
 * Resets the job panel for a new run.
 *
 * Sets the title and queued state, shows the first stage as active at 2%,
 * clears the quality hint and every statistic, hides the audio panel and its
 * player/download attributes, and resets the browser read-aloud fallback.
 * The wording differs between browser-only cloud mode and server jobs.
 *
 * @param {string} name - Title to show for the job.
 * @param {string} [message="Uploading PDF."] - Initial status message.
 */
function resetJob(name, message = "Uploading PDF.") {
  const isBrowserOnly = cloudMode && !remoteWorkerMode;

  jobTitle.textContent = name;
  setJobState("queued", isBrowserOnly ? "Reading" : "Uploading");
  jobMessage.textContent = message;

  const initialSteps = defaultStageSteps.map((step, index) => ({
    ...step,
    state: index === 0 ? "active" : "pending",
  }));
  updateStage({
    id: "upload",
    label: isBrowserOnly ? "Text scan" : "Upload",
    phase: isBrowserOnly ? "Checking text layer" : "Uploading PDF",
    detail: message,
    progress: 2,
    steps: initialSteps,
  });

  showQualityHint(null);
  for (const counter of [pagesValue, charactersValue]) {
    counter.textContent = "0";
  }
  for (const field of [engineValue, extractionValue, qualityValue, audioFormatValue]) {
    field.textContent = "-";
  }

  audioPanel.classList.add("hidden");
  audioPlayer.removeAttribute("src");
  for (const attribute of ["href", "download"]) {
    downloadLink.removeAttribute(attribute);
  }

  resetBrowserSpeechFallback();
  updateProgress(2);
}
/**
 * Shows an overall progress percentage in the bar, the meter's ARIA value
 * and the percent label.
 *
 * @param {number|string} value - Progress; non-numeric input counts as 0 and
 *   the result is rounded and clamped to 0-100.
 */
function updateProgress(value) {
  const rounded = Math.round(Number(value) || 0);
  const safeValue = Math.min(100, Math.max(0, rounded));
  const percentText = `${safeValue}%`;

  progressBar.style.width = percentText;
  progressMeter?.setAttribute("aria-valuenow", String(safeValue));
  progressPercent.textContent = percentText;
}
/**
 * Builds the stage list with states relative to the active step.
 *
 * Steps before the active one are "done", the active one is "active", and
 * the rest are "pending". An unknown id leaves every step "pending".
 *
 * @param {string} activeId - Id of the step currently running.
 * @returns {object[]} Copies of `defaultStageSteps` with a `state` property.
 */
function stepStates(activeId) {
  const activeIndex = defaultStageSteps.findIndex((step) => step.id === activeId);

  const stateAt = (index) => {
    if (activeIndex < 0 || index > activeIndex) return "pending";
    return index === activeIndex ? "active" : "done";
  };

  return defaultStageSteps.map((step, index) => ({ ...step, state: stateAt(index) }));
}
/**
 * Builds the per-item progress record shown under the current stage.
 *
 * @param {string} label - Name of the item being counted.
 * @param {number|string} current - Items completed; clamped to 0..total.
 * @param {number|string} total - Item count; values below 1 or non-numeric
 *   become 1.
 * @returns {{label: string, current: number, total: number, percent: number}}
 *   The sanitized counts and the rounded percentage.
 */
function stageItem(label, current, total) {
  const totalCount = Math.max(1, Number(total) || 1);
  const currentCount = Math.min(totalCount, Math.max(0, Number(current) || 0));
  const percent = Math.round((currentCount / totalCount) * 100);

  return { label, current: currentCount, total: totalCount, percent };
}
/**
 * Formats a progress count: whole numbers as-is, fractions with one decimal.
 *
 * @param {number|string} value - Count to format; non-numeric input counts as 0.
 * @returns {string} The formatted count.
 */
function formatProgressCount(value) {
  const count = Number(value) || 0;
  return Number.isInteger(count) ? String(count) : count.toFixed(1);
}
/**
 * Renders the current processing stage.
 *
 * Updates the overall progress, the stage/phase/detail texts, the optional
 * per-item progress bar, the background-work note, and the list of steps.
 * Missing fields fall back to generic texts and to `defaultStageSteps`.
 *
 * @param {object} [stage] - Stage description. Fields read here: `progress`,
 *   `label`, `phase`, `detail`, `step`, `totalSteps`, `overallLabel`,
 *   `itemProgress` ({label, current, total, percent}) and `steps`
 *   (array of {label, state}).
 */
function updateStage(stage) {
  const progress = stage?.progress ?? 0;
  updateProgress(progress);

  // Headline texts.
  const hasStepCount = Boolean(stage?.step && stage?.totalSteps);
  const stepText = hasStepCount ? `Step ${stage.step} of ${stage.totalSteps}` : "Current step";
  let labelWithStep = "Working";
  if (stage?.label) {
    const stepSuffix = hasStepCount ? ` (${stage.step} of ${stage.totalSteps})` : "";
    labelWithStep = `${stage.label}${stepSuffix}`;
  }
  const phase = stage?.phase || "Current step";
  const overallLabel = stage?.overallLabel || "Overall progress";
  const roundedProgress = Math.round(Number(progress) || 0);

  stageLabel.textContent = labelWithStep;
  stagePhaseLabel.textContent = `${stepText} - ${overallLabel} ${roundedProgress}%`;
  stageDetailTitle.textContent = phase === "Current step" ? labelWithStep : phase;
  stageDetailText.textContent = stage?.detail || jobMessage.textContent || "Working on the PDF.";

  // Per-item progress (for example pages or audio parts), when provided.
  const itemProgress = stage?.itemProgress;
  if (itemProgress && itemProgress.total) {
    const itemPercent = Math.max(0, Math.min(100, Math.round(Number(itemProgress.percent) || 0)));
    stageItemProgress.classList.remove("hidden");
    stageItemLabel.textContent = itemProgress.label || "Progress";
    const currentText = formatProgressCount(itemProgress.current);
    const totalText = formatProgressCount(itemProgress.total);
    stageItemValue.textContent = `${currentText} of ${totalText} (${itemPercent}%)`;
    stageItemBar.style.width = `${itemPercent}%`;
  } else {
    stageItemProgress.classList.add("hidden");
    stageItemLabel.textContent = "Progress";
    stageItemValue.textContent = "0 of 0";
    stageItemBar.style.width = "0%";
  }

  // Server jobs keep running unattended; browser OCR needs the tab open.
  const isServerJob = !cloudMode || remoteWorkerMode;
  const backgroundText = isServerJob
    ? "The server keeps working even if you are not watching this screen."
    : "Keep this tab open while browser OCR is reading scanned pages.";
  stageMetaText.textContent = `Updated just now. ${backgroundText}`;

  // Step list: one <li> per step with a decorative dot and the step label.
  const buildStepItem = (step) => {
    const item = document.createElement("li");
    item.className = step.state || "pending";

    const dot = document.createElement("span");
    dot.className = "stage-dot";
    dot.setAttribute("aria-hidden", "true");

    const text = document.createElement("span");
    text.textContent = step.label;

    item.append(dot, text);
    return item;
  };
  const steps = stage?.steps?.length ? stage.steps : defaultStageSteps;
  stageList.innerHTML = "";
  steps.forEach((step) => {
    stageList.append(buildStepItem(step));
  });
}
/**
 * Shows the job's status text and matching status styling.
 *
 * @param {string} status - Job status; "complete" and "failed" each switch on
 *   the class of the same name, any other status switches both off.
 * @param {string} [label] - Text to display; defaults to the entry in
 *   `statusLabels`, or the status itself.
 */
function setJobState(status, label = statusLabels[status] || status) {
  jobState.textContent = label;
  for (const flag of ["complete", "failed"]) {
    jobState.classList.toggle(flag, status === flag);
  }
}
/**
 * Starts watching a job: polls immediately and then every 750 ms.
 *
 * Any timer from a previous job is cleared first, and the job id is recorded
 * as the active job.
 *
 * @param {string} jobId - Id of the job to poll.
 */
function startPolling(jobId) {
  const pollOnce = () => pollJob(jobId);

  activeJobId = jobId;
  clearInterval(pollTimer);
  pollTimer = setInterval(pollOnce, 750);
  pollOnce();
}
/**
 * Fetches a job's current state once and mirrors it into the page.
 *
 * Updates the title, status, message, quality hint, statistics and stage.
 * When the job is complete or failed, polling stops, the create button is
 * re-enabled and the history is reloaded; a complete job also gets its audio
 * player and download link wired up. Any error stops polling and is shown in
 * the upload error area.
 *
 * @param {string} jobId - Id of the job to fetch.
 * @returns {Promise<void>}
 */
async function pollJob(jobId) {
  // Shared teardown for finished jobs and for request failures.
  const stopPolling = () => {
    clearInterval(pollTimer);
    pollTimer = null;
    activeJobId = null;
    createButton.disabled = false;
  };

  try {
    const response = await apiFetch(`/api/jobs/${jobId}`);
    const job = await readJson(response);
    const { status, textQuality } = job;

    jobTitle.textContent = job.filename || "Arabic PDF";
    setJobState(status);
    jobMessage.textContent = job.error || job.message;
    showQualityHint({
      quality: textQuality,
      reasons: job.qualityReasons || [],
      readyForTts: textQuality && textQuality !== "poor",
    });

    pagesValue.textContent = formatPages(job);
    charactersValue.textContent = (job.characters || 0).toLocaleString();
    engineValue.textContent = job.engine || "-";
    extractionValue.textContent = job.extraction || "-";

    let qualityText = "-";
    if (textQuality) {
      const qualityLabel = qualityLabels[textQuality] || textQuality;
      const qualityScore = job.qualityScore ? Math.round(job.qualityScore) : "";
      qualityText = `${qualityLabel} ${qualityScore}`.trim();
    }
    qualityValue.textContent = qualityText;

    audioFormatValue.textContent = job.audioFormat
      ? `${job.audioFormat.toUpperCase()} ${formatBytes(job.audioBytes)}`
      : "-";

    updateStage(job.stage || { progress: job.progress || 0, label: statusLabels[status] || status });

    const isComplete = status === "complete";
    if (isComplete || status === "failed") {
      stopPolling();
      if (isComplete) {
        audioPlayer.src = apiUrl(job.audioUrl);
        downloadLink.href = apiUrl(job.downloadUrl);
        audioPanel.classList.remove("hidden");
      }
      loadHistory();
    }
  } catch (error) {
    stopPolling();
    setError(uploadError, error.message);
  }
}
/**
 * Formats a byte count with a B/KB/MB/GB unit.
 *
 * Bytes and values of 10 or more in their unit are shown without decimals;
 * smaller KB/MB/GB values get one decimal.
 *
 * @param {number|string} value - Size in bytes.
 * @returns {string} The formatted size, or "" for a falsy value.
 */
function formatBytes(value) {
  if (!value) return "";

  const units = ["B", "KB", "MB", "GB"];
  let size = Number(value);
  let unitIndex = 0;
  for (; size >= 1024 && unitIndex < units.length - 1; unitIndex += 1) {
    size /= 1024;
  }

  const decimals = size >= 10 || unitIndex === 0 ? 0 : 1;
  return `${size.toFixed(decimals)} ${units[unitIndex]}`;
}
/**
 * Formats a job's page count for display.
 *
 * @param {{pages?: number, totalPages?: number}} job - Job record.
 * @returns {string} "processed / total" when the total exceeds the processed
 *   count; otherwise just the processed count.
 */
function formatPages(job) {
  const pages = job.pages || 0;
  const total = job.totalPages || pages;
  return total && total > pages ? `${pages} / ${total}` : `${pages}`;
}
/**
 * Shows or hides the hint about text quality.
 *
 * Nothing is shown for a missing or "good" verdict. Otherwise the hint lists
 * the reasons and suggests a next step; a "poor" verdict gets stronger
 * wording and the "poor" style.
 *
 * @param {{quality?: string, reasons?: string[]}|null} quality - Verdict to
 *   describe.
 */
function showQualityHint(quality) {
  const level = quality?.quality;
  const { classList } = qualityHint;

  if (!level || level === "good") {
    qualityHint.textContent = "";
    classList.add("hidden");
    classList.remove("poor");
    return;
  }

  const isPoor = level === "poor";
  const reasonText = quality.reasons?.length ? ` ${quality.reasons.join("; ")}.` : "";
  let action = "Listen to a short sample before running the full book. If it sounds wrong, try 1. Tesseract Arabic - Best readable, Best scan test, or another OCR mode.";
  if (isPoor) {
    action = "Try 1. Tesseract Arabic - Best readable, Best scan test, or another OCR mode before creating audio.";
  }

  qualityHint.textContent = `Text needs checking.${reasonText} ${action}`;
  classList.remove("hidden");
  classList.toggle("poor", isPoor);
}
// Page start-up: fill the voice picker for the current mode, then run the
// session check (checkSession is defined outside this section of the file).
renderVoiceOptions();
checkSession();