Syncre committed on
Commit
6d5a99d
·
verified ·
1 Parent(s): 088795a

Deploy Arabic Audio Reader worker

Browse files
.export-manifest.json CHANGED
@@ -1,17 +1,18 @@
1
  {
2
  "files": {
3
- "Dockerfile": "6d7affbb656f2fc67c333ee8059486fbff99f965e72f10a2c9aec0be5436837d",
4
  "api/index.py": "b0fd5c43eadd241aea79131d12ea40fe032a97f06410ce1b607e81c45f33d6f2",
5
  "app/__init__.py": "7eb70257593da06f682a3ddda54a9d260d4fc514f645237f5ca74b08f8da61a6",
6
- "app/main.py": "73c5814e8fdb13e919ebb6f8efbbc6431021a5a1fe2f58b01bf6b275ac12653c",
7
- "docs/best-free-arabic-pdf-audio-stack.md": "d73c1c1fca7a04517ae1abecb893175e238d36318c39c0755e9b4fa570d3910a",
8
- "docs/father-user-guide.md": "8ecc38f999b58942cffd4f7626b324a32f62c421cebd280651a6e884b8ae4ecf",
9
  "docs/huggingface-model-metadata.md": "4590229078c2048b184787e85e5a00dd687ef5fc90e8d8d0af32538b38363cc2",
10
- "docs/live-deployment-checklist.md": "9ba8b210b1c37f2dc6cad2556e17fa54621951be095394df595c373956f9a420",
11
- "docs/production-worker-architecture.md": "8af1b5e5d9a586957e137f711e4a217a98580efba7848294fa9d3e23b62e0846",
12
- "docs/recommended-decision-card.json": "dcedfce18b5c064b9e1f43e42d334e04b42e921e1298f573a672fd0b0ba37b66",
13
- "docs/recommended-decision-card.md": "c87d8071e4d4bbd68d6a80dcd477821c38aa8919492efad5b30e32a083c0b8df",
14
- "docs/recommended-free-stack.md": "f73d1fdf0e1d1edbab6b8b22c9424a420859d692f69cfc6914dc1bf348802516",
 
15
  "docs/research-watchlist.md": "9ea43e6f3f1d434e514e451ccc8974faa469f4533d0ffe924c8db3d80755e592",
16
  "docs/source-evidence.md": "f308cbd0dc83a5cf34ceb5a010c354cda9acbc690e9b2c93a80cdb519ec07976",
17
  "requirements-arabic-glm-ocr.txt": "b4c950c1ef221bfe6e2deda1a93605377619059eb02019931dbfe1fe7bd49e10",
@@ -27,21 +28,21 @@
27
  "requirements.txt": "59d736ba33b31a828a5987f3477bec3f7ed6f60ceafaf730ef027a0dbbcd0def",
28
  "scripts/arabic_glm_ocr_extract.py": "f56578018b81ac8cd7928baa9576878791214e5659f972520e8817370a9d39ad",
29
  "scripts/arabic_qwen_ocr_extract.py": "485e9f3cdf2ced92c666b2f483d4aa37a65cb34052a4967beac7183d02c9ddcc",
30
- "scripts/audit_goal_readiness.py": "b05d40b8299521a9e98005e51886b5d68993e3ec7541558bef6fbfe7dd4c8e8d",
31
  "scripts/baseer_ocr_extract.py": "056ca9cc33591db804639030a16d9635931b720d0d499b444ed6e7d0a653605a",
32
  "scripts/benchmark_ocr.py": "b5ffb17845a7945b2a5c52e38bfabb6d82f3a8fbc8f2cdd5528843e09ad4deb4",
33
  "scripts/benchmark_voices.py": "705bdfb6260fe90a4a68d9d2455953ea7221d282bbf0cc1cc4fa32cd5ed10205",
34
- "scripts/check_deployment_readiness.py": "475e24adaff28c4378d3eea325f4b09f30c1566355e9ea5e2d5b1b07064c9df4",
35
- "scripts/check_research_sources.py": "670565c01cb48462eefe3171996963f326a22d4b106889e87a6644f3bf3aca71",
36
  "scripts/check_test_environment.py": "7150b13aabad03a9b7ae2527f7cdd942511658eeafb76b41128eab7e0a6dc1ea",
37
  "scripts/cleanup_outputs.py": "de8beacd9b8511dc3775d9c232a2c86dc5cbe91c532cb3c130c304117f0d6bfd",
38
  "scripts/configure_vercel_worker.py": "76051d6853a60df2ff614b5aa629bac241a32c85baeaf6234f734bac1f49a61f",
39
  "scripts/deploy_hf_space.py": "173be92b31c6bcb854eaf23004b0cf4029c79498bd1eff27e6b54c32370e2e22",
40
- "scripts/deployment_handoff.py": "4b24f16b79b71c53419c1e75ee62dd44fdb024eee8e13094b0f6efaf4e50551e",
41
  "scripts/deployment_status.py": "0437afcb47147b3825978d63ff36d38157135b2faf1ee658c203ae77735a3418",
42
  "scripts/dry_run_pdf.py": "f162b566fb51d824d484a479a1337d4ee7e9a6762c0f2ccb5acd3173c1cf1bf8",
43
  "scripts/easyocr_extract.py": "5a728a80bac7d49281113410316b5004cb6538ad50d1bc7c431eaea9c006ada9",
44
- "scripts/export_hf_space.py": "52eb44509cfcf9c9a27f1fc78e907e83d7495bd1dc0c1451e96fb54f219e6110",
45
  "scripts/export_ocr_sample_images.py": "eaf2ed2dca63f649317d283a1339ddae64d79b8d79eb42fe601d3b4a92ce8f45",
46
  "scripts/export_tts_sample.py": "477ae98e81c60bc3336012167355b0b8724cd1988047e2af41f443b23de7e9f3",
47
  "scripts/finish_live_deployment.py": "e3e7e2600071be49b747cf61f9788427339da2d2017825bda12048285e5232f8",
@@ -58,7 +59,7 @@
58
  "scripts/prove_local_readiness.py": "7ccb00fc2d4aa086f8cec5ac9886f87fe044eb46c6f9a7fa7c5eb095d44095ad",
59
  "scripts/qari_ocr_extract.py": "82ac22dae63e415c9795f4f597c000beba32af028b0a5afc749ba11bfebe2b22",
60
  "scripts/refresh_research_evidence.py": "52209edf8485ff459bda6ada6cb1b978f12b22cdacb03413d293dab9245303fc",
61
- "scripts/research_watchlist.py": "b29d34c8a5915ec8d2c210feac84186362d170eabd411baef468759ba08b9a25",
62
  "scripts/score_external_ocr.py": "e4b36187949dd38eaa9395979a97ccef88f7aff24a1404e3bf0793776eea136f",
63
  "scripts/score_tts_preprocessor.py": "7b9afce94bbc914b884a09bb83bd4d267770f6b712ffb5721cfef7c34e2718a2",
64
  "scripts/score_voice_listening.py": "22287145ab5677c4e4383a01dc9cb2090f3f22e20a37e0902bcfed8df7c7e5f6",
@@ -73,8 +74,8 @@
73
  "scripts/setup_habibi.sh": "a737e7a8266fa47eb1eba3deeed52ff2bc91042646fff3b721afe7edefaf41d8",
74
  "scripts/setup_katib_ocr.ps1": "10b3e2a0781bcccec8a344f79b2639d13abcf26904025bf91ed1646fec34115e",
75
  "scripts/setup_katib_ocr.sh": "ee59fccd22a76c1d773c40d1e734b6f33e12d04a3e65294c635892d019c9f673",
76
- "scripts/setup_paddleocr.ps1": "afaaa71132582aa7bbaaa6cf45ebc6bfffe716ca542da66c0f9e7bb849dca689",
77
- "scripts/setup_paddleocr.sh": "2365608707febc705a88df84d4323fae619020011e71cb1f3d0346bcf6deb548",
78
  "scripts/setup_paddleocr_vl.ps1": "0840fc9f181b246bff754bff6ad6c28a2aaf80fc0771a745f0577737dea3a806",
79
  "scripts/setup_paddleocr_vl.sh": "38d048a154d8c55e9ab4c068a7177e33618d921951c753745ab54312ec24e1ea",
80
  "scripts/setup_qari_ocr.ps1": "d8dc9e30df59476dfc737d2538d84523a694c97122196690d175f13e5b5e9e6b",
@@ -91,13 +92,13 @@
91
  "scripts/supertonic_synthesize.py": "8223e3982de99e06091cff419d9b4584a56823b67c94b1493ce7143dd8c7f4f4",
92
  "scripts/surya_extract.py": "7361a8a667779c46aed71fa67b7f869d16f8067b55591d202fa968b8fc7628d7",
93
  "scripts/tawkeed_ocr_extract.py": "da554d5620237b70e234032b5525fcee8e9bebe9a924e5750746530c41972318",
94
- "scripts/validate_deployment_env.py": "2acf5dbbed165b549e0f3b4bb72fd47aa49ed705a4a4ac49e908a63592467491",
95
  "scripts/verify_pipeline.py": "57359e0e4399352976100f633ce780d9a022e96885e18d29d7f5bd4c4a43a857",
96
- "scripts/verify_site.py": "275e71cbb1ac19bf18286136601184dd1ab866a8e0318c8263e76e513002d071",
97
  "scripts/verify_voice.py": "d8fb7e473e47060b2d2f957c5c230807a205e95b1469eef9c32b76d2bc8585b2",
98
- "scripts/verify_worker.py": "16b5cbef6c1ea89ada2c9df476033eda93753d0a70830b637b24cf1c833f6054",
99
- "static/app.js": "375131fab5c6d9cfc3ff0d46f6443cb350451e994cec168a831f7444ef87812d",
100
- "static/index.html": "eb3b3f5eb45eb58b10186f145a62822c8ae352d0783e7cb914eba6221291e853",
101
  "static/styles.css": "a45485cf99eaae8a46e57437a736ce1ebad2528dbf219c5bc79f124ec3c47164"
102
  },
103
  "source": "ArabicTranslator",
 
1
  {
2
  "files": {
3
+ "Dockerfile": "eb8b1b840c8303bbcc2354a0e60896cf96593ac6828df6179877b19022e97c1c",
4
  "api/index.py": "b0fd5c43eadd241aea79131d12ea40fe032a97f06410ce1b607e81c45f33d6f2",
5
  "app/__init__.py": "7eb70257593da06f682a3ddda54a9d260d4fc514f645237f5ca74b08f8da61a6",
6
+ "app/main.py": "585db9d4acd34f7b69591ae0f6c0807154b9317bb2a6830e1a3642bf50414e47",
7
+ "docs/best-free-arabic-pdf-audio-stack.md": "08234106caacc0207f404b11023656cdc39525b28fedf526e97369edf926c48f",
8
+ "docs/father-user-guide.md": "a05534fa8ecc4bee94704b6691947ac189f6767a95fd12eb65ae27c4ede1182f",
9
  "docs/huggingface-model-metadata.md": "4590229078c2048b184787e85e5a00dd687ef5fc90e8d8d0af32538b38363cc2",
10
+ "docs/live-deployment-checklist.md": "7fd21a9316c1d018e2bec0620defcaaca2a690f109e51b5902c7d157244834ac",
11
+ "docs/ocr-readability-benchmark.md": "f93f09729f5e8bd5f938afad9490b471452ca549d081ff7700161cc1dc961453",
12
+ "docs/production-worker-architecture.md": "1264c16b83948385026aca0fab18e7963fa5056a178fa381380659352274b4ff",
13
+ "docs/recommended-decision-card.json": "97e4607db20ac19cadc9b894d6406517bcb37f8ccc6ecbe6c0c41f5f2463398e",
14
+ "docs/recommended-decision-card.md": "f69bbe66d7977a4877f934212862159495ea5a4547997e059f5c4e1b8d6d6cb9",
15
+ "docs/recommended-free-stack.md": "6156deac80f5656ff4cd33d726061965b6e2a6fbc8db4ee4123b2b43e42aa40d",
16
  "docs/research-watchlist.md": "9ea43e6f3f1d434e514e451ccc8974faa469f4533d0ffe924c8db3d80755e592",
17
  "docs/source-evidence.md": "f308cbd0dc83a5cf34ceb5a010c354cda9acbc690e9b2c93a80cdb519ec07976",
18
  "requirements-arabic-glm-ocr.txt": "b4c950c1ef221bfe6e2deda1a93605377619059eb02019931dbfe1fe7bd49e10",
 
28
  "requirements.txt": "59d736ba33b31a828a5987f3477bec3f7ed6f60ceafaf730ef027a0dbbcd0def",
29
  "scripts/arabic_glm_ocr_extract.py": "f56578018b81ac8cd7928baa9576878791214e5659f972520e8817370a9d39ad",
30
  "scripts/arabic_qwen_ocr_extract.py": "485e9f3cdf2ced92c666b2f483d4aa37a65cb34052a4967beac7183d02c9ddcc",
31
+ "scripts/audit_goal_readiness.py": "4fe8f36c4ef9b8e3c492dcef894cabc7afe98b5396e1c4bd15bdcfef3da733d7",
32
  "scripts/baseer_ocr_extract.py": "056ca9cc33591db804639030a16d9635931b720d0d499b444ed6e7d0a653605a",
33
  "scripts/benchmark_ocr.py": "b5ffb17845a7945b2a5c52e38bfabb6d82f3a8fbc8f2cdd5528843e09ad4deb4",
34
  "scripts/benchmark_voices.py": "705bdfb6260fe90a4a68d9d2455953ea7221d282bbf0cc1cc4fa32cd5ed10205",
35
+ "scripts/check_deployment_readiness.py": "c371706cf94f807354a1a08f274dc17b1c02d68347b98f70b177b4c14f73bf17",
36
+ "scripts/check_research_sources.py": "49bc5a15cddf040f134d21e042d064d64fce2235f2ff1dd01f6b9c69cdf0c3e0",
37
  "scripts/check_test_environment.py": "7150b13aabad03a9b7ae2527f7cdd942511658eeafb76b41128eab7e0a6dc1ea",
38
  "scripts/cleanup_outputs.py": "de8beacd9b8511dc3775d9c232a2c86dc5cbe91c532cb3c130c304117f0d6bfd",
39
  "scripts/configure_vercel_worker.py": "76051d6853a60df2ff614b5aa629bac241a32c85baeaf6234f734bac1f49a61f",
40
  "scripts/deploy_hf_space.py": "173be92b31c6bcb854eaf23004b0cf4029c79498bd1eff27e6b54c32370e2e22",
41
+ "scripts/deployment_handoff.py": "f11b974c9bd9661f6f2fb1f893385515676c4248fce11b1177ca2bac87ce9f71",
42
  "scripts/deployment_status.py": "0437afcb47147b3825978d63ff36d38157135b2faf1ee658c203ae77735a3418",
43
  "scripts/dry_run_pdf.py": "f162b566fb51d824d484a479a1337d4ee7e9a6762c0f2ccb5acd3173c1cf1bf8",
44
  "scripts/easyocr_extract.py": "5a728a80bac7d49281113410316b5004cb6538ad50d1bc7c431eaea9c006ada9",
45
+ "scripts/export_hf_space.py": "5d6cd097cd7e251f6ced6c2198a9bb0de64000004e60d16b820a87824fe7c223",
46
  "scripts/export_ocr_sample_images.py": "eaf2ed2dca63f649317d283a1339ddae64d79b8d79eb42fe601d3b4a92ce8f45",
47
  "scripts/export_tts_sample.py": "477ae98e81c60bc3336012167355b0b8724cd1988047e2af41f443b23de7e9f3",
48
  "scripts/finish_live_deployment.py": "e3e7e2600071be49b747cf61f9788427339da2d2017825bda12048285e5232f8",
 
59
  "scripts/prove_local_readiness.py": "7ccb00fc2d4aa086f8cec5ac9886f87fe044eb46c6f9a7fa7c5eb095d44095ad",
60
  "scripts/qari_ocr_extract.py": "82ac22dae63e415c9795f4f597c000beba32af028b0a5afc749ba11bfebe2b22",
61
  "scripts/refresh_research_evidence.py": "52209edf8485ff459bda6ada6cb1b978f12b22cdacb03413d293dab9245303fc",
62
+ "scripts/research_watchlist.py": "9d50b16d7aeb7838e983e441032ea45e7ecb56015556c88a0fdf9ef1aa273649",
63
  "scripts/score_external_ocr.py": "e4b36187949dd38eaa9395979a97ccef88f7aff24a1404e3bf0793776eea136f",
64
  "scripts/score_tts_preprocessor.py": "7b9afce94bbc914b884a09bb83bd4d267770f6b712ffb5721cfef7c34e2718a2",
65
  "scripts/score_voice_listening.py": "22287145ab5677c4e4383a01dc9cb2090f3f22e20a37e0902bcfed8df7c7e5f6",
 
74
  "scripts/setup_habibi.sh": "a737e7a8266fa47eb1eba3deeed52ff2bc91042646fff3b721afe7edefaf41d8",
75
  "scripts/setup_katib_ocr.ps1": "10b3e2a0781bcccec8a344f79b2639d13abcf26904025bf91ed1646fec34115e",
76
  "scripts/setup_katib_ocr.sh": "ee59fccd22a76c1d773c40d1e734b6f33e12d04a3e65294c635892d019c9f673",
77
+ "scripts/setup_paddleocr.ps1": "1bc345d3d0f6bc0614a1b2d50fd6c3325b725a807d98c25d6b9b57c0f363ac49",
78
+ "scripts/setup_paddleocr.sh": "9ee6d8aa3107bd040a16e84d2b5a62e2084546d847e5b827dcf0483fd464476a",
79
  "scripts/setup_paddleocr_vl.ps1": "0840fc9f181b246bff754bff6ad6c28a2aaf80fc0771a745f0577737dea3a806",
80
  "scripts/setup_paddleocr_vl.sh": "38d048a154d8c55e9ab4c068a7177e33618d921951c753745ab54312ec24e1ea",
81
  "scripts/setup_qari_ocr.ps1": "d8dc9e30df59476dfc737d2538d84523a694c97122196690d175f13e5b5e9e6b",
 
92
  "scripts/supertonic_synthesize.py": "8223e3982de99e06091cff419d9b4584a56823b67c94b1493ce7143dd8c7f4f4",
93
  "scripts/surya_extract.py": "7361a8a667779c46aed71fa67b7f869d16f8067b55591d202fa968b8fc7628d7",
94
  "scripts/tawkeed_ocr_extract.py": "da554d5620237b70e234032b5525fcee8e9bebe9a924e5750746530c41972318",
95
+ "scripts/validate_deployment_env.py": "d42531933369e541cc451dabdb2542b9a8cc9b8739a1be292252a8b054613f37",
96
  "scripts/verify_pipeline.py": "57359e0e4399352976100f633ce780d9a022e96885e18d29d7f5bd4c4a43a857",
97
+ "scripts/verify_site.py": "7a09c02f0063f913ac76f0793dcf359684cb6d210c3c851e86934527b277295d",
98
  "scripts/verify_voice.py": "d8fb7e473e47060b2d2f957c5c230807a205e95b1469eef9c32b76d2bc8585b2",
99
+ "scripts/verify_worker.py": "73329f87852ce805ab7144df6faaab4e081099f7ebc9a2e66e93735ee7fa82cc",
100
+ "static/app.js": "735d2ba288d8f96b7e99d4009d0ad5ef2db845562ea5defb5a6725b3c4dc6993",
101
+ "static/index.html": "0877f04c78afa4078c92fea23a93ff2f97851a8c3d17dd005e3c5a56b8508288",
102
  "static/styles.css": "a45485cf99eaae8a46e57437a736ce1ebad2528dbf219c5bc79f124ec3c47164"
103
  },
104
  "source": "ArabicTranslator",
Dockerfile CHANGED
@@ -6,7 +6,9 @@ ENV PYTHONUNBUFFERED=1 \
6
  DATABASE_PATH=/data/arabic-translator/data/arabic_reader.sqlite3 \
7
  TESSDATA_DIR=/usr/share/tesseract-ocr/5/tessdata \
8
  ESPEAK_NG_EXE=/usr/bin/espeak-ng \
9
- OCR_ENGINE=arabic \
 
 
10
  DEFAULT_VOICE_ID=silma-local \
11
  MAX_UPLOAD_MB=512 \
12
  OUTPUT_RETENTION_DAYS=7 \
 
6
  DATABASE_PATH=/data/arabic-translator/data/arabic_reader.sqlite3 \
7
  TESSDATA_DIR=/usr/share/tesseract-ocr/5/tessdata \
8
  ESPEAK_NG_EXE=/usr/bin/espeak-ng \
9
+ OCR_ENGINE=tesseract \
10
+ OCR_RENDER_ZOOM=2 \
11
+ TESSERACT_PSM=4 \
12
  DEFAULT_VOICE_ID=silma-local \
13
  MAX_UPLOAD_MB=512 \
14
  OUTPUT_RETENTION_DAYS=7 \
README.md CHANGED
@@ -28,7 +28,9 @@ SECRET_KEY=<generated by outputs\deployment-handoff.md>
28
  CORS_ORIGINS=https://your-vercel-app.vercel.app
29
  COOKIE_SAMESITE=none
30
  COOKIE_SECURE=1
31
- OCR_ENGINE=arabic
 
 
32
  DEFAULT_VOICE_ID=silma-local
33
  OUTPUT_RETENTION_DAYS=7
34
  OUTPUT_MAX_FILES=25
@@ -44,7 +46,7 @@ python scripts\deployment_handoff.py https://your-space.hf.space --origin https:
44
 
45
  Keep `outputs\deployment-handoff.md` private because it contains deployment secrets.
46
 
47
- The compact process recommendation is included at `docs/recommended-free-stack.md`, with the machine-readable deployment decision card at `docs/recommended-decision-card.json` and its readable companion at `docs/recommended-decision-card.md`. The current practical default is PyMuPDF embedded text first, `OCR_ENGINE=arabic` for balanced scanned Arabic OCR, SILMA TTS for the first clean voice, and downloadable worker audio.
48
 
49
  Optional stronger-worker build args:
50
 
 
28
  CORS_ORIGINS=https://your-vercel-app.vercel.app
29
  COOKIE_SAMESITE=none
30
  COOKIE_SECURE=1
31
+ OCR_ENGINE=tesseract
32
+ OCR_RENDER_ZOOM=2
33
+ TESSERACT_PSM=4
34
  DEFAULT_VOICE_ID=silma-local
35
  OUTPUT_RETENTION_DAYS=7
36
  OUTPUT_MAX_FILES=25
 
46
 
47
  Keep `outputs\deployment-handoff.md` private because it contains deployment secrets.
48
 
49
+ The compact process recommendation is included at `docs/recommended-free-stack.md`, with the machine-readable deployment decision card at `docs/recommended-decision-card.json` and its readable companion at `docs/recommended-decision-card.md`. The current practical default is PyMuPDF embedded text first, `OCR_ENGINE=tesseract OCR_RENDER_ZOOM=2 TESSERACT_PSM=4` for scanned Arabic pages (the most readable OCR output in testing), SILMA TTS for the first clean voice, and downloadable worker audio.
50
 
51
  Optional stronger-worker build args:
52
 
app/main.py CHANGED
@@ -104,7 +104,7 @@ PIPER_MODEL = os.getenv("PIPER_MODEL")
104
  ESPEAK_NG_EXE = os.getenv("ESPEAK_NG_EXE")
105
  TESSERACT_EXE = os.getenv("TESSERACT_EXE")
106
  TESSDATA_DIR = Path(os.getenv("TESSDATA_DIR", str(DATA_DIR / "tessdata")))
107
- OCR_ENGINE = os.getenv("OCR_ENGINE", "arabic").lower()
108
  OCR_ENGINE_CHOICES = {
109
  "arabic",
110
  "arabic-max",
@@ -1203,7 +1203,7 @@ def get_engine_status() -> dict[str, object]:
1203
  or easyocr_ready
1204
  or tesseract_path
1205
  ),
1206
- "label": "Arabic OCR - Recommended balance",
1207
  "trainedFor": "Arabic printed text",
1208
  "models": [
1209
  "QARI-OCR Arabic book VLM",
@@ -1251,10 +1251,10 @@ def get_engine_status() -> dict[str, object]:
1251
  "easyocr": {"available": easyocr_ready, "label": "General Arabic OCR"},
1252
  "paddleocr": {
1253
  "available": paddleocr_ready,
1254
- "label": "PaddleOCR Arabic - Recommended balance",
1255
  "trainedFor": "Arabic printed text",
1256
  "model": "arabic_PP-OCRv5_mobile_rec",
1257
- "recommendedFor": "Best quality/speed balance on the current free worker",
1258
  },
1259
  "paddleocrVl": {
1260
  "available": paddleocr_vl_ready,
@@ -1311,7 +1311,12 @@ def get_engine_status() -> dict[str, object]:
1311
  "model": "Surya OCR 2",
1312
  "recommendedFor": "Hard scans on a real worker, not Vercel serverless",
1313
  },
1314
- "tesseract": {"available": bool(tesseract_path), "label": "Tesseract Arabic fallback"},
 
 
 
 
 
1315
  "language": os.getenv("OCR_LANGUAGE", "ara"),
1316
  },
1317
  "readyForArabic": bool(
@@ -1327,7 +1332,8 @@ def get_engine_status() -> dict[str, object]:
1327
  },
1328
  "recommendedStack": {
1329
  "pdf": "PyMuPDF embedded text first",
1330
- "ocrEngine": "arabic",
 
1331
  "voiceId": "silma-local",
1332
  "audioStorage": "worker-local retained downloads",
1333
  "benchmarkRule": "Run a representative 5-page Arabic sample before full-book audio.",
@@ -2627,7 +2633,7 @@ def ocr_pdf_text_with_tesseract(pdf_path: Path, job: Job, render_zoom: float | N
2627
  )
2628
  variant = render_zoom is not None or psm is not None
2629
  render_zoom = render_zoom or float(os.getenv("OCR_RENDER_ZOOM", "2.0"))
2630
- psm = psm or int(os.getenv("TESSERACT_PSM", "6"))
2631
  temp_dir = UPLOAD_DIR / f"ocr_{uuid.uuid4().hex}"
2632
  temp_dir.mkdir(parents=True, exist_ok=True)
2633
  pieces: list[str] = []
 
104
  ESPEAK_NG_EXE = os.getenv("ESPEAK_NG_EXE")
105
  TESSERACT_EXE = os.getenv("TESSERACT_EXE")
106
  TESSDATA_DIR = Path(os.getenv("TESSDATA_DIR", str(DATA_DIR / "tessdata")))
107
+ OCR_ENGINE = os.getenv("OCR_ENGINE", "tesseract").lower()
108
  OCR_ENGINE_CHOICES = {
109
  "arabic",
110
  "arabic-max",
 
1203
  or easyocr_ready
1204
  or tesseract_path
1205
  ),
1206
+ "label": "Arabic OCR comparison - slower",
1207
  "trainedFor": "Arabic printed text",
1208
  "models": [
1209
  "QARI-OCR Arabic book VLM",
 
1251
  "easyocr": {"available": easyocr_ready, "label": "General Arabic OCR"},
1252
  "paddleocr": {
1253
  "available": paddleocr_ready,
1254
+ "label": "PaddleOCR Arabic - faster, less readable",
1255
  "trainedFor": "Arabic printed text",
1256
  "model": "arabic_PP-OCRv5_mobile_rec",
1257
+ "recommendedFor": "Usable fallback, but the 5-page benchmark produced more fragmented text than Tesseract",
1258
  },
1259
  "paddleocrVl": {
1260
  "available": paddleocr_vl_ready,
 
1311
  "model": "Surya OCR 2",
1312
  "recommendedFor": "Hard scans on a real worker, not Vercel serverless",
1313
  },
1314
+ "tesseract": {
1315
+ "available": bool(tesseract_path),
1316
+ "label": "Tesseract Arabic - Recommended readable",
1317
+ "trainedFor": "Arabic printed text",
1318
+ "recommendedFor": "Best readable output on the 5-page Arabic benchmark; uses OCR_RENDER_ZOOM=2 and TESSERACT_PSM=4 by default",
1319
+ },
1320
  "language": os.getenv("OCR_LANGUAGE", "ara"),
1321
  },
1322
  "readyForArabic": bool(
 
1332
  },
1333
  "recommendedStack": {
1334
  "pdf": "PyMuPDF embedded text first",
1335
+ "ocrEngine": "tesseract",
1336
+ "ocrSettings": "OCR_RENDER_ZOOM=2 TESSERACT_PSM=4",
1337
  "voiceId": "silma-local",
1338
  "audioStorage": "worker-local retained downloads",
1339
  "benchmarkRule": "Run a representative 5-page Arabic sample before full-book audio.",
 
2633
  )
2634
  variant = render_zoom is not None or psm is not None
2635
  render_zoom = render_zoom or float(os.getenv("OCR_RENDER_ZOOM", "2.0"))
2636
+ psm = psm or int(os.getenv("TESSERACT_PSM", "4"))
2637
  temp_dir = UPLOAD_DIR / f"ocr_{uuid.uuid4().hex}"
2638
  temp_dir.mkdir(parents=True, exist_ok=True)
2639
  pieces: list[str] = []
docs/best-free-arabic-pdf-audio-stack.md CHANGED
@@ -9,7 +9,7 @@ The source evidence is summarized in `docs/source-evidence.md`; verify the resea
9
  For this project, the best practical free local stack is:
10
 
11
  1. PyMuPDF for embedded PDF text.
12
- 2. `OCR_ENGINE=arabic-max` for scanned pages by default; it compares the strongest available Arabic OCR outputs and chooses the cleanest text. When QARI-OCR, Tawkeed, KATIB, Arabic-Qwen, or Baseer is installed, this includes Arabic-trained VLM OCR candidates.
13
  3. EasyOCR Arabic as a strong alternate for older scans and difficult layouts.
14
  4. `OCR_ENGINE=best` for short quality tests, which compares the free local OCR engines and picks the best-looking Arabic text.
15
  5. QARI-OCR as the optional Arabic-native heavy OCR path for strong workers.
 
9
  For this project, the best practical free local stack is:
10
 
11
  1. PyMuPDF for embedded PDF text.
12
+ 2. `OCR_ENGINE=tesseract OCR_RENDER_ZOOM=2 TESSERACT_PSM=4` for scanned pages by default; it produced the most readable text on the 5-page Arabic benchmark. Use `OCR_ENGINE=arabic-max` only on short samples when the default reads a specific book badly.
13
  3. EasyOCR Arabic as a strong alternate for older scans and difficult layouts.
14
  4. `OCR_ENGINE=best` for short quality tests, which compares the free local OCR engines and picks the best-looking Arabic text.
15
  5. QARI-OCR as the optional Arabic-native heavy OCR path for strong workers.
docs/father-user-guide.md CHANGED
@@ -8,7 +8,7 @@ This guide is for the person using the website, not for setup.
8
  2. Enter the access code.
9
  3. Choose the Arabic PDF.
10
  4. Leave **Voice** on the best Arabic voice unless someone tells you to change it.
11
- 5. Leave **Text quality** on **Arabic OCR - Recommended balance** for a new scanned book.
12
  6. Leave **Pages** on **Quick test** first.
13
  7. Press **Create Audio**.
14
  8. Wait until the status says the audio is ready.
@@ -18,9 +18,9 @@ This guide is for the person using the website, not for setup.
18
 
19
  ## Which Text Quality To Choose
20
 
21
- Use **Arabic OCR - Recommended balance** first. It is the best normal choice for scanned Arabic books because it keeps quality high without running the slowest checks.
22
 
23
- Use **Maximum Arabic OCR - slower** on a short sample when the recommended option reads badly. It compares more OCR results and keeps the cleanest text, but it can take much longer.
24
 
25
  Use **QARI Arabic books** for a difficult scanned book when the normal option reads badly and the worker is strong enough. It is trained for Arabic books and manuscripts, but it can be much slower.
26
 
@@ -28,13 +28,13 @@ Use **KATIB Arabic OCR** when QARI is too slow or too heavy. It is also trained
28
 
29
  Use **Best scan test** only on a short sample. It is useful for deciding which OCR engine works best for one book, but it is too slow for most full books.
30
 
31
- Use **PaddleOCR Arabic - Recommended balance** when the test sounds good and you want the full book to run faster.
32
 
33
  Use **Tesseract Arabic fallback** when the other options are broken or when a benchmark says Tesseract worked best for that book.
34
 
35
  ## If Something Fails
36
 
37
- If the app says the text quality is poor, do not make full-book audio yet. Try **Arabic OCR - Recommended balance**, then **Maximum Arabic OCR - slower** on a short test, then **Best scan test**.
38
 
39
  If the first pages are title pages or blank pages, make a 5-page test PDF from better pages and test that before the full book.
40
 
 
8
  2. Enter the access code.
9
  3. Choose the Arabic PDF.
10
  4. Leave **Voice** on the best Arabic voice unless someone tells you to change it.
11
+ 5. Leave **Text quality** on **Tesseract Arabic - Recommended readable** for a new scanned book.
12
  6. Leave **Pages** on **Quick test** first.
13
  7. Press **Create Audio**.
14
  8. Wait until the status says the audio is ready.
 
18
 
19
  ## Which Text Quality To Choose
20
 
21
+ Use **Tesseract Arabic - Recommended readable** first. It produced the most readable text in the 5-page Arabic OCR benchmark and is much faster than the comparison modes.
22
 
23
+ Use **Arabic OCR comparison - slower** or **Maximum Arabic OCR - slower** on a short sample when the recommended option reads badly. They compare more OCR results and keep the cleanest text, but they can take much longer.
24
 
25
  Use **QARI Arabic books** for a difficult scanned book when the normal option reads badly and the worker is strong enough. It is trained for Arabic books and manuscripts, but it can be much slower.
26
 
 
28
 
29
  Use **Best scan test** only on a short sample. It is useful for deciding which OCR engine works best for one book, but it is too slow for most full books.
30
 
31
+ Use **PaddleOCR Arabic - faster, less readable** only when Tesseract is unavailable or a short test sounds better for that book.
32
 
33
  Go back to **Tesseract Arabic - Recommended readable** when the other options are broken or when a benchmark says Tesseract worked best for that book.
34
 
35
  ## If Something Fails
36
 
37
+ If the app says the text quality is poor, do not make full-book audio yet. Try **Tesseract Arabic - Recommended readable**, then **Arabic OCR comparison - slower**, then **Best scan test** on a short sample.
38
 
39
  If the first pages are title pages or blank pages, make a 5-page test PDF from better pages and test that before the full book.
40
 
docs/live-deployment-checklist.md CHANGED
@@ -157,7 +157,9 @@ SECRET_KEY=<generated by outputs\deployment-handoff.md>
157
  CORS_ORIGINS=https://your-vercel-app.vercel.app
158
  COOKIE_SAMESITE=none
159
  COOKIE_SECURE=1
160
- OCR_ENGINE=arabic-max
 
 
161
  DEFAULT_VOICE_ID=silma-local
162
  OUTPUT_RETENTION_DAYS=7
163
  OUTPUT_MAX_FILES=25
@@ -168,7 +170,7 @@ SILMA_FORCE_TASHKEEL=0
168
  SILMA_NORMALIZE_NUMBERS=0
169
  ```
170
 
171
- Keep `OCR_ENGINE=arabic-max` for the first real deployment. It compares the strongest installed Arabic OCR outputs instead of trusting one engine.
172
 
173
  ## 3. Vercel Website
174
 
 
157
  CORS_ORIGINS=https://your-vercel-app.vercel.app
158
  COOKIE_SAMESITE=none
159
  COOKIE_SECURE=1
160
+ OCR_ENGINE=tesseract
161
+ OCR_RENDER_ZOOM=2
162
+ TESSERACT_PSM=4
163
  DEFAULT_VOICE_ID=silma-local
164
  OUTPUT_RETENTION_DAYS=7
165
  OUTPUT_MAX_FILES=25
 
170
  SILMA_NORMALIZE_NUMBERS=0
171
  ```
172
 
173
+ Keep `OCR_ENGINE=tesseract`, `OCR_RENDER_ZOOM=2`, and `TESSERACT_PSM=4` for the first real deployment. That setting produced the most readable text on the 5-page Arabic benchmark. Use `arabic-max` only on short samples when this setting reads a specific book badly.
174
 
175
  ## 3. Vercel Website
176
 
docs/ocr-readability-benchmark.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Arabic OCR Readability Benchmark
2
+
3
+ Last run: June 8, 2026.
4
+
5
+ Benchmark file: `test_pdfs/arabic-reader-5-page-test.pdf`
6
+
7
+ Scoring uses the app's `assess_text_quality` and speech-readiness metrics: Arabic word count, common Arabic word hits, one-letter fragment ratio, low-information line ratio, placeholder ratio, and total quality score. Higher score is better; `good` is preferred over `warning`.
8
+
9
+ ## Result
10
+
11
+ Recommended OCR:
12
+
13
+ ```text
14
+ OCR_ENGINE=tesseract
15
+ OCR_RENDER_ZOOM=2
16
+ TESSERACT_PSM=4
17
+ ```
18
+
19
+ This setting produced the most readable 5-page output while staying practical for full-book jobs.
20
+
21
+ | OCR setting | Pages | Seconds | Quality | Score | Arabic words | Fragment line ratio | Extraction |
22
+ | --- | ---: | ---: | --- | ---: | ---: | ---: | --- |
23
+ | Tesseract 2x PSM 4 | 5 | 37.30 | good | 11919.05 | 3120 | 0.0433 | `tesseract@2x-psm4` |
24
+ | Tesseract default PSM 6 | 5 | 28.88 | good | 11510.50 | 3284 | 0.0166 | `tesseract@1.5x-psm6` |
25
+ | PaddleOCR Arabic | 5 | 106.91 | warning | 8105.80 | 2251 | 0.3133 | `paddleocr` |
26
+ | Auto fallback | 5 | 104.47 | warning | 8105.80 | 2251 | 0.3133 | `paddleocr` |
27
+ | EasyOCR mode | 5 | 102.39 | warning | 8105.80 | 2251 | 0.3133 | `paddleocr` |
28
+
29
+ The slower comparison modes were tested on the 1-page sample because the full 5-page comparison exceeded the 10-minute run window. Both selected the same underlying winner, `tesseract@2x-psm4`, but took about 4.5 minutes for one page:
30
+
31
+ | OCR setting | Pages | Seconds | Quality | Score | Arabic words | Extraction |
32
+ | --- | ---: | ---: | --- | ---: | ---: | --- |
33
+ | Arabic OCR comparison | 1 | 280.76 | good | 3565.85 | 719 | `arabic:tesseract@2x-psm4` |
34
+ | Maximum Arabic OCR | 1 | 268.47 | good | 3565.85 | 719 | `arabic-max:tesseract@2x-psm4` |
35
+
36
+ ## Interpretation
37
+
38
+ `arabic` and `arabic-max` are useful short-sample diagnostics because they can compare installed OCR engines and pick the cleanest text. They are not the right default for long PDFs on the current free worker because they spend minutes per page and selected Tesseract anyway.
39
+
40
+ PaddleOCR is available and works, but on this book sample it returned many low-information lines and more fragmented Arabic text. It remains a fallback, not the recommendation.
41
+
42
+ The live/default website setting should therefore be `Tesseract Arabic - Recommended readable`.
docs/production-worker-architecture.md CHANGED
@@ -93,7 +93,7 @@ The worker bundle also includes setup scripts for optional heavy paths, but they
93
 
94
  `Dockerfile.worker` exposes `INSTALL_QARI_OCR=1`, `INSTALL_KATIB_OCR=1`, `INSTALL_ARABIC_QWEN_OCR=1`, `INSTALL_BASEER_OCR=1`, `INSTALL_PADDLEOCR_VL=1`, and `INSTALL_SUPERTONIC=1` build args so stronger workers can install QARI-OCR, KATIB, Arabic-Qwen, Baseer, PaddleOCR-VL, and the optional Supertonic CPU voice without editing the Dockerfile. This keeps the free CPU image practical while making the higher-quality free OCR paths and fast voice comparison path deployable.
95
 
96
- QARI-OCR 0.4 is the strongest Arabic-native OCR upgrade to test for a stronger worker. It is a 4B VLM fine-tuned for Islamic books and Arabic manuscripts, so keep it out of the default free CPU family-site worker unless a short Arabic-book benchmark proves it improves the actual pages and the worker has enough RAM/GPU. KATIB 0.8B and Arabic-Qwen3.5-OCR-v4 are the smaller Arabic-trained OCR upgrades to try when QARI is too heavy. If the worker is too small for QARI, set `QARI_OCR_MODEL=NAMAA-Space/Qari-OCR-v0.3-VL-2B-Instruct` to test the lighter older QARI path. PaddleOCR-VL-1.6 remains the main general document-parser upgrade to watch. In the website, start with `Arabic OCR - Recommended balance` or `PaddleOCR Arabic - Recommended balance`; use `Maximum Arabic OCR - slower` only for short tests or difficult pages.
97
 
98
  The repo includes optional KATIB, Arabic-Qwen, QARI-OCR, and PaddleOCR-VL sidecar scripts for this evaluation path:
99
 
@@ -131,7 +131,7 @@ Create a new Space:
131
  - `CORS_ORIGINS=https://your-vercel-app.vercel.app`
132
  - `COOKIE_SAMESITE=none`
133
  - `COOKIE_SECURE=1`
134
- 5. Keep `OCR_ENGINE=arabic-max` for quality, or switch to `OCR_ENGINE=paddleocr` only when you need a faster single-engine run.
135
 
136
  Then set this on Vercel:
137
 
 
93
 
94
  `Dockerfile.worker` exposes `INSTALL_QARI_OCR=1`, `INSTALL_KATIB_OCR=1`, `INSTALL_ARABIC_QWEN_OCR=1`, `INSTALL_BASEER_OCR=1`, `INSTALL_PADDLEOCR_VL=1`, and `INSTALL_SUPERTONIC=1` build args so stronger workers can install QARI-OCR, KATIB, Arabic-Qwen, Baseer, PaddleOCR-VL, and the optional Supertonic CPU voice without editing the Dockerfile. This keeps the free CPU image practical while making the higher-quality free OCR paths and fast voice comparison path deployable.
95
 
96
+ QARI-OCR 0.4 is the strongest Arabic-native OCR upgrade to test for a stronger worker. It is a 4B VLM fine-tuned for Islamic books and Arabic manuscripts, so keep it out of the default free CPU family-site worker unless a short Arabic-book benchmark proves it improves the actual pages and the worker has enough RAM/GPU. KATIB 0.8B and Arabic-Qwen3.5-OCR-v4 are the smaller Arabic-trained OCR upgrades to try when QARI is too heavy. If the worker is too small for QARI, set `QARI_OCR_MODEL=NAMAA-Space/Qari-OCR-v0.3-VL-2B-Instruct` to test the lighter older QARI path. PaddleOCR-VL-1.6 remains the main general document-parser upgrade to watch. In the website, start with `Tesseract Arabic - Recommended readable`; use `Arabic OCR comparison - slower` or `Maximum Arabic OCR - slower` only for short tests or difficult pages.
97
 
98
  The repo includes optional KATIB, Arabic-Qwen, QARI-OCR, and PaddleOCR-VL sidecar scripts for this evaluation path:
99
 
 
131
  - `CORS_ORIGINS=https://your-vercel-app.vercel.app`
132
  - `COOKIE_SAMESITE=none`
133
  - `COOKIE_SECURE=1`
134
+ 5. Keep `OCR_ENGINE=tesseract`, `OCR_RENDER_ZOOM=2`, and `TESSERACT_PSM=4` for readable full-book runs, or switch to a slower comparison mode only when a short sample proves it is better for that book.
135
 
136
  Then set this on Vercel:
137
 
docs/recommended-decision-card.json CHANGED
@@ -2,7 +2,7 @@
2
  "title": "Recommended Free Arabic PDF To Audio Decision Card",
3
  "currentDefault": {
4
  "pdf": "PyMuPDF embedded text first",
5
- "scannedPdfOcr": "OCR_ENGINE=arabic",
6
  "voice": "SILMA TTS (silma-local)",
7
  "audioStorage": "worker-local retained downloads",
8
  "hosting": "Vercel shell plus Docker worker via WORKER_BASE_URL"
 
2
  "title": "Recommended Free Arabic PDF To Audio Decision Card",
3
  "currentDefault": {
4
  "pdf": "PyMuPDF embedded text first",
5
+ "scannedPdfOcr": "OCR_ENGINE=tesseract OCR_RENDER_ZOOM=2 TESSERACT_PSM=4",
6
  "voice": "SILMA TTS (silma-local)",
7
  "audioStorage": "worker-local retained downloads",
8
  "hosting": "Vercel shell plus Docker worker via WORKER_BASE_URL"
docs/recommended-decision-card.md CHANGED
@@ -5,7 +5,7 @@
5
  | Layer | Choice |
6
  | --- | --- |
7
  | pdf | PyMuPDF embedded text first |
8
- | scannedPdfOcr | OCR_ENGINE=arabic |
9
  | voice | SILMA TTS (silma-local) |
10
  | audioStorage | worker-local retained downloads |
11
  | hosting | Vercel shell plus Docker worker via WORKER_BASE_URL |
 
5
  | Layer | Choice |
6
  | --- | --- |
7
  | pdf | PyMuPDF embedded text first |
8
+ | scannedPdfOcr | OCR_ENGINE=tesseract OCR_RENDER_ZOOM=2 TESSERACT_PSM=4 |
9
  | voice | SILMA TTS (silma-local) |
10
  | audioStorage | worker-local retained downloads |
11
  | hosting | Vercel shell plus Docker worker via WORKER_BASE_URL |
docs/recommended-free-stack.md CHANGED
@@ -7,7 +7,7 @@ This is the compact decision report generated from the current research watchlis
7
  | Layer | Recommendation | Why |
8
  | --- | --- | --- |
9
  | Embedded PDFs | PyMuPDF text extraction first | It is free, fast, and avoids OCR errors when the PDF already contains usable Arabic text. |
10
- | Scanned PDFs | `OCR_ENGINE=arabic` | It uses the best installed Arabic OCR path without the slowest heavy tests, keeping quality high while staying practical for full books. |
11
  | Default voice | SILMA TTS | Arabic-focused Fusha/MSA voice with normalization and tashkeel options. |
12
  | Download/storage | Worker-local retained audio files | Free by default and avoids Vercel's 4.5 MB function payload limit; Hugging Face free CPU disk is 50 GB but non-persistent, so downloads are short-lived. |
13
  | Hosted shape | Vercel shell plus Docker worker via `WORKER_BASE_URL` | Vercel serves the easy website while the worker handles large PDFs, OCR, and TTS on free CPU Space hardware when the job size is reasonable. |
@@ -83,4 +83,4 @@ Promote a model only when all of these are true:
83
  5. Its runtime is acceptable for the target worker.
84
  6. The generated JSON score passes `scripts\model_promotion_gate.py` after human review.
85
 
86
- Current practical default: PyMuPDF -> `arabic` OCR -> SILMA TTS -> downloadable worker audio.
 
7
  | Layer | Recommendation | Why |
8
  | --- | --- | --- |
9
  | Embedded PDFs | PyMuPDF text extraction first | It is free, fast, and avoids OCR errors when the PDF already contains usable Arabic text. |
10
+ | Scanned PDFs | `OCR_ENGINE=tesseract OCR_RENDER_ZOOM=2 TESSERACT_PSM=4` | It produced the most readable text on the 5-page Arabic OCR benchmark while staying much faster than the comparison modes. |
11
  | Default voice | SILMA TTS | Arabic-focused Fusha/MSA voice with normalization and tashkeel options. |
12
  | Download/storage | Worker-local retained audio files | Free by default and avoids Vercel's 4.5 MB function payload limit; Hugging Face free CPU disk is 50 GB but non-persistent, so downloads are short-lived. |
13
  | Hosted shape | Vercel shell plus Docker worker via `WORKER_BASE_URL` | Vercel serves the easy website while the worker handles large PDFs, OCR, and TTS on free CPU Space hardware when the job size is reasonable. |
 
83
  5. Its runtime is acceptable for the target worker.
84
  6. The generated JSON score passes `scripts\model_promotion_gate.py` after human review.
85
 
86
+ Current practical default: PyMuPDF -> `tesseract@2x-psm4` OCR -> SILMA TTS -> downloadable worker audio.
scripts/audit_goal_readiness.py CHANGED
@@ -259,7 +259,7 @@ def collect_checks(
259
  "PASS"
260
  if has_all(
261
  readme + production + deployment_checklist + dockerfile,
262
- ["WORKER_BASE_URL", "Docker", "OCR_ENGINE=arabic", "AUDIO_FORMAT=mp3", "worker-verification.json"],
263
  )
264
  and has_all(deployment_handoff, ["WORKER_BASE_URL", "prove_live_deployment.py", "worker-verification.json"])
265
  and has_all(
@@ -395,12 +395,12 @@ def collect_checks(
395
  [
396
  "Recommended Free Arabic PDF To Audio Stack",
397
  "PyMuPDF text extraction first",
398
- "`OCR_ENGINE=arabic`",
399
  "SILMA TTS",
400
  "4.5 MB function payload limit",
401
  "50 GB but non-persistent",
402
  "Benchmark Before Promoting",
403
- "PyMuPDF -> `arabic` OCR -> SILMA TTS",
404
  ],
405
  )
406
  and has_all(
 
259
  "PASS"
260
  if has_all(
261
  readme + production + deployment_checklist + dockerfile,
262
+ ["WORKER_BASE_URL", "Docker", "OCR_ENGINE=tesseract", "OCR_RENDER_ZOOM=2", "TESSERACT_PSM=4", "AUDIO_FORMAT=mp3", "worker-verification.json"],
263
  )
264
  and has_all(deployment_handoff, ["WORKER_BASE_URL", "prove_live_deployment.py", "worker-verification.json"])
265
  and has_all(
 
395
  [
396
  "Recommended Free Arabic PDF To Audio Stack",
397
  "PyMuPDF text extraction first",
398
+ "`OCR_ENGINE=tesseract OCR_RENDER_ZOOM=2 TESSERACT_PSM=4`",
399
  "SILMA TTS",
400
  "4.5 MB function payload limit",
401
  "50 GB but non-persistent",
402
  "Benchmark Before Promoting",
403
+ "PyMuPDF -> `tesseract@2x-psm4` OCR -> SILMA TTS",
404
  ],
405
  )
406
  and has_all(
scripts/check_deployment_readiness.py CHANGED
@@ -216,7 +216,7 @@ def check_worker(root: Path = ROOT_DIR) -> list[Check]:
216
  add(checks, "Worker", "base image", "PASS" if "python:3.10" in dockerfile else "WARN", "Python 3.10 is expected")
217
  for package in ["tesseract-ocr-ara", "espeak-ng", "ffmpeg"]:
218
  add(checks, "Worker", f"apt package {package}", "PASS" if package in dockerfile else "FAIL", package)
219
- for env_key in ["WORK_DIR", "DATABASE_PATH", "OCR_ENGINE=arabic", "AUDIO_FORMAT=mp3"]:
220
  add(checks, "Worker", f"env {env_key}", "PASS" if env_key in dockerfile else "WARN", env_key)
221
  for arg in [
222
  "ARG INSTALL_QARI_OCR=0",
 
216
  add(checks, "Worker", "base image", "PASS" if "python:3.10" in dockerfile else "WARN", "Python 3.10 is expected")
217
  for package in ["tesseract-ocr-ara", "espeak-ng", "ffmpeg"]:
218
  add(checks, "Worker", f"apt package {package}", "PASS" if package in dockerfile else "FAIL", package)
219
+ for env_key in ["WORK_DIR", "DATABASE_PATH", "OCR_ENGINE=tesseract", "OCR_RENDER_ZOOM=2", "TESSERACT_PSM=4", "AUDIO_FORMAT=mp3"]:
220
  add(checks, "Worker", f"env {env_key}", "PASS" if env_key in dockerfile else "WARN", env_key)
221
  for arg in [
222
  "ARG INSTALL_QARI_OCR=0",
scripts/check_research_sources.py CHANGED
@@ -265,17 +265,17 @@ REQUIRED_METADATA_MARKERS = [
265
  REQUIRED_RECOMMENDATION_MARKERS = [
266
  "Recommended Free Arabic PDF To Audio Stack",
267
  "PyMuPDF text extraction first",
268
- "`OCR_ENGINE=arabic`",
269
  "SILMA TTS",
270
  "Vercel shell plus Docker worker",
271
  "Benchmark Before Promoting",
272
  "model_promotion_gate.py",
273
- "PyMuPDF -> `arabic` OCR -> SILMA TTS",
274
  ]
275
  REQUIRED_DECISION_CARD_MARKERS = [
276
  "Recommended Free Arabic PDF To Audio Decision Card",
277
  "PyMuPDF embedded text first",
278
- "OCR_ENGINE=arabic",
279
  "SILMA TTS",
280
  "worker-local retained downloads",
281
  "Vercel shell plus Docker worker",
 
265
  REQUIRED_RECOMMENDATION_MARKERS = [
266
  "Recommended Free Arabic PDF To Audio Stack",
267
  "PyMuPDF text extraction first",
268
+ "`OCR_ENGINE=tesseract OCR_RENDER_ZOOM=2 TESSERACT_PSM=4`",
269
  "SILMA TTS",
270
  "Vercel shell plus Docker worker",
271
  "Benchmark Before Promoting",
272
  "model_promotion_gate.py",
273
+ "PyMuPDF -> `tesseract@2x-psm4` OCR -> SILMA TTS",
274
  ]
275
  REQUIRED_DECISION_CARD_MARKERS = [
276
  "Recommended Free Arabic PDF To Audio Decision Card",
277
  "PyMuPDF embedded text first",
278
+ "OCR_ENGINE=tesseract OCR_RENDER_ZOOM=2 TESSERACT_PSM=4",
279
  "SILMA TTS",
280
  "worker-local retained downloads",
281
  "Vercel shell plus Docker worker",
scripts/deployment_handoff.py CHANGED
@@ -72,7 +72,9 @@ def build_handoff(
72
  "CORS_ORIGINS": vercel_origin,
73
  "COOKIE_SAMESITE": "none",
74
  "COOKIE_SECURE": "1",
75
- "OCR_ENGINE": "arabic",
 
 
76
  "DEFAULT_VOICE_ID": "silma-local",
77
  "OUTPUT_RETENTION_DAYS": "7",
78
  "OUTPUT_MAX_FILES": "25",
@@ -223,7 +225,7 @@ def write_markdown(path: Path, handoff: DeploymentHandoff) -> None:
223
  "- Set Vercel `WORKER_BASE_URL` to the exact Hugging Face worker URL shown above.",
224
  "- After both deployments finish, run the Vercel worker diagnostic command below before uploading a large PDF. It must show `site worker reachable from vercel` and `site worker CORS ready`.",
225
  "- Remove Vercel's temporary direct Hugging Face TTS fallback variables for production: `ENABLE_DIRECT_CLOUD_TTS`, `HF_API_TOKEN`, `HF_TTS_MODEL`, and `DEFAULT_VOICE_ID`.",
226
- "- Keep `OCR_ENGINE=arabic` for normal scanned Arabic books; use `arabic-max` only when a short sample needs the slower maximum comparison.",
227
  "- Do not commit this handoff; it contains the deployment `SECRET_KEY`.",
228
  "",
229
  "## Hugging Face Docker Build Args",
 
72
  "CORS_ORIGINS": vercel_origin,
73
  "COOKIE_SAMESITE": "none",
74
  "COOKIE_SECURE": "1",
75
+ "OCR_ENGINE": "tesseract",
76
+ "OCR_RENDER_ZOOM": "2",
77
+ "TESSERACT_PSM": "4",
78
  "DEFAULT_VOICE_ID": "silma-local",
79
  "OUTPUT_RETENTION_DAYS": "7",
80
  "OUTPUT_MAX_FILES": "25",
 
225
  "- Set Vercel `WORKER_BASE_URL` to the exact Hugging Face worker URL shown above.",
226
  "- After both deployments finish, run the Vercel worker diagnostic command below before uploading a large PDF. It must show `site worker reachable from vercel` and `site worker CORS ready`.",
227
  "- Remove Vercel's temporary direct Hugging Face TTS fallback variables for production: `ENABLE_DIRECT_CLOUD_TTS`, `HF_API_TOKEN`, `HF_TTS_MODEL`, and `DEFAULT_VOICE_ID`.",
228
+ "- Keep `OCR_ENGINE=tesseract`, `OCR_RENDER_ZOOM=2`, and `TESSERACT_PSM=4` for normal scanned Arabic books; use `arabic-max` only when a short sample needs the slower maximum comparison.",
229
  "- Do not commit this handoff; it contains the deployment `SECRET_KEY`.",
230
  "",
231
  "## Hugging Face Docker Build Args",
scripts/export_hf_space.py CHANGED
@@ -216,7 +216,9 @@ SECRET_KEY=<generated by outputs\\deployment-handoff.md>
216
  CORS_ORIGINS=https://your-vercel-app.vercel.app
217
  COOKIE_SAMESITE=none
218
  COOKIE_SECURE=1
219
- OCR_ENGINE=arabic
 
 
220
  DEFAULT_VOICE_ID=silma-local
221
  OUTPUT_RETENTION_DAYS=7
222
  OUTPUT_MAX_FILES=25
@@ -232,7 +234,7 @@ python scripts\\deployment_handoff.py https://your-space.hf.space --origin https
232
 
233
  Keep `outputs\\deployment-handoff.md` private because it contains deployment secrets.
234
 
235
- The compact process recommendation is included at `docs/recommended-free-stack.md`, with the machine-readable deployment decision card at `docs/recommended-decision-card.json` and its readable companion at `docs/recommended-decision-card.md`. The current practical default is PyMuPDF embedded text first, `OCR_ENGINE=arabic` for balanced scanned Arabic OCR, SILMA TTS for the first clean voice, and downloadable worker audio.
236
 
237
  Optional stronger-worker build args:
238
 
 
216
  CORS_ORIGINS=https://your-vercel-app.vercel.app
217
  COOKIE_SAMESITE=none
218
  COOKIE_SECURE=1
219
+ OCR_ENGINE=tesseract
220
+ OCR_RENDER_ZOOM=2
221
+ TESSERACT_PSM=4
222
  DEFAULT_VOICE_ID=silma-local
223
  OUTPUT_RETENTION_DAYS=7
224
  OUTPUT_MAX_FILES=25
 
234
 
235
  Keep `outputs\\deployment-handoff.md` private because it contains deployment secrets.
236
 
237
+ The compact process recommendation is included at `docs/recommended-free-stack.md`, with the machine-readable deployment decision card at `docs/recommended-decision-card.json` and its readable companion at `docs/recommended-decision-card.md`. The current practical default is PyMuPDF embedded text first, `OCR_ENGINE=tesseract OCR_RENDER_ZOOM=2 TESSERACT_PSM=4` for the most readable tested scanned Arabic OCR, SILMA TTS for the first clean voice, and downloadable worker audio.
238
 
239
  Optional stronger-worker build args:
240
 
scripts/research_watchlist.py CHANGED
@@ -1488,7 +1488,7 @@ def build_recommendation_report(candidates: list[Candidate]) -> str:
1488
  "| Layer | Recommendation | Why |",
1489
  "| --- | --- | --- |",
1490
  "| Embedded PDFs | PyMuPDF text extraction first | It is free, fast, and avoids OCR errors when the PDF already contains usable Arabic text. |",
1491
- "| Scanned PDFs | `OCR_ENGINE=arabic` | It uses the best installed Arabic OCR path without the slowest heavy tests, keeping quality high while staying practical for full books. |",
1492
  f"| Default voice | {default_voice.name} | {default_voice.why} |",
1493
  "| Download/storage | Worker-local retained audio files | Free by default and avoids Vercel's 4.5 MB function payload limit; Hugging Face free CPU disk is 50 GB but non-persistent, so downloads are short-lived. |",
1494
  "| Hosted shape | Vercel shell plus Docker worker via `WORKER_BASE_URL` | Vercel serves the easy website while the worker handles large PDFs, OCR, and TTS on free CPU Space hardware when the job size is reasonable. |",
@@ -1544,7 +1544,7 @@ def build_recommendation_report(candidates: list[Candidate]) -> str:
1544
  "5. Its runtime is acceptable for the target worker.",
1545
  "6. The generated JSON score passes `scripts\\model_promotion_gate.py` after human review.",
1546
  "",
1547
- "Current practical default: PyMuPDF -> `arabic` OCR -> SILMA TTS -> downloadable worker audio.",
1548
  "",
1549
  ]
1550
  )
@@ -1563,7 +1563,7 @@ def build_decision_card(candidates: list[Candidate]) -> dict[str, object]:
1563
  "title": "Recommended Free Arabic PDF To Audio Decision Card",
1564
  "currentDefault": {
1565
  "pdf": "PyMuPDF embedded text first",
1566
- "scannedPdfOcr": "OCR_ENGINE=arabic",
1567
  "voice": "SILMA TTS (silma-local)",
1568
  "audioStorage": "worker-local retained downloads",
1569
  "hosting": "Vercel shell plus Docker worker via WORKER_BASE_URL",
 
1488
  "| Layer | Recommendation | Why |",
1489
  "| --- | --- | --- |",
1490
  "| Embedded PDFs | PyMuPDF text extraction first | It is free, fast, and avoids OCR errors when the PDF already contains usable Arabic text. |",
1491
+ "| Scanned PDFs | `OCR_ENGINE=tesseract OCR_RENDER_ZOOM=2 TESSERACT_PSM=4` | It produced the most readable text on the 5-page Arabic OCR benchmark while staying much faster than the comparison modes. |",
1492
  f"| Default voice | {default_voice.name} | {default_voice.why} |",
1493
  "| Download/storage | Worker-local retained audio files | Free by default and avoids Vercel's 4.5 MB function payload limit; Hugging Face free CPU disk is 50 GB but non-persistent, so downloads are short-lived. |",
1494
  "| Hosted shape | Vercel shell plus Docker worker via `WORKER_BASE_URL` | Vercel serves the easy website while the worker handles large PDFs, OCR, and TTS on free CPU Space hardware when the job size is reasonable. |",
 
1544
  "5. Its runtime is acceptable for the target worker.",
1545
  "6. The generated JSON score passes `scripts\\model_promotion_gate.py` after human review.",
1546
  "",
1547
+ "Current practical default: PyMuPDF -> `tesseract@2x-psm4` OCR -> SILMA TTS -> downloadable worker audio.",
1548
  "",
1549
  ]
1550
  )
 
1563
  "title": "Recommended Free Arabic PDF To Audio Decision Card",
1564
  "currentDefault": {
1565
  "pdf": "PyMuPDF embedded text first",
1566
+ "scannedPdfOcr": "OCR_ENGINE=tesseract OCR_RENDER_ZOOM=2 TESSERACT_PSM=4",
1567
  "voice": "SILMA TTS (silma-local)",
1568
  "audioStorage": "worker-local retained downloads",
1569
  "hosting": "Vercel shell plus Docker worker via WORKER_BASE_URL",
scripts/setup_paddleocr.ps1 CHANGED
@@ -15,4 +15,4 @@ $pythonExe = Join-Path $venv "Scripts\python.exe"
15
  & $pythonExe -m pip install -r (Join-Path $root "requirements-paddleocr.txt")
16
 
17
  Write-Host "PaddleOCR Arabic PP-OCRv5 sidecar is ready at $venv"
18
- Write-Host "Use OCR_ENGINE=arabic-max or choose Maximum Arabic-trained OCR in the website."
 
15
  & $pythonExe -m pip install -r (Join-Path $root "requirements-paddleocr.txt")
16
 
17
  Write-Host "PaddleOCR Arabic PP-OCRv5 sidecar is ready at $venv"
18
+ Write-Host "Use OCR_ENGINE=paddleocr only as a fallback; the website's readable default is Tesseract Arabic."
scripts/setup_paddleocr.sh CHANGED
@@ -13,4 +13,4 @@ fi
13
  "$VENV/bin/python" -m pip install -r "$ROOT/requirements-paddleocr.txt"
14
 
15
  echo "PaddleOCR Arabic PP-OCRv5 sidecar is ready at $VENV"
16
- echo "Use OCR_ENGINE=arabic-max or choose Maximum Arabic-trained OCR in the website."
 
13
  "$VENV/bin/python" -m pip install -r "$ROOT/requirements-paddleocr.txt"
14
 
15
  echo "PaddleOCR Arabic PP-OCRv5 sidecar is ready at $VENV"
16
+ echo "Use OCR_ENGINE=paddleocr only as a fallback; the website's readable default is Tesseract Arabic."
scripts/validate_deployment_env.py CHANGED
@@ -228,8 +228,22 @@ def validate_worker_env(
228
  checks,
229
  "Worker",
230
  "OCR_ENGINE",
231
- "PASS" if env.get("OCR_ENGINE", "arabic") == "arabic" else "WARN",
232
- env.get("OCR_ENGINE", "arabic"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  )
234
  add(
235
  checks,
 
228
  checks,
229
  "Worker",
230
  "OCR_ENGINE",
231
+ "PASS" if env.get("OCR_ENGINE", "tesseract") == "tesseract" else "WARN",
232
+ env.get("OCR_ENGINE", "tesseract"),
233
+ )
234
+ add(
235
+ checks,
236
+ "Worker",
237
+ "OCR_RENDER_ZOOM",
238
+ "PASS" if env.get("OCR_RENDER_ZOOM", "2") == "2" else "WARN",
239
+ env.get("OCR_RENDER_ZOOM", "2"),
240
+ )
241
+ add(
242
+ checks,
243
+ "Worker",
244
+ "TESSERACT_PSM",
245
+ "PASS" if env.get("TESSERACT_PSM", "4") == "4" else "WARN",
246
+ env.get("TESSERACT_PSM", "4"),
247
  )
248
  add(
249
  checks,
scripts/verify_site.py CHANGED
@@ -75,7 +75,8 @@ def verify_site(
75
  checks,
76
  "site recommended stack documented",
77
  recommended_stack.get("pdf") == "PyMuPDF embedded text first"
78
- and recommended_stack.get("ocrEngine") == "arabic"
 
79
  and recommended_stack.get("voiceId") == "silma-local"
80
  and recommended_stack.get("audioStorage") == "worker-local retained downloads",
81
  json.dumps(recommended_stack),
 
75
  checks,
76
  "site recommended stack documented",
77
  recommended_stack.get("pdf") == "PyMuPDF embedded text first"
78
+ and recommended_stack.get("ocrEngine") == "tesseract"
79
+ and recommended_stack.get("ocrSettings") == "OCR_RENDER_ZOOM=2 TESSERACT_PSM=4"
80
  and recommended_stack.get("voiceId") == "silma-local"
81
  and recommended_stack.get("audioStorage") == "worker-local retained downloads",
82
  json.dumps(recommended_stack),
scripts/verify_worker.py CHANGED
@@ -113,7 +113,8 @@ def has_recommended_stack(engines: dict[str, Any]) -> bool:
113
  stack = recommended_stack_summary(engines)
114
  return bool(
115
  stack.get("pdf") == "PyMuPDF embedded text first"
116
- and stack.get("ocrEngine") == "arabic"
 
117
  and stack.get("voiceId") == "silma-local"
118
  and stack.get("audioStorage") == "worker-local retained downloads"
119
  )
 
113
  stack = recommended_stack_summary(engines)
114
  return bool(
115
  stack.get("pdf") == "PyMuPDF embedded text first"
116
+ and stack.get("ocrEngine") == "tesseract"
117
+ and stack.get("ocrSettings") == "OCR_RENDER_ZOOM=2 TESSERACT_PSM=4"
118
  and stack.get("voiceId") == "silma-local"
119
  and stack.get("audioStorage") == "worker-local retained downloads"
120
  )
static/app.js CHANGED
@@ -84,19 +84,19 @@ let browserSpeechSourceName = "";
84
 
85
  const ocrModeLabels = {
86
  "arabic-max": "Maximum Arabic OCR - slower",
87
- arabic: "Arabic OCR - Recommended balance",
88
  "qari-ocr": "QARI Arabic books (best)",
89
  "tawkeed-ocr": "Tawkeed Arabic OCR",
90
  "katib-ocr": "KATIB Arabic OCR (lighter)",
91
  "arabic-qwen-ocr": "Arabic-Qwen OCR",
92
  "arabic-glm-ocr": "Arabic-GLM OCR v2",
93
  "baseer-ocr": "Baseer Arabic OCR",
94
- paddleocr: "PaddleOCR Arabic - Recommended balance",
95
  "paddleocr-vl": "PaddleOCR-VL heavy",
96
  best: "Best scan test",
97
  surya: "Surya heavy OCR",
98
  easyocr: "General Arabic OCR",
99
- tesseract: "Tesseract Arabic fallback",
100
  auto: "Auto fallback",
101
  };
102
 
@@ -316,7 +316,7 @@ async function loadHealth() {
316
  engines.ocr?.preferred === "arabic-max"
317
  ? "Maximum Arabic OCR is ready, but slower"
318
  : engines.ocr?.preferred === "arabic"
319
- ? "Recommended balanced Arabic OCR is ready"
320
  : engines.ocr?.preferred === "qari-ocr"
321
  ? "QARI Arabic book OCR is ready"
322
  : engines.ocr?.preferred === "tawkeed-ocr"
@@ -330,11 +330,13 @@ async function loadHealth() {
330
  : engines.ocr?.preferred === "baseer-ocr"
331
  ? "Baseer Arabic OCR is ready"
332
  : engines.ocr?.preferred === "paddleocr"
333
- ? "Recommended fast PaddleOCR Arabic is ready"
334
  : engines.ocr?.preferred === "paddleocr-vl"
335
  ? "PaddleOCR-VL heavy OCR is ready"
336
  : engines.ocr?.preferred === "surya"
337
  ? "Surya heavy OCR is ready"
 
 
338
  : engines.ocr?.preferred === "best"
339
  ? "Best Arabic OCR test mode is ready"
340
  : engines.ocr?.preferred
@@ -1389,7 +1391,7 @@ function describeOcrMode() {
1389
  engineNotice.textContent = `Maximum Arabic OCR selected. It tries the most engines and keeps the cleanest text, but it is slower. Use Quick test first.${installedText}`;
1390
  engineNotice.classList.remove("warning");
1391
  } else if (ocrModeSelect.value === "arabic") {
1392
- engineNotice.textContent = `Recommended balance selected. It uses the best installed Arabic OCR path without the slowest heavy tests, so it is the best starting choice for full books.${installedText}`;
1393
  engineNotice.classList.remove("warning");
1394
  } else if (ocrModeSelect.value === "qari-ocr") {
1395
  engineNotice.textContent = "QARI Arabic books selected. Use this on a short sample or strong worker; it is trained for Arabic books, Islamic texts, manuscripts, and layout-aware Arabic transcription.";
@@ -1413,7 +1415,10 @@ function describeOcrMode() {
1413
  engineNotice.textContent = "Best scan test selected. Use this on a short sample, then run the winning engine for the full book.";
1414
  engineNotice.classList.remove("warning");
1415
  } else if (ocrModeSelect.value === "paddleocr") {
1416
- engineNotice.textContent = "PaddleOCR Arabic selected. This is the fastest recommended balance on the current worker for scanned Arabic text.";
 
 
 
1417
  engineNotice.classList.remove("warning");
1418
  } else if (ocrModeSelect.value === "paddleocr-vl") {
1419
  engineNotice.textContent = "PaddleOCR-VL selected. Use this only on a short sample or strong worker; it is much heavier than normal Arabic OCR.";
@@ -1644,8 +1649,8 @@ function showQualityHint(quality) {
1644
  }
1645
  const reasons = quality.reasons?.length ? ` ${quality.reasons.join("; ")}.` : "";
1646
  const action = quality.quality === "poor"
1647
- ? "Try Arabic OCR - Recommended balance, Best scan test, or another OCR mode before creating audio."
1648
- : "Listen to a short sample before running the full book. If it sounds wrong, try Arabic OCR - Recommended balance, Best scan test, or another OCR mode.";
1649
  qualityHint.textContent = `Text needs checking.${reasons} ${action}`;
1650
  qualityHint.classList.remove("hidden");
1651
  qualityHint.classList.toggle("poor", quality.quality === "poor");
 
84
 
85
  const ocrModeLabels = {
86
  "arabic-max": "Maximum Arabic OCR - slower",
87
+ arabic: "Arabic OCR comparison - slower",
88
  "qari-ocr": "QARI Arabic books (best)",
89
  "tawkeed-ocr": "Tawkeed Arabic OCR",
90
  "katib-ocr": "KATIB Arabic OCR (lighter)",
91
  "arabic-qwen-ocr": "Arabic-Qwen OCR",
92
  "arabic-glm-ocr": "Arabic-GLM OCR v2",
93
  "baseer-ocr": "Baseer Arabic OCR",
94
+ paddleocr: "PaddleOCR Arabic - faster, less readable",
95
  "paddleocr-vl": "PaddleOCR-VL heavy",
96
  best: "Best scan test",
97
  surya: "Surya heavy OCR",
98
  easyocr: "General Arabic OCR",
99
+ tesseract: "Tesseract Arabic - Recommended readable",
100
  auto: "Auto fallback",
101
  };
102
 
 
316
  engines.ocr?.preferred === "arabic-max"
317
  ? "Maximum Arabic OCR is ready, but slower"
318
  : engines.ocr?.preferred === "arabic"
319
+ ? "Arabic OCR comparison is ready, but slower"
320
  : engines.ocr?.preferred === "qari-ocr"
321
  ? "QARI Arabic book OCR is ready"
322
  : engines.ocr?.preferred === "tawkeed-ocr"
 
330
  : engines.ocr?.preferred === "baseer-ocr"
331
  ? "Baseer Arabic OCR is ready"
332
  : engines.ocr?.preferred === "paddleocr"
333
+ ? "PaddleOCR Arabic is ready, but less readable"
334
  : engines.ocr?.preferred === "paddleocr-vl"
335
  ? "PaddleOCR-VL heavy OCR is ready"
336
  : engines.ocr?.preferred === "surya"
337
  ? "Surya heavy OCR is ready"
338
+ : engines.ocr?.preferred === "tesseract"
339
+ ? "Recommended readable Tesseract Arabic OCR is ready"
340
  : engines.ocr?.preferred === "best"
341
  ? "Best Arabic OCR test mode is ready"
342
  : engines.ocr?.preferred
 
1391
  engineNotice.textContent = `Maximum Arabic OCR selected. It tries the most engines and keeps the cleanest text, but it is slower. Use Quick test first.${installedText}`;
1392
  engineNotice.classList.remove("warning");
1393
  } else if (ocrModeSelect.value === "arabic") {
1394
+ engineNotice.textContent = `Arabic OCR comparison selected. It compares installed OCR paths and can be much slower than the recommended Tesseract setting.${installedText}`;
1395
  engineNotice.classList.remove("warning");
1396
  } else if (ocrModeSelect.value === "qari-ocr") {
1397
  engineNotice.textContent = "QARI Arabic books selected. Use this on a short sample or strong worker; it is trained for Arabic books, Islamic texts, manuscripts, and layout-aware Arabic transcription.";
 
1415
  engineNotice.textContent = "Best scan test selected. Use this on a short sample, then run the winning engine for the full book.";
1416
  engineNotice.classList.remove("warning");
1417
  } else if (ocrModeSelect.value === "paddleocr") {
1418
+ engineNotice.textContent = "PaddleOCR Arabic selected. It works, but the 5-page benchmark produced more fragmented text than Tesseract.";
1419
+ engineNotice.classList.remove("warning");
1420
+ } else if (ocrModeSelect.value === "tesseract") {
1421
+ engineNotice.textContent = "Tesseract Arabic selected. This is the recommended readable option from the 5-page OCR benchmark.";
1422
  engineNotice.classList.remove("warning");
1423
  } else if (ocrModeSelect.value === "paddleocr-vl") {
1424
  engineNotice.textContent = "PaddleOCR-VL selected. Use this only on a short sample or strong worker; it is much heavier than normal Arabic OCR.";
 
1649
  }
1650
  const reasons = quality.reasons?.length ? ` ${quality.reasons.join("; ")}.` : "";
1651
  const action = quality.quality === "poor"
1652
+ ? "Try Tesseract Arabic - Recommended readable, Best scan test, or another OCR mode before creating audio."
1653
+ : "Listen to a short sample before running the full book. If it sounds wrong, try Tesseract Arabic - Recommended readable, Best scan test, or another OCR mode.";
1654
  qualityHint.textContent = `Text needs checking.${reasons} ${action}`;
1655
  qualityHint.classList.remove("hidden");
1656
  qualityHint.classList.toggle("poor", quality.quality === "poor");
static/index.html CHANGED
@@ -67,7 +67,8 @@
67
  <div class="field-group">
68
  <label for="ocrModeSelect">Text quality</label>
69
  <select id="ocrModeSelect" name="ocrMode">
70
- <option value="arabic">Arabic OCR - Recommended balance</option>
 
71
  <option value="arabic-max">Maximum Arabic OCR - slower</option>
72
  <option value="qari-ocr">QARI Arabic books (best)</option>
73
  <option value="tawkeed-ocr">Tawkeed Arabic OCR</option>
@@ -76,11 +77,10 @@
76
  <option value="arabic-glm-ocr">Arabic-GLM OCR v2</option>
77
  <option value="baseer-ocr">Baseer Arabic OCR</option>
78
  <option value="best">Best scan test</option>
79
- <option value="paddleocr">PaddleOCR Arabic - Recommended balance</option>
80
  <option value="paddleocr-vl">PaddleOCR-VL heavy</option>
81
  <option value="surya">Surya heavy OCR</option>
82
  <option value="easyocr">General Arabic OCR</option>
83
- <option value="tesseract">Tesseract Arabic fallback</option>
84
  <option value="auto">Auto fallback</option>
85
  </select>
86
  </div>
 
67
  <div class="field-group">
68
  <label for="ocrModeSelect">Text quality</label>
69
  <select id="ocrModeSelect" name="ocrMode">
70
+ <option value="tesseract">Tesseract Arabic - Recommended readable</option>
71
+ <option value="arabic">Arabic OCR comparison - slower</option>
72
  <option value="arabic-max">Maximum Arabic OCR - slower</option>
73
  <option value="qari-ocr">QARI Arabic books (best)</option>
74
  <option value="tawkeed-ocr">Tawkeed Arabic OCR</option>
 
77
  <option value="arabic-glm-ocr">Arabic-GLM OCR v2</option>
78
  <option value="baseer-ocr">Baseer Arabic OCR</option>
79
  <option value="best">Best scan test</option>
80
+ <option value="paddleocr">PaddleOCR Arabic - faster, less readable</option>
81
  <option value="paddleocr-vl">PaddleOCR-VL heavy</option>
82
  <option value="surya">Surya heavy OCR</option>
83
  <option value="easyocr">General Arabic OCR</option>
 
84
  <option value="auto">Auto fallback</option>
85
  </select>
86
  </div>