# arabic-audio-reader-worker / scripts / prepare_book_workflow.py
# Syncre's picture
# Deploy Arabic Audio Reader worker
# 2e1a095 verified
from __future__ import annotations
import argparse
import json
import sys
import tempfile
from pathlib import Path
from typing import Any
import fitz
# Directory two levels above this file; the sibling ``scripts`` and ``app``
# packages are imported relative to it.
_SCRIPT_DIR = Path(__file__).resolve().parent
ROOT_DIR = _SCRIPT_DIR.parent

# Put the root on the import path (once) so the project imports resolve when
# this file is executed directly.
if not any(entry == str(ROOT_DIR) for entry in sys.path):
    sys.path.insert(0, str(ROOT_DIR))
from scripts.benchmark_ocr import benchmark_engine
from scripts.benchmark_voices import DEFAULT_VOICES, benchmark_voices as benchmark_voice_set
from scripts.dry_run_pdf import dry_run_pdf
from scripts.select_test_pages import build_test_pdf
from scripts.verify_pipeline import verify_pipeline
from app import main
# Engines that open both the "balanced" and the "maximum" presets.
_PRESET_LEADING_ENGINES = [
    "arabic-max",
    "arabic",
    "tawkeed-ocr",
    "katib-ocr",
    "arabic-qwen-ocr",
    "arabic-glm-ocr",
    "baseer-ocr",
]
# Engines that close every preset.
_PRESET_TRAILING_ENGINES = ["paddleocr", "tesseract"]

# Named OCR engine line-ups; each value is an ordered list of engine ids.
ENGINE_PRESETS: dict[str, list[str]] = {
    "practical": ["arabic", *_PRESET_TRAILING_ENGINES],
    "balanced": [*_PRESET_LEADING_ENGINES, *_PRESET_TRAILING_ENGINES],
    "maximum": [
        *_PRESET_LEADING_ENGINES,
        "qari-ocr",
        "paddleocr-vl",
        *_PRESET_TRAILING_ENGINES,
    ],
}
def choose_best_result(results: list[dict[str, Any]]) -> dict[str, Any] | None:
successful = [item for item in results if item.get("ok")]
if not successful:
return None
return max(successful, key=lambda item: (item.get("qualityScore", 0), item.get("arabicWords", 0), -item.get("seconds", 0)))
def env_text(env: dict[str, str]) -> str:
    """Render *env* as space-separated ``KEY=value`` pairs in dict order."""
    assignments: list[str] = []
    for name in env:
        assignments.append(f"{name}={env[name]}")
    return " ".join(assignments)
def quote_arg(value: str | Path) -> str:
text = str(value)
if not text:
return '""'
if any(char.isspace() for char in text):
return f'"{text}"'
return text
def build_commands(
    pdf_path: Path,
    extraction: str,
    voice_id: str,
    audio_max_chars: int,
    audio_out: Path | None = None,
    env_file: Path | None = None,
    external_tts_out_dir: Path | None = None,
    external_ocr_out_dir: Path | None = None,
) -> dict[str, str]:
    """Build the follow-up command lines shown to the user.

    Args:
        pdf_path: PDF the commands operate on.
        extraction: Extraction id passed via ``--from-extraction`` when no
            env file is given.
        voice_id: Voice id passed to the verify commands.
        audio_max_chars: Value for ``--max-speech-chars`` in the smoke command.
        audio_out: Smoke-test audio path; defaults to
            ``<ROOT_DIR>/outputs/<pdf stem>-sample-smoke.wav``.
        env_file: When given, commands use ``--env-file`` instead of
            ``--from-extraction``.
        external_tts_out_dir: Output directory for the TTS sample export;
            defaults to ``<ROOT_DIR>/outputs/external-tts-sample``.
        external_ocr_out_dir: Output directory for the OCR image export;
            defaults to ``<ROOT_DIR>/outputs/external-ocr-sample``.

    Returns:
        Mapping from command name to command string.
    """
    outputs_dir = ROOT_DIR / "outputs"
    source = quote_arg(pdf_path)

    if env_file:
        settings = f"--env-file {quote_arg(env_file)}"
    else:
        settings = f"--from-extraction {quote_arg(extraction)}"

    smoke_target = quote_arg(audio_out or outputs_dir / f"{pdf_path.stem}-sample-smoke.wav")
    tts_target = quote_arg(external_tts_out_dir or outputs_dir / "external-tts-sample")
    ocr_target = quote_arg(external_ocr_out_dir or outputs_dir / "external-ocr-sample")

    # Both verify_pipeline commands share everything up to the voice id.
    verify_prefix = f"python scripts\\verify_pipeline.py --pdf {source} {settings} --voice-id {voice_id}"

    commands: dict[str, str] = {}
    commands["dryRunRecommended"] = f"python scripts\\dry_run_pdf.py {source} {settings}"
    commands["audioSmokeRecommended"] = (
        f"{verify_prefix} --max-speech-chars {audio_max_chars} --out {smoke_target}"
    )
    commands["externalTtsSample"] = (
        f"python scripts\\export_tts_sample.py {source} {settings} --out-dir {tts_target}"
    )
    commands["externalOcrSample"] = (
        f"python scripts\\export_ocr_sample_images.py {source} --out-dir {ocr_target}"
    )
    commands["fullPipelineRecommended"] = f"{verify_prefix} --out outputs\\full-book-smoke.wav"
    return commands
def seconds_label(seconds: float | None) -> str:
if seconds is None:
return "unknown"
if seconds < 60:
return f"{round(seconds, 1)} seconds"
minutes = seconds / 60
if minutes < 60:
return f"{round(minutes, 1)} minutes"
return f"{round(minutes / 60, 1)} hours"
def estimate_full_book(
    total_pages: int,
    sample_page_count: int,
    selected: dict[str, Any],
    dry_run: dict[str, Any],
    audio_smoke: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Extrapolate sample measurements to the whole book.

    Args:
        total_pages: Page count of the full PDF; a falsy value disables the
            totals (they become ``None``).
        sample_page_count: Number of sampled pages; values below 1 count as 1.
        selected: Chosen benchmark entry; its ``"seconds"`` is the sample OCR
            time.
        dry_run: Dry-run result; its ``"speechCharacters"`` is the sample
            speech length.
        audio_smoke: Optional audio smoke result; ``"audioSpeechCharacters"``
            and ``"elapsedSeconds"`` give the TTS rate when both are positive.

    Returns:
        A dict of per-page rates, extrapolated totals and their
        human-readable labels. The total time is OCR time plus TTS time, with
        an unknown TTS time counted as 0.
    """
    pages_sampled = max(sample_page_count, 1)
    ocr_per_page = float(selected.get("seconds") or 0) / pages_sampled
    chars_per_page = float(dry_run.get("speechCharacters") or 0) / pages_sampled

    if total_pages:
        multiplier = total_pages / pages_sampled
        ocr_total = round(ocr_per_page * total_pages, 2)
        speech_chars_total = int(round(chars_per_page * total_pages))
    else:
        multiplier = 0
        ocr_total = None
        speech_chars_total = None

    tts_per_char = None
    tts_total = None
    if audio_smoke:
        synthesized_chars = int(audio_smoke.get("audioSpeechCharacters") or 0)
        synthesis_seconds = float(audio_smoke.get("elapsedSeconds") or 0)
        rate_is_known = synthesized_chars > 0 and synthesis_seconds > 0
        if rate_is_known and speech_chars_total is not None:
            tts_per_char = synthesis_seconds / synthesized_chars
            tts_total = round(tts_per_char * speech_chars_total, 2)

    overall_total = None if ocr_total is None else ocr_total + (tts_total or 0)

    report: dict[str, Any] = {"basis": "sample"}
    report["totalPages"] = total_pages
    report["samplePages"] = pages_sampled
    report["pagesMultiplier"] = round(multiplier, 2)
    report["ocrSecondsPerPage"] = round(ocr_per_page, 2)
    report["estimatedOcrSeconds"] = ocr_total
    report["estimatedOcrTime"] = seconds_label(ocr_total)
    report["speechCharactersPerPage"] = round(chars_per_page, 2)
    report["estimatedSpeechCharacters"] = speech_chars_total
    report["ttsSecondsPerCharacter"] = None if tts_per_char is None else round(tts_per_char, 5)
    report["estimatedTtsSeconds"] = tts_total
    report["estimatedTtsTime"] = seconds_label(tts_total)
    report["estimatedTotalSeconds"] = None if overall_total is None else round(overall_total, 2)
    report["estimatedTotalTime"] = seconds_label(overall_total)
    report["note"] = "Estimate is based on selected sample pages; dense or scanned pages can vary a lot."
    return report
def build_next_steps(result: dict[str, Any]) -> list[str]:
    """Turn a workflow result into an ordered list of human-readable advice.

    When the dry run is not ready for TTS, the only step returned is the
    advice to improve OCR first. Otherwise the list covers OCR quality,
    recommended settings, audio checks, voice benchmarking, useful commands
    and runtime warnings, in that order.
    """
    dry_run = result.get("dryRun") or {}
    if not dry_run.get("readyForTts"):
        return [
            "OCR text is not ready for TTS. Try more sample pages, another OCR engine, or higher render zoom before creating audio."
        ]

    steps: list[str] = []

    # OCR quality verdict.
    if dry_run.get("quality") != "warning":
        steps.append("OCR quality is usable for TTS on the selected sample.")
    else:
        reasons = "; ".join(dry_run.get("qualityReasons") or [])
        detail = f" Warning reasons: {reasons}." if reasons else ""
        steps.append(f"OCR is usable but should be checked before full-book TTS.{detail}")

    # Recommended OCR settings, when there are any.
    env = result.get("recommendedEnvText") or ""
    if env:
        steps.append(f"Apply these OCR settings for the full book: {env}.")

    # Audio smoke test: either listen to it or suggest running it.
    steps.append(
        "Listen to the audio smoke file before processing the full book."
        if result.get("audioSmoke")
        else "Run again with --verify-audio to check pronunciation before processing the full book."
    )

    # Voice benchmark outcome.
    if (result.get("voiceBenchmark") or {}).get("ready"):
        steps.append("Listen to the voice benchmark files and choose the most natural Arabic voice before the full-book run.")
    elif result.get("voiceBenchmarkRequested"):
        steps.append("Voice benchmarking did not produce audio. Run scripts/preflight_check.py and install the missing local voice setup.")

    # Ready-to-paste commands.
    commands = result.get("commands") or {}
    labelled_commands = (
        ("dryRunRecommended", "Recommended dry run command"),
        ("externalTtsSample", "External voice comparison sample"),
        ("externalOcrSample", "External OCR model image sample"),
    )
    for key, label in labelled_commands:
        if commands.get(key):
            steps.append(f"{label}: {commands[key]}")

    # Runtime warnings based on the full-book estimate.
    estimate = result.get("estimateFullBook") or {}
    total_seconds = estimate.get("estimatedTotalSeconds")
    if isinstance(total_seconds, (int, float)):
        if total_seconds >= 3600:
            steps.append("Estimated runtime is long. Use the Docker worker or an always-on computer, and process a small sample first.")
        elif total_seconds >= 600:
            steps.append("Estimated runtime is more than a few minutes. Keep the browser open or use the worker path for the full book.")

    tts_seconds = estimate.get("estimatedTtsSeconds")
    uses_silma = result.get("smokeVoiceId") == "silma-local"
    if uses_silma and isinstance(tts_seconds, (int, float)) and tts_seconds >= 1800:
        steps.append("SILMA sounds better but may be slow for the full book. Use --voice-id espeak-ar-clear for a faster fallback smoke test.")

    return steps
def resolve_smoke_voice(voice_id: str | None = None) -> str:
if voice_id and voice_id != "auto":
return voice_id
if main.find_silma_python() is not None or main.importlib.util.find_spec("silma_tts") is not None:
return "silma-local"
if main.find_habibi_python() is not None:
return "habibi-msa"
if main.find_supertonic_python() is not None or main.importlib.util.find_spec("supertonic") is not None:
return "supertonic-ar"
if main.find_espeak_ng() is not None:
return "espeak-ar-clear"
return "silma-local"
def write_env_snippet(path: Path, result: dict[str, Any]) -> None:
    """Write the recommended OCR settings to *path* as a ``.env`` snippet.

    The file starts with a comment header naming the source and sample PDFs,
    followed by one ``KEY=value`` line per setting in sorted key order, or a
    single comment when there are no settings. Missing parent directories are
    created.
    """
    settings = result.get("recommendedEnv") or {}
    path.parent.mkdir(parents=True, exist_ok=True)

    header = [
        "# Arabic PDF Reader OCR settings",
        "# Generated by scripts/prepare_book_workflow.py",
        f"# Source PDF: {result.get('pdf', '')}",
        f"# Sample PDF: {result.get('sample', {}).get('output', '')}",
        "",
    ]
    if settings:
        body = [f"{name}={settings[name]}" for name in sorted(settings)]
    else:
        body = ["# No OCR settings were needed for this sample."]

    path.write_text("\n".join([*header, *body]) + "\n", encoding="utf-8")
def markdown_value(value: Any) -> str:
    """Render *value* for the Markdown report.

    ``None`` and the empty string become ``"-"``, booleans become
    ``"yes"``/``"no"``, and everything else is converted with ``str``.
    """
    if value is None:
        return "-"
    if isinstance(value, bool):
        return "yes" if value else "no"
    return "-" if value == "" else str(value)
def fenced_block(language: str, text: str) -> list[str]:
    """Return the lines of a fenced Markdown code block tagged *language*.

    The text is stripped of surrounding whitespace; if nothing remains the
    block contains a single ``-``.
    """
    opening = f"```{language}"
    content = text.strip()
    if not content:
        content = "-"
    return [opening, content, "```"]
def _table_cell(value: Any) -> str:
    """Render *value* as text that is safe inside one Markdown table cell."""
    text = markdown_value(value)
    # A line break would end the table row and a raw "|" would start a new
    # column (both are common in error messages), so flatten and escape them.
    flattened = " ".join(part.strip() for part in text.splitlines())
    return flattened.replace("|", "\\|") or "-"


def _table_row(cells: list[Any]) -> str:
    """Join *cells* into a single Markdown table row."""
    return "| " + " | ".join(_table_cell(cell) for cell in cells) + " |"


def _ocr_benchmark_table(benchmark: list[dict[str, Any]]) -> list[str]:
    """Build the OCR benchmark table: header, separator, one row per engine."""
    table = [
        "| Engine | Quality | Score | Arabic words | Fragment ratio | Extraction | Notes |",
        "| --- | --- | --- | --- | --- | --- | --- |",
    ]
    for item in benchmark:
        succeeded = bool(item.get("ok"))
        if succeeded:
            notes = "; ".join(item.get("qualityReasons") or [])
        else:
            notes = item.get("error") or "failed"
        table.append(
            _table_row(
                [
                    item.get("engine"),
                    item.get("quality") if succeeded else "failed",
                    item.get("qualityScore"),
                    item.get("arabicWords"),
                    item.get("fragmentLineRatio"),
                    item.get("extraction"),
                    notes,
                ]
            )
        )
    return table


def _voice_benchmark_section(voice_benchmark: dict[str, Any]) -> list[str]:
    """Build the "Voice Benchmark" section for a non-empty benchmark result."""
    section = [
        "## Voice Benchmark",
        "",
        f"- Output directory: {markdown_value(voice_benchmark.get('outputDir'))}",
        f"- Text characters: {markdown_value(voice_benchmark.get('textCharacters'))}",
        f"- Audio format: {markdown_value(voice_benchmark.get('audioFormat'))}",
        f"- Recommended starting voice: {markdown_value((voice_benchmark.get('recommended') or {}).get('voiceId'))}",
        f"- Fastest successful voice: {markdown_value((voice_benchmark.get('fastest') or {}).get('voiceId'))}",
        "",
        "| Voice | Label | Engine | Status | Time | Audio | Notes |",
        "| --- | --- | --- | --- | ---: | --- | --- |",
    ]
    for item in voice_benchmark.get("results") or []:
        section.append(
            _table_row(
                [
                    item.get("voiceId"),
                    item.get("label"),
                    item.get("engine"),
                    "ok" if item.get("ok") else "failed",
                    item.get("elapsedSeconds"),
                    item.get("path"),
                    item.get("error"),
                ]
            )
        )
    section.append("")
    return section


def write_markdown_report(path: Path, result: dict[str, Any]) -> None:
    """Write a Markdown report of a workflow result to *path*.

    The report contains, in order: book details, the OCR benchmark table, the
    selected OCR engine, recommended settings, dry-run figures, the audio
    smoke test, the voice benchmark (when present or requested), the
    full-book estimate, follow-up commands and next steps. Missing values are
    shown as ``-``. Table cells are escaped so that ``|`` characters or line
    breaks in a value (for example an error message) cannot break the table.
    Missing parent directories are created.

    Args:
        path: Destination file; written as UTF-8.
        result: Workflow result dict; every section tolerates missing keys.
    """
    selected = result.get("selected") or {}
    sample = result.get("sample") or {}
    dry_run = result.get("dryRun") or {}
    audio = result.get("audioSmoke") or {}
    voice_benchmark = result.get("voiceBenchmark") or {}
    estimate = result.get("estimateFullBook") or {}
    commands = result.get("commands") or {}
    next_steps = result.get("nextSteps") or []

    selected_pages = ", ".join(str(page) for page in sample.get("pages") or []) or "-"
    command_text = "\n".join(command for command in commands.values() if command)

    lines = [
        "# Arabic Audio Preparation Report",
        "",
        "## Book",
        "",
        f"- PDF: {markdown_value(result.get('pdf'))}",
        f"- Total pages: {markdown_value(result.get('totalPages'))}",
        f"- Sample PDF: {markdown_value(sample.get('output'))}",
        f"- Sample pages: {selected_pages}",
        f"- Engine preset: {markdown_value(result.get('enginePreset'))}",
        "",
        "## Benchmark Results",
        "",
        *_ocr_benchmark_table(result.get("benchmark") or []),
        "",
        "## Selected OCR",
        "",
        f"- Extraction: {markdown_value(selected.get('extraction'))}",
        f"- Engine: {markdown_value(selected.get('engine'))}",
        f"- Quality score: {markdown_value(selected.get('qualityScore'))}",
        f"- Arabic words: {markdown_value(selected.get('arabicWords'))}",
        f"- Sample OCR time: {markdown_value(selected.get('seconds'))} seconds",
        "",
        "## Recommended OCR Settings",
        "",
        *fenced_block("text", result.get("recommendedEnvText") or "No OCR settings were needed for this sample."),
        "",
        "## Dry Run",
        "",
        f"- Quality: {markdown_value(dry_run.get('quality'))}",
        f"- Quality reasons: {markdown_value('; '.join(dry_run.get('qualityReasons') or []))}",
        f"- Ready for TTS: {markdown_value(dry_run.get('readyForTts'))}",
        f"- Speech characters: {markdown_value(dry_run.get('speechCharacters'))}",
        f"- One-letter Arabic word ratio: {markdown_value(dry_run.get('singleArabicWordRatio'))}",
        f"- Low-information line ratio: {markdown_value(dry_run.get('fragmentLineRatio'))}",
        f"- Chunks: {markdown_value(dry_run.get('chunks'))}",
        f"- Extraction: {markdown_value(dry_run.get('extraction'))}",
        "",
    ]

    if audio:
        lines += [
            "## Audio Smoke",
            "",
            f"- Voice: {markdown_value(result.get('smokeVoiceId') or audio.get('voiceId'))}",
            f"- Engine: {markdown_value(audio.get('engine'))}",
            f"- Speech characters synthesized: {markdown_value(audio.get('audioSpeechCharacters'))}",
            f"- Audio seconds: {markdown_value(audio.get('seconds'))}",
            f"- Output: {markdown_value(audio.get('path'))}",
            "",
        ]
    else:
        lines += ["## Audio Smoke", "", "- Not run. Use `--verify-audio` to create a short pronunciation sample.", ""]

    if voice_benchmark:
        lines += _voice_benchmark_section(voice_benchmark)
    elif result.get("voiceBenchmarkRequested"):
        lines += ["## Voice Benchmark", "", "- Not run because no usable cleaned OCR sample was available.", ""]

    lines += [
        "## Full Book Estimate",
        "",
        f"- Estimated OCR time: {markdown_value(estimate.get('estimatedOcrTime'))}",
        f"- Estimated TTS time: {markdown_value(estimate.get('estimatedTtsTime'))}",
        f"- Estimated total time: {markdown_value(estimate.get('estimatedTotalTime'))}",
        f"- Estimated speech characters: {markdown_value(estimate.get('estimatedSpeechCharacters'))}",
        f"- Basis: {markdown_value(estimate.get('basis'))}",
        "",
        "> Estimates are based on the selected sample pages. Dense scanned pages, marginal scans, and different fonts can change runtime and quality.",
        "",
        "## Commands",
        "",
        *fenced_block("powershell", command_text),
        "",
        "## Next Steps",
        "",
    ]

    if next_steps:
        lines.extend(f"- {step}" for step in next_steps)
    else:
        lines.append("- No next steps were generated.")
    # Trailing empty entry so the joined text ends with a newline.
    lines.append("")

    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines), encoding="utf-8")
def prepare_book_workflow(
pdf_path: Path,
sample_pages: int = 1,
skip_first: int = 0,
engines: list[str] | None = None,
engine_preset: str = "balanced",
chunk_size: int = 900,
verify_audio: bool = False,
voice_id: str | None = "auto",
audio_out: Path | None = None,
audio_max_chars: int = 1200,
benchmark_voices: bool = False,
voice_ids: list[str] | None = None,
voice_benchmark_out_dir: Path | None = None,
voice_benchmark_format: str = "wav",
voice_benchmark_max_chars: int | None = None,
) -> dict[str, Any]:
if not pdf_path.exists():
raise FileNotFoundError(f"PDF not found: {pdf_path}")
if pdf_path.suffix.lower() != ".pdf":
raise ValueError("Input must be a PDF file.")
with fitz.open(pdf_path) as document:
total_pages = document.page_count
if engine_preset not in ENGINE_PRESETS:
raise ValueError(f"Unknown engine preset: {engine_preset}")
engines = engines or ENGINE_PRESETS[engine_preset]
sample_pdf = Path(tempfile.gettempdir()) / f"{pdf_path.stem}-arabic-audio-sample-{sample_pages}.pdf"
sample_info = build_test_pdf(pdf_path, sample_pdf, count=sample_pages, skip_first=skip_first)
benchmark_results = [benchmark_engine(sample_pdf, engine) for engine in engines]
best = choose_best_result(benchmark_results)
if best is None:
return {
"pdf": str(pdf_path),
"sample": sample_info,
"benchmark": benchmark_results,
"ready": False,
"error": "No OCR engine produced usable Arabic text on the sample.",
}
extraction = str(best.get("extraction") or "")
recommendation = best.get("recommendation")
speech_sample_chars = voice_benchmark_max_chars if voice_benchmark_max_chars is not None else audio_max_chars
dry_run = dry_run_pdf(
sample_pdf,
chunk_size=chunk_size,
from_extraction=extraction,
speech_sample_chars=speech_sample_chars,
)
audio_result = None
voice_benchmark_result = None
resolved_voice_id = resolve_smoke_voice(voice_id)
if verify_audio:
output = audio_out or (ROOT_DIR / "outputs" / f"{pdf_path.stem}-sample-smoke.wav")
audio_result = verify_pipeline(
sample_pdf,
resolved_voice_id,
output,
from_extraction=extraction,
max_speech_chars=audio_max_chars,
)
if benchmark_voices and dry_run.get("readyForTts"):
sample_text = str(dry_run.get("speechSampleText") or dry_run.get("speechPreview") or "").strip()
if sample_text:
output_dir = voice_benchmark_out_dir or (ROOT_DIR / "outputs" / f"{pdf_path.stem}-voice-benchmark")
selected_voices = voice_ids or DEFAULT_VOICES
voice_benchmark_result = benchmark_voice_set(
voices=selected_voices,
text=sample_text,
output_dir=output_dir,
audio_format=voice_benchmark_format,
)
estimate = estimate_full_book(
total_pages=total_pages,
sample_page_count=len(sample_info.get("pages", [])) or sample_pages,
selected=best,
dry_run=dry_run,
audio_smoke=audio_result,
)
commands = build_commands(
pdf_path=pdf_path,
extraction=extraction,
voice_id=resolved_voice_id,
audio_max_chars=audio_max_chars,
audio_out=audio_out,
)
result = {
"pdf": str(pdf_path),
"totalPages": total_pages,
"sample": sample_info,
"benchmark": benchmark_results,
"enginePreset": engine_preset if engines == ENGINE_PRESETS[engine_preset] else "custom",
"selected": best,
"recommendation": recommendation,
"recommendedEnv": recommendation.get("env", {}) if recommendation else {},
"recommendedEnvText": env_text(recommendation.get("env", {})) if recommendation else "",
"dryRun": dry_run,
"audioSmoke": audio_result,
"smokeVoiceId": resolved_voice_id,
"voiceBenchmark": voice_benchmark_result,
"voiceBenchmarkRequested": benchmark_voices,
"estimateFullBook": estimate,
"commands": commands,
"ready": bool(dry_run.get("readyForTts") and (audio_result is not None if verify_audio else True)),
}
result["nextSteps"] = build_next_steps(result)
return result
def print_summary(result: dict[str, Any]) -> None:
    """Print a compact, human-readable summary of a workflow result.

    Lines for the recommendation, audio smoke test, voice benchmark, estimate
    and next steps are printed only when the corresponding data is present.
    The last line always states whether the result is ready.
    """
    selected = result.get("selected") or {}
    recommendation = result.get("recommendation") or {}
    dry_run = result.get("dryRun") or {}

    print("Arabic book preparation")
    sample_output = result.get("sample", {}).get("output", "-")
    print(f"Sample: {sample_output}")
    extraction = selected.get("extraction", "-")
    score = selected.get("qualityScore", "-")
    print(f"Selected OCR: {extraction} score={score}")

    if recommendation:
        print(f"Full-book settings: {recommendation.get('summary')}")

    quality = dry_run.get("quality", "-")
    ready_for_tts = dry_run.get("readyForTts", False)
    speech_chars = dry_run.get("speechCharacters", 0)
    print(f"Dry run: quality={quality} readyForTts={ready_for_tts} speechChars={speech_chars}")

    audio = result.get("audioSmoke")
    if audio:
        print(f"Audio smoke: {audio.get('path')} {audio.get('seconds')}s {audio.get('bytes')} bytes")

    voice_benchmark = result.get("voiceBenchmark") or {}
    if voice_benchmark:
        entries = voice_benchmark.get("results", [])
        ok_count = sum(1 for entry in entries if entry.get("ok"))
        print(f"Voice benchmark: {ok_count}/{len(entries)} voices wrote to {voice_benchmark.get('outputDir')}")

    estimate = result.get("estimateFullBook") or {}
    if estimate:
        ocr_time = estimate.get("estimatedOcrTime")
        tts_time = estimate.get("estimatedTtsTime")
        total_time = estimate.get("estimatedTotalTime")
        print(f"Estimate: OCR {ocr_time} TTS {tts_time} total {total_time}")

    steps = result.get("nextSteps") or []
    if steps:
        print("Next steps:")
    for step in steps:
        print(f"- {step}")

    verdict = "yes" if result.get("ready") else "no"
    print(f"Ready: {verdict}")
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser used by ``main_cli``."""
    ocr_engine_choices = [
        "arabic",
        "arabic-max",
        "qari-ocr",
        "tawkeed-ocr",
        "katib-ocr",
        "arabic-qwen-ocr",
        "arabic-glm-ocr",
        "baseer-ocr",
        "easyocr",
        "paddleocr",
        "paddleocr-vl",
        "surya",
        "tesseract",
        "auto",
        "best",
    ]

    parser = argparse.ArgumentParser(description="Benchmark, dry-run, and optionally audio-smoke an Arabic PDF sample.")
    add = parser.add_argument

    # Input and sampling.
    add("pdf", type=Path, help="Arabic PDF to prepare")
    add("--sample-pages", type=int, default=1, help="Number of informative pages to sample.")
    add("--skip-first", type=int, default=0, help="Ignore the first N pages when selecting sample pages.")

    # OCR engine selection.
    add(
        "--engine-preset",
        choices=sorted(ENGINE_PRESETS),
        default="balanced",
        help="OCR engine preset to use when --engines is not provided. balanced is the recommended free Arabic-trained stack.",
    )
    add(
        "--engines",
        nargs="+",
        default=None,
        choices=ocr_engine_choices,
        help="OCR engines to benchmark on the sample.",
    )
    add("--chunk-size", type=int, default=900, help="Dry-run chunk size.")

    # Audio smoke test.
    add("--verify-audio", action="store_true", help="Also create a short audio smoke test from the sample.")
    add("--voice-id", default="auto", help="Local voice id for --verify-audio. Use auto to prefer SILMA when installed.")
    add("--audio-out", type=Path, help="Audio output path for --verify-audio.")
    add(
        "--audio-max-chars",
        type=int,
        default=1200,
        help="Maximum cleaned characters to synthesize for --verify-audio.",
    )

    # Voice benchmark.
    add(
        "--benchmark-voices",
        action="store_true",
        help="Compare local voices using the cleaned OCR sample text.",
    )
    add(
        "--voices",
        nargs="+",
        choices=list(main.LOCAL_VOICES),
        help="Voice ids to compare with --benchmark-voices.",
    )
    add(
        "--voice-benchmark-out-dir",
        type=Path,
        help="Output directory for --benchmark-voices audio files.",
    )
    add(
        "--voice-benchmark-format",
        choices=["wav", "mp3"],
        default="wav",
        help="Audio format for --benchmark-voices.",
    )
    add(
        "--voice-benchmark-max-chars",
        type=int,
        help="Maximum cleaned OCR characters to use for --benchmark-voices. Defaults to --audio-max-chars.",
    )

    # Output options.
    add("--json", action="store_true", help="Print JSON instead of a compact summary.")
    add(
        "--write-env",
        type=Path,
        help="Write recommended OCR settings to a small .env snippet. Secrets are never written.",
    )
    add(
        "--write-report",
        type=Path,
        help="Write a readable Markdown report with OCR settings, estimates, commands, and next steps.",
    )
    return parser


def main_cli() -> None:
    """Command-line entry point: run the workflow and report the outcome.

    Parses the arguments, runs ``prepare_book_workflow``, optionally writes
    the ``.env`` snippet and the Markdown report, prints either JSON or the
    compact summary, and exits with status 1 when the result is not ready.
    """
    # Arabic text may not be encodable in the console's default encoding.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8", errors="replace")

    args = _build_arg_parser().parse_args()

    result = prepare_book_workflow(
        args.pdf,
        sample_pages=args.sample_pages,
        skip_first=args.skip_first,
        engines=args.engines,
        engine_preset=args.engine_preset,
        chunk_size=args.chunk_size,
        verify_audio=args.verify_audio,
        voice_id=args.voice_id,
        audio_out=args.audio_out,
        audio_max_chars=args.audio_max_chars,
        benchmark_voices=args.benchmark_voices,
        voice_ids=args.voices,
        voice_benchmark_out_dir=args.voice_benchmark_out_dir,
        voice_benchmark_format=args.voice_benchmark_format,
        voice_benchmark_max_chars=args.voice_benchmark_max_chars,
    )

    env_target = args.write_env
    if env_target:
        write_env_snippet(env_target, result)
        result["writtenEnv"] = str(env_target)
        # With an env file on disk, regenerate the commands (and the steps
        # that quote them) so they use --env-file.
        result["commands"] = build_commands(
            pdf_path=args.pdf,
            extraction=str(result.get("selected", {}).get("extraction") or ""),
            voice_id=str(result.get("smokeVoiceId") or args.voice_id),
            audio_max_chars=args.audio_max_chars,
            audio_out=args.audio_out,
            env_file=env_target,
        )
        result["nextSteps"] = build_next_steps(result)

    report_target = args.write_report
    if report_target:
        write_markdown_report(report_target, result)
        result["writtenReport"] = str(report_target)

    if not args.json:
        print_summary(result)
    else:
        print(json.dumps(result, ensure_ascii=False, indent=2))

    if not result.get("ready"):
        raise SystemExit(1)


if __name__ == "__main__":
    main_cli()