# arabic-audio-reader-worker / scripts / prepare_live_deployment.py
# Repository page header: "Syncre's picture" (avatar alt text)
# Commit message: "Deploy Arabic Audio Reader worker"
# Commit 2e1a095 (verified)
from __future__ import annotations
import argparse
import json
import sys
from dataclasses import asdict
from pathlib import Path
# ROOT_DIR is the directory two levels above this file, i.e. the parent of
# the directory that contains this script.
ROOT_DIR = Path(__file__).resolve().parent.parent
# Put ROOT_DIR at the front of sys.path (once) so the `scripts.*` imports that
# follow can resolve, e.g. when this file is executed directly.
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))
from scripts.deployment_handoff import DEFAULT_OUT as DEFAULT_HANDOFF_OUT
from scripts.deployment_handoff import format_key_value_block
from scripts.deployment_handoff import build_handoff, write_markdown
from scripts.deployment_status import build_status
from scripts.export_hf_space import DEFAULT_OUTPUT as DEFAULT_SPACE_OUT
from scripts.export_hf_space import export_hf_space, validate_export
from scripts.next_deployment_step import choose_next_step
from scripts.validate_deployment_env import summarize as summarize_env_checks
from scripts.validate_deployment_env import validate_deployment_env
# Default output locations, all under ROOT_DIR/outputs. They are the defaults
# for both the prepare_live_deployment() keyword arguments and the CLI flags.
# JSON deployment prep report.
DEFAULT_REPORT = ROOT_DIR / "outputs" / "deployment-prep.json"
# Markdown quickstart.
DEFAULT_QUICKSTART = ROOT_DIR / "outputs" / "deployment-quickstart.md"
# .env-style Vercel production variables.
DEFAULT_VERCEL_ENV_OUT = ROOT_DIR / "outputs" / "vercel-production.env"
# .env-style worker/Space secrets.
DEFAULT_WORKER_ENV_OUT = ROOT_DIR / "outputs" / "worker-secrets.env"
# PowerShell commands for the Vercel production env (contains secrets).
DEFAULT_VERCEL_COMMANDS_OUT = ROOT_DIR / "outputs" / "apply-vercel-env.ps1"
# Hugging Face worker settings sheet (contains secrets).
DEFAULT_WORKER_SETTINGS_OUT = ROOT_DIR / "outputs" / "worker-space-settings.md"
def prepare_live_deployment(
    worker_url: str | None = None,
    origin: str | None = None,
    code: str = "1234",
    secret_key: str | None = None,
    *,
    space_out: Path = DEFAULT_SPACE_OUT,
    report_out: Path = DEFAULT_REPORT,
    handoff_out: Path = DEFAULT_HANDOFF_OUT,
    quickstart_out: Path = DEFAULT_QUICKSTART,
    vercel_env_out: Path = DEFAULT_VERCEL_ENV_OUT,
    worker_env_out: Path = DEFAULT_WORKER_ENV_OUT,
    vercel_commands_out: Path = DEFAULT_VERCEL_COMMANDS_OUT,
    worker_settings_out: Path = DEFAULT_WORKER_SETTINGS_OUT,
    force: bool = True,
) -> dict[str, object]:
    """Export the Space bundle, write the deployment artifacts, return the report.

    The Hugging Face Space bundle is always exported to ``space_out`` and
    validated, and the quickstart and the JSON report are always written.
    The handoff markdown, both env files, the Vercel command file and the
    worker settings sheet are only written when both ``worker_url`` and
    ``origin`` are truthy; otherwise their report entries stay ``None``.

    Args:
        worker_url: Live worker URL, or ``None`` if it does not exist yet.
        origin: Live Vercel origin, or ``None`` if it does not exist yet.
        code: Access code passed to the handoff, next-step, status and
            quickstart helpers.
        secret_key: Optional secret forwarded to ``build_handoff``.
        space_out: Destination of the exported Space bundle.
        report_out: Destination of the JSON report.
        handoff_out: Destination of the handoff markdown.
        quickstart_out: Destination of the quickstart markdown.
        vercel_env_out: Destination of the Vercel env file.
        worker_env_out: Destination of the worker env file.
        vercel_commands_out: Destination of the Vercel command file.
        worker_settings_out: Destination of the worker settings sheet.
        force: Forwarded to ``export_hf_space`` as ``force``.

    Returns:
        The report dictionary that was also serialized to ``report_out``.
        Its ``ready`` entry is true when export validation found no issues.
    """
    bundle_info = export_hf_space(space_out, force=force)
    bundle_problems = validate_export(space_out)

    # Everything below depends on the handoff and stays None without URLs.
    handoff_file = vercel_env_file = worker_env_file = None
    commands_file = settings_file = None
    live_check_command = None
    env_summary = None

    if worker_url and origin:
        handoff = build_handoff(worker_url, origin, code, secret_key)
        write_markdown(handoff_out, handoff)
        write_env_file(vercel_env_out, handoff.vercel_env)
        write_env_file(worker_env_out, handoff.worker_secrets)

        commands = handoff.commands
        write_vercel_commands(
            vercel_commands_out,
            handoff.vercel_cleanup_commands,
            handoff.vercel_cli_commands,
            validate_command=commands["validateEnv"],
            preflight_command=commands["diagnoseVercelWorker"],
        )
        write_worker_settings(worker_settings_out, handoff)

        # The report stores plain strings so it can be JSON-serialized.
        (
            handoff_file,
            vercel_env_file,
            worker_env_file,
            commands_file,
            settings_file,
        ) = (
            str(artifact)
            for artifact in (
                handoff_out,
                vercel_env_out,
                worker_env_out,
                vercel_commands_out,
                worker_settings_out,
            )
        )
        live_check_command = commands["verifyLive"]

        env_checks = validate_deployment_env(
            handoff.vercel_env,
            handoff.worker_secrets,
            expected_worker_url=handoff.worker_url,
            expected_origin=handoff.vercel_origin,
        )
        env_summary = summarize_env_checks(env_checks)

    step = choose_next_step(worker_url=worker_url, vercel_origin=origin, code=code)
    deployment_status = build_status(worker_url=worker_url, vercel_origin=origin, code=code)

    quickstart_handoff = Path(handoff_file) if handoff_file else handoff_out
    quickstart_settings = Path(settings_file) if settings_file else worker_settings_out
    write_quickstart(
        quickstart_out,
        space_out=space_out,
        handoff_out=quickstart_handoff,
        worker_url=worker_url,
        origin=origin,
        code=code,
        worker_settings_out=quickstart_settings,
        next_title=step.title,
        next_detail=step.detail,
        next_command=step.command,
    )

    # Key order is part of the serialized output, so it is kept explicit.
    report: dict[str, object] = {
        "ready": not bundle_problems,
        "spaceBundle": bundle_info,
        "spaceIssues": bundle_problems,
        "handoff": handoff_file,
        "vercelEnv": vercel_env_file,
        "workerEnv": worker_env_file,
        "vercelCommands": commands_file,
        "workerSettings": settings_file,
        "envValidation": env_summary,
        "quickstart": str(quickstart_out),
        "verifyCommand": live_check_command,
        "nextStep": asdict(step),
        "deploymentStatus": asdict(deployment_status),
    }

    report_out.parent.mkdir(parents=True, exist_ok=True)
    report_out.write_text(json.dumps(report, indent=2), encoding="utf-8")
    return report
def write_env_file(path: Path, values: dict[str, str]) -> None:
    """Write ``values`` to ``path`` as a key/value block plus a trailing newline.

    Missing parent directories are created first. The text layout comes from
    ``format_key_value_block``; the file is written as UTF-8.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)

    block = format_key_value_block(values)
    path.write_text(block + "\n", encoding="utf-8")
def write_vercel_commands(
    path: Path,
    cleanup_commands: list[str],
    env_commands: list[str],
    *,
    validate_command: str | None = None,
    preflight_command: str | None = None,
) -> None:
    """Write the Vercel environment command script to ``path``.

    The file lists ``cleanup_commands`` and then ``env_commands`` as live
    lines, each group under an explanatory ``#`` comment. The optional
    validate and preflight commands are included as comments only, so they
    are shown to the reader but never run with the script.

    Args:
        path: Destination file; missing parent directories are created.
        cleanup_commands: Commands emitted under the "remove fallback" heading.
        env_commands: Commands emitted under the "add/update" heading.
        validate_command: Optional command shown (commented out) under the
            verification heading. Falsy values are omitted.
        preflight_command: Optional command shown (commented out) under the
            reachability/CORS heading. Falsy values are omitted.
    """

    def commented(command: str | None) -> list[str]:
        """Return ``command`` as comment lines, or nothing when it is falsy."""
        if not command:
            return []
        # Prefix every line, not just the first: a multi-line command would
        # otherwise leave its continuation lines as live statements.
        return ["# " + line for line in command.splitlines()]

    path.parent.mkdir(parents=True, exist_ok=True)
    lines = [
        "# Generated by scripts\\prepare_live_deployment.py.",
        "# This file contains deployment secrets. Keep it private and do not commit it.",
        "# Production Vercel audio must use WORKER_BASE_URL; direct Hugging Face cloud TTS is only for short tests.",
        "",
        "# Remove temporary direct-cloud fallback values from production.",
        *cleanup_commands,
        "",
        "# Add/update the production variables for the Vercel shell.",
        *env_commands,
        "",
        "# Verify that Vercel points to the worker and does not have direct-cloud fallback variables enabled.",
        *commented(validate_command),
        "",
        "# After Vercel redeploys and the worker is awake, verify browser-to-worker reachability and CORS.",
        *commented(preflight_command),
        # The final empty entry makes the joined text end with a newline.
        "",
    ]
    path.write_text("\n".join(lines), encoding="utf-8")
def write_worker_settings(path: Path, handoff) -> None:
    """Write the Hugging Face worker settings sheet (Markdown) to ``path``.

    The sheet contains the worker secrets, the two build-arg sets, and the
    ``validateEnv``, ``diagnoseVercelWorker`` and ``verifyLive`` commands, all
    read from ``handoff``. Missing parent directories are created first and
    the file is written as UTF-8.

    Args:
        path: Destination Markdown file.
        handoff: Object providing ``worker_secrets``, ``recommended_build_args``,
            ``maximum_quality_build_args`` and a ``commands`` mapping.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    def fenced(language: str, body: str) -> list[str]:
        """Return ``body`` as a fenced code block followed by a blank line."""
        return [f"```{language}", body, "```", ""]

    sheet: list[str] = [
        "# Hugging Face Worker Settings",
        "",
        "Generated by `scripts\\prepare_live_deployment.py`. This file contains deployment secrets, so keep it private and do not commit it.",
        "",
    ]

    sheet += [
        "## Space Secrets",
        "",
        "Paste these into the Hugging Face Space **Settings > Secrets** page:",
        "",
    ]
    sheet += fenced("text", format_key_value_block(handoff.worker_secrets))

    sheet += [
        "## Balanced Docker Build Args",
        "",
        "Use these first for a strong Arabic OCR worker without immediately enabling the heaviest QARI/PaddleOCR-VL paths:",
        "",
    ]
    sheet += fenced("text", format_key_value_block(handoff.recommended_build_args))

    sheet += [
        "## Maximum Quality Docker Build Args",
        "",
        "Use these only on larger hardware after a 5-page benchmark shows the heavier models help this book:",
        "",
    ]
    sheet += fenced("text", format_key_value_block(handoff.maximum_quality_build_args))

    sheet += [
        "## Verify The Settings",
        "",
        "After setting the Space secrets/build args, rebuild or restart the Space, then validate the generated env files:",
        "",
    ]
    sheet += fenced("powershell", handoff.commands["validateEnv"])

    sheet += ["Then run hosted preflight before any real PDF upload:", ""]
    sheet += fenced("powershell", handoff.commands["diagnoseVercelWorker"])

    sheet += ["Then run the live proof:", ""]
    # The blank line closing this last block gives the file its final newline.
    sheet += fenced("powershell", handoff.commands["verifyLive"])

    path.write_text("\n".join(sheet), encoding="utf-8")
def write_quickstart(
    path: Path,
    *,
    space_out: Path,
    handoff_out: Path,
    worker_url: str | None,
    origin: str | None,
    code: str,
    worker_settings_out: Path,
    next_title: str,
    next_detail: str,
    next_command: str,
) -> None:
    """Write the Markdown deployment quickstart to ``path``.

    When ``worker_url`` or ``origin`` is falsy, a placeholder URL is used in
    the generated example commands instead. Missing parent directories are
    created first and the file is written as UTF-8.

    Args:
        path: Destination Markdown file.
        space_out: Space bundle location shown in the document.
        handoff_out: Handoff location shown in the document.
        worker_url: Worker URL for the example commands, or ``None``.
        origin: Vercel origin for the example commands, or ``None``.
        code: Access code shown in the document and the example commands.
        worker_settings_out: Worker settings sheet location shown in the document.
        next_title: Bold title of the "Current Next Command" section.
        next_detail: Explanatory text below that title.
        next_command: Command placed in that section's code block.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    worker_display = worker_url if worker_url else "https://your-space.hf.space"
    origin_display = origin if origin else "https://your-vercel-app.vercel.app"

    handoff_command = f"python scripts\\deployment_handoff.py {worker_display} --origin {origin_display} --code {code}"
    hosted_preflight_command = f"python scripts\\hosted_preflight.py {origin_display} --code {code} --worker-url {worker_display}"
    prepare_command = f"python scripts\\prepare_live_deployment.py --worker-url {worker_display} --origin {origin_display} --code {code}"
    validate_bullet = (
        "- Validate generated env files with `python scripts\\validate_deployment_env.py "
        "--vercel-env outputs\\vercel-production.env --worker-env outputs\\worker-secrets.env "
        f"--worker-url {worker_display} --origin {origin_display}`."
    )

    def powershell(command: str) -> list[str]:
        """Return ``command`` as a fenced PowerShell block plus a blank line."""
        return ["```powershell", command, "```", ""]

    overview = [
        "# Arabic Audio Reader Deployment Quickstart",
        "",
        "This file is generated by `scripts\\prepare_live_deployment.py` so the live deployment steps stay in one place.",
        "",
        "## What This Prepared",
        "",
        f"- Hugging Face Docker Space bundle: `{space_out}`",
        f"- Deployment handoff, after real URLs exist: `{handoff_out}`",
        f"- Hugging Face worker settings sheet, after real URLs exist: `{worker_settings_out}`",
        f"- Access code: `{code}`",
        "",
    ]

    worker_section = [
        "## Create The Free Worker",
        "",
        "Open: https://huggingface.co/new-space",
        "Reference: https://huggingface.co/docs/hub/main/en/spaces-sdks-docker",
        "",
        "1. Create a Hugging Face Space.",
        "2. Choose **Docker** as the Space SDK.",
        f"3. Upload or push the generated folder: `{space_out}`",
        "4. Use the secrets/build args from the generated worker settings sheet after you know the real Vercel URL.",
        "",
        "Free CPU Spaces are enough for proof and small samples. For a full 100 MB+ scanned book, expect slow jobs and cold starts unless you move the worker to stronger hardware.",
        "",
    ]

    vercel_section = [
        "## Create The Vercel Site",
        "",
        "Open: https://vercel.com/new",
        "Reference: https://vercel.com/docs/frameworks/backend/fastapi",
        "",
        "1. Import this GitHub repo into Vercel.",
        "2. Use the default/Other preset; the repo already has `vercel.json` for the Python FastAPI entrypoint.",
        "3. Set `ACCESS_CODE`, `SECRET_KEY`, and `WORKER_BASE_URL` from the deployment handoff.",
        "4. Remove any temporary direct-cloud TTS variables from earlier tests: `ENABLE_DIRECT_CLOUD_TTS`, `HF_API_TOKEN`, `HF_TTS_MODEL`, and `DEFAULT_VOICE_ID`.",
        "5. Deploy to Production.",
        "6. If generated env files exist, run `scripts\\validate_deployment_env.py` before the final deploy.",
        "7. If `outputs\\apply-vercel-env.ps1` exists, it contains the generated Vercel env commands.",
        "",
    ]

    after_urls_section = [
        "## After Both URLs Exist",
        "",
        "Regenerate the exact secret/env/verification sheet with the real URLs:",
        "",
        *powershell(handoff_command),
        "Or refresh the worker bundle and handoff together:",
        "",
        *powershell(prepare_command),
        "Before uploading a large PDF, run hosted preflight from Vercel to the worker:",
        "",
        *powershell(hosted_preflight_command),
        "It writes `outputs\\hosted-preflight.json` and must pass `site worker reachable from vercel` and `site worker CORS ready`.",
        "",
    ]

    safety_section = [
        "## Safety Checklist",
        "",
        validate_bullet,
        "- Put the same generated `SECRET_KEY` in Hugging Face and Vercel.",
        "- Put the same access code in Hugging Face and Vercel.",
        "- Point Hugging Face `CORS_ORIGINS` at the exact Vercel production URL.",
        "- Point Vercel `WORKER_BASE_URL` at the exact Hugging Face Space URL.",
        "- Keep Vercel direct Hugging Face TTS disabled in production; downloadable audio should come from the worker.",
        "- Run the live proof command before giving the site to anyone.",
        "",
    ]

    next_section = [
        "## Current Next Command",
        "",
        f"**{next_title}**",
        "",
        next_detail,
        "",
        *powershell(next_command),
        "The project is not considered live-finished until `outputs\\live-deployment-proof.json` says `complete: true` and the final audit has no live-deployment warnings.",
        # The final empty entry makes the joined text end with a newline.
        "",
    ]

    document = (
        overview
        + worker_section
        + vercel_section
        + after_urls_section
        + safety_section
        + next_section
    )
    path.write_text("\n".join(document), encoding="utf-8")
def main() -> None:
    """Parse the command line, prepare the deployment artifacts, report the result.

    With ``--json`` the full report is printed as JSON on stdout; otherwise a
    short summary of the written files and the next command is printed.

    Raises:
        SystemExit: With status 1 when the exported Space bundle has
            validation issues. The issues are listed on stderr first so the
            failure is explained in both output modes.
    """
    parser = argparse.ArgumentParser(description="Prepare the live Vercel plus worker deployment artifacts.")
    parser.add_argument("--worker-url", help="Live worker URL, for example https://your-space.hf.space")
    parser.add_argument("--origin", help="Live Vercel origin, for example https://your-app.vercel.app")
    parser.add_argument("--code", default="1234", help="Access code for the site and worker.")
    parser.add_argument("--secret-key", help="Optional fixed cookie-signing secret. Omit to generate one in the handoff.")
    parser.add_argument("--space-out", type=Path, default=DEFAULT_SPACE_OUT, help="Destination for the Hugging Face Space bundle.")
    parser.add_argument("--report-out", type=Path, default=DEFAULT_REPORT, help="JSON deployment prep report path.")
    parser.add_argument("--handoff-out", type=Path, default=DEFAULT_HANDOFF_OUT, help="Markdown handoff path when URLs are provided.")
    parser.add_argument("--quickstart-out", type=Path, default=DEFAULT_QUICKSTART, help="Markdown quickstart path.")
    parser.add_argument("--vercel-env-out", type=Path, default=DEFAULT_VERCEL_ENV_OUT, help="Generated .env-style Vercel production variables.")
    parser.add_argument("--worker-env-out", type=Path, default=DEFAULT_WORKER_ENV_OUT, help="Generated .env-style worker/Space secrets.")
    parser.add_argument("--vercel-commands-out", type=Path, default=DEFAULT_VERCEL_COMMANDS_OUT, help="Generated private PowerShell commands for Vercel production env.")
    parser.add_argument("--worker-settings-out", type=Path, default=DEFAULT_WORKER_SETTINGS_OUT, help="Generated private Hugging Face worker settings sheet.")
    parser.add_argument("--no-force", action="store_true", help="Do not replace an existing Space bundle.")
    parser.add_argument("--json", action="store_true", help="Print JSON instead of a compact summary.")
    args = parser.parse_args()

    report = prepare_live_deployment(
        args.worker_url,
        args.origin,
        args.code,
        args.secret_key,
        space_out=args.space_out,
        report_out=args.report_out,
        handoff_out=args.handoff_out,
        quickstart_out=args.quickstart_out,
        vercel_env_out=args.vercel_env_out,
        worker_env_out=args.worker_env_out,
        vercel_commands_out=args.vercel_commands_out,
        worker_settings_out=args.worker_settings_out,
        # --no-force is the opt-out, so forcing is the default.
        force=not args.no_force,
    )

    if args.json:
        print(json.dumps(report, indent=2))
    else:
        print(f"Wrote deployment prep report to {args.report_out}")
        print(f"Exported Hugging Face Space bundle to {report['spaceBundle']['outputDir']}")
        print(f"Wrote deployment quickstart to {report['quickstart']}")
        # The handoff entry is only set when both URLs were supplied.
        if report["handoff"]:
            print(f"Wrote deployment handoff to {report['handoff']}")
            print(f"Wrote Vercel env file to {report['vercelEnv']}")
            print(f"Wrote worker env file to {report['workerEnv']}")
            print(f"Wrote Vercel command file to {report['vercelCommands']}")
            print(f"Wrote worker settings sheet to {report['workerSettings']}")
        print(report["nextStep"]["command"])

    space_issues = report["spaceIssues"]
    if space_issues:
        # Explain the non-zero exit. Using stderr keeps stdout (including the
        # --json payload) unchanged for anything that parses it.
        print("Hugging Face Space bundle validation failed:", file=sys.stderr)
        if isinstance(space_issues, (list, tuple)):
            for issue in space_issues:
                print(f"  - {issue}", file=sys.stderr)
        else:
            print(f"  - {space_issues}", file=sys.stderr)
        raise SystemExit(1)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()