from __future__ import annotations import argparse import json import secrets import sys from dataclasses import asdict, dataclass from pathlib import Path ROOT_DIR = Path(__file__).resolve().parent.parent if str(ROOT_DIR) not in sys.path: sys.path.insert(0, str(ROOT_DIR)) from scripts.next_deployment_step import deployment_url_warnings DEFAULT_OUT = ROOT_DIR / "outputs" / "deployment-handoff.md" def normalize_origin(value: str) -> str: return value.rstrip("/") def generate_secret_key() -> str: return secrets.token_urlsafe(48) @dataclass class DeploymentHandoff: worker_url: str vercel_origin: str access_code: str urls_look_real: bool warnings: list[str] recommended_build_args: dict[str, str] maximum_quality_build_args: dict[str, str] worker_secrets: dict[str, str] vercel_env: dict[str, str] commands: dict[str, str] vercel_cli_commands: list[str] vercel_cleanup_commands: list[str] huggingface_cli_commands: list[str] def build_handoff( worker_url: str, vercel_origin: str, access_code: str = "1234", secret_key: str | None = None, ) -> DeploymentHandoff: worker_url = normalize_origin(worker_url) vercel_origin = normalize_origin(vercel_origin) secret_key = secret_key or generate_secret_key() warnings = deployment_url_warnings(worker_url, vercel_origin) urls_look_real = not warnings recommended_build_args = { "INSTALL_TAWKEED_OCR": "1", "INSTALL_KATIB_OCR": "1", "INSTALL_ARABIC_QWEN_OCR": "1", "INSTALL_ARABIC_GLM_OCR": "1", "INSTALL_BASEER_OCR": "1", "INSTALL_SUPERTONIC": "1", } maximum_quality_build_args = { **recommended_build_args, "INSTALL_QARI_OCR": "1", "INSTALL_PADDLEOCR_VL": "1", } worker_secrets = { "ACCESS_CODE": access_code, "SECRET_KEY": secret_key, "CORS_ORIGINS": vercel_origin, "COOKIE_SAMESITE": "none", "COOKIE_SECURE": "1", "OCR_ENGINE": "tesseract", "OCR_RENDER_ZOOM": "2", "TESSERACT_PSM": "4", "DEFAULT_VOICE_ID": "silma-local", "OUTPUT_RETENTION_DAYS": "7", "OUTPUT_MAX_FILES": "25", "AUDIO_FORMAT": "mp3", "MP3_BITRATE": "96k", "SILMA_ENABLE_NORMALIZER": "0", "SILMA_FORCE_TASHKEEL": "0", "SILMA_NORMALIZE_NUMBERS": "0", } vercel_env = { "ACCESS_CODE": access_code, "SECRET_KEY": secret_key, "WORKER_BASE_URL": worker_url, } commands = { "localReadiness": "python scripts\\prove_local_readiness.py --refresh-research", "refreshResearch": "python scripts\\refresh_research_evidence.py", "licensePolicy": "python scripts\\research_watchlist.py --check-license-policy --json", "researchSources": ( "python scripts\\check_research_sources.py " "--check-hf-metadata --write-hf-metadata-report outputs\\hf-model-metadata.md" ), "exportWorker": "python scripts\\export_hf_space.py --force", "prepareDeployment": ( f"python scripts\\prepare_live_deployment.py --worker-url {worker_url} " f"--origin {vercel_origin} --code {access_code}" ), "validateEnv": ( "python scripts\\validate_deployment_env.py " "--vercel-env outputs\\vercel-production.env " "--worker-env outputs\\worker-secrets.env " f"--worker-url {worker_url} --origin {vercel_origin}" ), "deploymentStatus": ( f"python scripts\\deployment_status.py --worker-url {worker_url} " f"--origin {vercel_origin} --code {access_code}" ), "diagnoseVercelWorker": ( f"python scripts\\hosted_preflight.py {vercel_origin} " f"--code {access_code} --worker-url {worker_url}" ), "configureVercelWorker": ( f"python scripts\\configure_vercel_worker.py {worker_url} " f"--site-url {vercel_origin} --code {access_code} --verify" ), "verifyLive": ( f"python scripts\\prove_live_deployment.py {worker_url} " f"--origin {vercel_origin} --code {access_code} " "--smoke-ocr-engine arabic " "--check-hf-metadata --hf-metadata-report outputs\\hf-model-metadata.md " "--proof-out outputs\\live-deployment-proof.json" ), "finalAudit": ( "python scripts\\audit_goal_readiness.py " "--worker-report outputs\\worker-verification.json " "--site-report outputs\\site-verification.json" ), } vercel_cli_commands = [ "npm i -g vercel", "vercel login", "vercel link --yes", f'cmd /c "echo {access_code}| vercel env add ACCESS_CODE production"', f'cmd /c "echo {secret_key}| vercel env add SECRET_KEY production"', f'cmd /c "echo {worker_url}| vercel env add WORKER_BASE_URL production"', f"python scripts\\configure_vercel_worker.py {worker_url} --site-url {vercel_origin} --code {access_code} --verify", "vercel --prod --yes", ] vercel_cleanup_commands = [ "vercel env rm ENABLE_DIRECT_CLOUD_TTS production --yes", "vercel env rm HF_API_TOKEN production --yes", "vercel env rm HF_TTS_MODEL production --yes", "vercel env rm DEFAULT_VOICE_ID production --yes", ] huggingface_cli_commands = [ "python -m pip install -U huggingface_hub", "python scripts\\export_hf_space.py --force", "python scripts\\deploy_hf_space.py / --bundle-dir outputs/huggingface-space --json", ] return DeploymentHandoff( worker_url=worker_url, vercel_origin=vercel_origin, access_code=access_code, urls_look_real=urls_look_real, warnings=warnings, recommended_build_args=recommended_build_args, maximum_quality_build_args=maximum_quality_build_args, worker_secrets=worker_secrets, vercel_env=vercel_env, commands=commands, vercel_cli_commands=vercel_cli_commands, vercel_cleanup_commands=vercel_cleanup_commands, huggingface_cli_commands=huggingface_cli_commands, ) def format_env_table(values: dict[str, str]) -> str: lines = ["| Name | Value |", "| --- | --- |"] lines.extend(f"| `{key}` | `{value}` |" for key, value in values.items()) return "\n".join(lines) def format_key_value_block(values: dict[str, str]) -> str: return "\n".join(f"{key}={value}" for key, value in values.items()) def write_markdown(path: Path, handoff: DeploymentHandoff) -> None: path.parent.mkdir(parents=True, exist_ok=True) path.write_text( "\n".join( [ "# Arabic Audio Reader Deployment Handoff", "", "Use this after the Hugging Face worker URL and Vercel production URL exist.", "", "## URLs", "", f"- Worker: {handoff.worker_url}", f"- Vercel site: {handoff.vercel_origin}", f"- URLs look real: {'yes' if handoff.urls_look_real else 'no'}", "", *( [ "## URL Warnings", "", *[f"- {warning}" for warning in handoff.warnings], "", "Replace placeholder/local/test URLs before running live proof. The final audit will not treat placeholder reports as completed deployment evidence.", "", ] if handoff.warnings else [] ), "## Hugging Face Space Secrets", "", format_env_table(handoff.worker_secrets), "", "Copy/paste secret values:", "", "```text", format_key_value_block(handoff.worker_secrets), "```", "", "## Safety Checklist", "", "- Set the same `ACCESS_CODE` and `SECRET_KEY` on both Hugging Face and Vercel.", "- Set Hugging Face `CORS_ORIGINS` to the exact Vercel production origin shown above.", "- Set Vercel `WORKER_BASE_URL` to the exact Hugging Face worker URL shown above.", "- After both deployments finish, run the Vercel worker diagnostic command below before uploading a large PDF. It must show `site worker reachable from vercel` and `site worker CORS ready`.", "- Remove Vercel's temporary direct Hugging Face TTS fallback variables for production: `ENABLE_DIRECT_CLOUD_TTS`, `HF_API_TOKEN`, `HF_TTS_MODEL`, and `DEFAULT_VOICE_ID`.", "- Keep `OCR_ENGINE=tesseract`, `OCR_RENDER_ZOOM=2`, and `TESSERACT_PSM=4` for normal scanned Arabic books; use `arabic-max` only when a short sample needs the slower maximum comparison.", "- Do not commit this handoff; it contains the deployment `SECRET_KEY`.", "", "## Hugging Face Docker Build Args", "", "Balanced Arabic OCR worker for the first strong-worker deployment:", "", format_env_table(handoff.recommended_build_args), "", "Copy/paste balanced build args:", "", "```text", format_key_value_block(handoff.recommended_build_args), "```", "", "Maximum quality worker for a GPU or larger paid/owned worker after a 5-page benchmark proves the heavier models help:", "", format_env_table(handoff.maximum_quality_build_args), "", "Copy/paste maximum-quality build args:", "", "```text", format_key_value_block(handoff.maximum_quality_build_args), "```", "", "## Vercel Environment Variables", "", format_env_table(handoff.vercel_env), "", "Copy/paste Vercel values:", "", "```text", format_key_value_block(handoff.vercel_env), "```", "", "## Deploy Worker With Hugging Face CLI", "", "Create a Hugging Face Space with SDK **Docker**, then run these from the repo root. Replace `/` with the Space repo id:", "", "```powershell", *handoff.huggingface_cli_commands, "```", "", "Set the Hugging Face Space secrets and Docker build args from the tables above in the Space settings before the final smoke test.", "The `SECRET_KEY` value in this handoff is generated for this deployment; keep it private and do not commit the handoff output.", "", "## Deploy Vercel Site With CLI", "", "Run these from the repo root after the worker URL is known:", "", "```powershell", *handoff.vercel_cli_commands, "```", "", "If you previously tested direct Hugging Face cloud TTS on Vercel, remove those temporary variables before or after setting `WORKER_BASE_URL`. It is normal if a remove command says the variable does not exist:", "", "```powershell", *handoff.vercel_cleanup_commands, "vercel --prod --yes", "```", "", "## Commands", "", "Run these from the repo root:", "", "```powershell", handoff.commands["localReadiness"], handoff.commands["refreshResearch"], handoff.commands["licensePolicy"], handoff.commands["researchSources"], handoff.commands["exportWorker"], handoff.commands["prepareDeployment"], handoff.commands["validateEnv"], handoff.commands["deploymentStatus"], handoff.commands["diagnoseVercelWorker"], handoff.commands["verifyLive"], handoff.commands["finalAudit"], "```", "", "The hosted preflight command checks the hosted site's `/api/worker-diagnostics` endpoint and writes `outputs\\hosted-preflight.json`. If it says `cors-blocked`, set Hugging Face `CORS_ORIGINS` to the exact Vercel production URL, keep `COOKIE_SAMESITE=none` and `COOKIE_SECURE=1`, restart the Space, redeploy Vercel, and run the preflight again.", "", "The goal audit is complete only after `outputs\\site-verification.json` proves the Vercel shell login, worker routing, large-PDF readiness, hosted provider limits, recommended stack, disabled direct cloud fallback, worker diagnostics reachability, and worker CORS readiness; `outputs\\worker-verification.json` proves the worker recommended stack plus embedded-text and scanned-OCR smoke jobs, usable extracted text, scanned OCR extraction, audio URLs, download URLs, audio bytes, download bytes, and audio file signatures; and `outputs\\live-deployment-proof.json` records `complete: true`.", "", ] ), encoding="utf-8", ) def main() -> None: parser = argparse.ArgumentParser(description="Create a deployment handoff file with exact worker/Vercel settings and proof commands.") parser.add_argument("worker_url", help="Worker URL, for example https://your-space.hf.space") parser.add_argument("--origin", required=True, help="Vercel production origin, for example https://your-app.vercel.app") parser.add_argument("--code", default="1234", help="Access code for the site and worker.") parser.add_argument("--secret-key", help="Optional fixed cookie-signing secret. Omit to generate a random deployment secret.") parser.add_argument("--out", type=Path, default=DEFAULT_OUT, help="Markdown handoff destination.") parser.add_argument("--json", action="store_true", help="Print JSON instead of a short summary.") args = parser.parse_args() handoff = build_handoff(args.worker_url, args.origin, args.code, args.secret_key) write_markdown(args.out, handoff) result = {"out": str(args.out), **asdict(handoff)} if args.json: print(json.dumps(result, ensure_ascii=False, indent=2)) else: print(f"Wrote deployment handoff to {args.out}") print(handoff.commands["verifyLive"]) if __name__ == "__main__": main()