| from __future__ import annotations
|
|
|
| import argparse
|
| import json
|
| import secrets
|
| import sys
|
| from dataclasses import asdict, dataclass
|
| from pathlib import Path
|
|
|
|
|
# Repository root: two levels up from this file, i.e. the directory that
# contains the folder this script lives in.
ROOT_DIR = Path(__file__).resolve().parent.parent

# Put the repository root at the front of the import path (once) so the
# absolute `scripts.*` import below resolves when this file is run directly.
_root_entry = str(ROOT_DIR)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

from scripts.next_deployment_step import deployment_url_warnings

# Default destination of the generated Markdown handoff.
DEFAULT_OUT = ROOT_DIR / "outputs" / "deployment-handoff.md"
|
|
|
|
|
def normalize_origin(value: str) -> str:
    """Return *value* with every trailing ``/`` removed.

    Nothing else is touched: no whitespace trimming, no case folding and no
    URL parsing takes place.
    """
    end = len(value)
    # Walk back over the run of trailing slashes, then cut there.
    while end and value[end - 1] == "/":
        end -= 1
    return value[:end]
|
|
|
|
|
def generate_secret_key() -> str:
    """Return a new random URL-safe token derived from 48 random bytes.

    The token comes from the ``secrets`` module, so it is suitable for use as
    a signing secret.
    """
    entropy_bytes = 48
    return secrets.token_urlsafe(entropy_bytes)
|
|
|
|
|
@dataclass
class DeploymentHandoff:
    """Plain record of everything the deployment handoff document contains.

    Field order is significant: it is the positional constructor order and
    the key order produced by ``dataclasses.asdict``.
    """

    # --- Deployment targets and access ---
    worker_url: str
    vercel_origin: str
    access_code: str

    # --- URL sanity check ---
    urls_look_real: bool  # build_handoff sets this to True when `warnings` is empty
    warnings: list[str]

    # --- Docker build arguments for the worker image (name -> value) ---
    recommended_build_args: dict[str, str]
    maximum_quality_build_args: dict[str, str]

    # --- Environment settings for each side (name -> value) ---
    worker_secrets: dict[str, str]
    vercel_env: dict[str, str]

    # --- Ready-to-run command lines ---
    commands: dict[str, str]  # keyed by a short camelCase label
    vercel_cli_commands: list[str]
    vercel_cleanup_commands: list[str]
    huggingface_cli_commands: list[str]
|
|
|
|
|
def build_handoff(
    worker_url: str,
    vercel_origin: str,
    access_code: str = "1234",
    secret_key: str | None = None,
) -> DeploymentHandoff:
    """Assemble the settings and command lines for one deployment handoff.

    Args:
        worker_url: Worker URL; trailing slashes are removed before use.
        vercel_origin: Vercel site origin; trailing slashes are removed
            before use.
        access_code: Access code written to both the worker and Vercel
            settings and interpolated into the generated commands.
        secret_key: Secret shared by both sides. When omitted or empty, a
            new one is generated with ``generate_secret_key``.

    Returns:
        A populated ``DeploymentHandoff``. ``urls_look_real`` is true only
        when ``deployment_url_warnings`` reports nothing for the two URLs.
    """

    def script(name: str, *arguments: str) -> str:
        # Generated commands use Windows-style paths:
        #   python scripts\<name>.py <arguments...>
        return " ".join((f"python scripts\\{name}.py", *arguments))

    worker = normalize_origin(worker_url)
    origin = normalize_origin(vercel_origin)
    if not secret_key:
        secret_key = generate_secret_key()
    url_warnings = deployment_url_warnings(worker, origin)

    # Docker build switches: the balanced set, and the same set plus the two
    # heavier OCR models.
    balanced_args = dict.fromkeys(
        (
            "INSTALL_TAWKEED_OCR",
            "INSTALL_KATIB_OCR",
            "INSTALL_ARABIC_QWEN_OCR",
            "INSTALL_ARABIC_GLM_OCR",
            "INSTALL_BASEER_OCR",
            "INSTALL_SUPERTONIC",
        ),
        "1",
    )
    maximum_args = dict(balanced_args, INSTALL_QARI_OCR="1", INSTALL_PADDLEOCR_VL="1")

    space_secrets = {
        # Shared credentials and cross-origin cookie settings.
        "ACCESS_CODE": access_code,
        "SECRET_KEY": secret_key,
        "CORS_ORIGINS": origin,
        "COOKIE_SAMESITE": "none",
        "COOKIE_SECURE": "1",
        # OCR defaults.
        "OCR_ENGINE": "tesseract",
        "OCR_RENDER_ZOOM": "2",
        "TESSERACT_PSM": "4",
        # Voice, retention and audio output defaults.
        "DEFAULT_VOICE_ID": "silma-local",
        "OUTPUT_RETENTION_DAYS": "7",
        "OUTPUT_MAX_FILES": "25",
        "AUDIO_FORMAT": "mp3",
        "MP3_BITRATE": "96k",
        "SILMA_ENABLE_NORMALIZER": "0",
        "SILMA_FORCE_TASHKEEL": "0",
        "SILMA_NORMALIZE_NUMBERS": "0",
    }
    site_env = {
        "ACCESS_CODE": access_code,
        "SECRET_KEY": secret_key,
        "WORKER_BASE_URL": worker,
    }

    # Flag fragments that recur across several commands.
    worker_flag = f"--worker-url {worker}"
    origin_flag = f"--origin {origin}"
    code_flag = f"--code {access_code}"

    proof_commands = {
        "localReadiness": script("prove_local_readiness", "--refresh-research"),
        "refreshResearch": script("refresh_research_evidence"),
        "licensePolicy": script("research_watchlist", "--check-license-policy", "--json"),
        "researchSources": script(
            "check_research_sources",
            "--check-hf-metadata",
            "--write-hf-metadata-report outputs\\hf-model-metadata.md",
        ),
        "exportWorker": script("export_hf_space", "--force"),
        "prepareDeployment": script("prepare_live_deployment", worker_flag, origin_flag, code_flag),
        "validateEnv": script(
            "validate_deployment_env",
            "--vercel-env outputs\\vercel-production.env",
            "--worker-env outputs\\worker-secrets.env",
            worker_flag,
            origin_flag,
        ),
        "deploymentStatus": script("deployment_status", worker_flag, origin_flag, code_flag),
        "diagnoseVercelWorker": script("hosted_preflight", origin, code_flag, worker_flag),
        "configureVercelWorker": script(
            "configure_vercel_worker",
            worker,
            f"--site-url {origin}",
            code_flag,
            "--verify",
        ),
        "verifyLive": script(
            "prove_live_deployment",
            worker,
            origin_flag,
            code_flag,
            "--smoke-ocr-engine arabic",
            "--check-hf-metadata --hf-metadata-report outputs\\hf-model-metadata.md",
            "--proof-out outputs\\live-deployment-proof.json",
        ),
        "finalAudit": script(
            "audit_goal_readiness",
            "--worker-report outputs\\worker-verification.json",
            "--site-report outputs\\site-verification.json",
        ),
    }

    # One `vercel env add` line per Vercel variable, in the order of site_env.
    vercel_steps = [
        "npm i -g vercel",
        "vercel login",
        "vercel link --yes",
        *[
            f'cmd /c "echo {setting}| vercel env add {name} production"'
            for name, setting in site_env.items()
        ],
        proof_commands["configureVercelWorker"],
        "vercel --prod --yes",
    ]
    cleanup_steps = [
        f"vercel env rm {name} production --yes"
        for name in (
            "ENABLE_DIRECT_CLOUD_TTS",
            "HF_API_TOKEN",
            "HF_TTS_MODEL",
            "DEFAULT_VOICE_ID",
        )
    ]
    space_steps = [
        "python -m pip install -U huggingface_hub",
        proof_commands["exportWorker"],
        script(
            "deploy_hf_space",
            "<your-hf-username>/<your-space-name>",
            "--bundle-dir outputs/huggingface-space",
            "--json",
        ),
    ]

    return DeploymentHandoff(
        worker_url=worker,
        vercel_origin=origin,
        access_code=access_code,
        urls_look_real=not url_warnings,
        warnings=url_warnings,
        recommended_build_args=balanced_args,
        maximum_quality_build_args=maximum_args,
        worker_secrets=space_secrets,
        vercel_env=site_env,
        commands=proof_commands,
        vercel_cli_commands=vercel_steps,
        vercel_cleanup_commands=cleanup_steps,
        huggingface_cli_commands=space_steps,
    )
|
|
|
|
|
def format_env_table(values: dict[str, str]) -> str:
    """Render *values* as a two-column Markdown table.

    Each name and each value is wrapped in an inline code span. Pipe
    characters are escaped as ``\\|`` because GitHub-flavoured Markdown splits
    table rows on every unescaped ``|``, even inside a code span; an unescaped
    pipe in a value would otherwise break its row into extra columns.

    Args:
        values: Mapping of setting name to setting value; rows are emitted in
            the mapping's iteration order.

    Returns:
        The table as a newline-joined string without a trailing newline. An
        empty mapping yields just the header and separator rows.
    """

    def cell(text: object) -> str:
        # str() keeps the old tolerance for non-string values.
        return "`" + str(text).replace("|", "\\|") + "`"

    rows = ["| Name | Value |", "| --- | --- |"]
    rows.extend(f"| {cell(name)} | {cell(setting)} |" for name, setting in values.items())
    return "\n".join(rows)
|
|
|
|
|
def format_key_value_block(values: dict[str, str]) -> str:
    """Render *values* as ``NAME=value`` lines, one per entry.

    Lines follow the mapping's iteration order and are joined with newlines;
    there is no trailing newline, and an empty mapping gives an empty string.
    """
    assignments = [f"{name}={setting}" for name, setting in values.items()]
    return "\n".join(assignments)
|
|
|
|
|
def write_markdown(path: Path, handoff: DeploymentHandoff) -> None:
    """Render *handoff* as a Markdown document and write it to *path*.

    The parent directory of *path* is created when missing and the file is
    written as UTF-8, replacing any existing content. The "URL Warnings"
    section is emitted only when ``handoff.warnings`` is non-empty.
    """

    def fenced(language: str, *body: str) -> list[str]:
        # A fenced code block plus the blank line that follows every block.
        return [f"```{language}", *body, "```", ""]

    def env_section(values: dict[str, str], caption: str) -> list[str]:
        # Settings table, then the same settings as a copy/paste block.
        return [
            format_env_table(values),
            "",
            caption,
            "",
            *fenced("text", format_key_value_block(values)),
        ]

    path.parent.mkdir(parents=True, exist_ok=True)

    looks_real = "yes" if handoff.urls_look_real else "no"
    document: list[str] = [
        "# Arabic Audio Reader Deployment Handoff",
        "",
        "Use this after the Hugging Face worker URL and Vercel production URL exist.",
        "",
        "## URLs",
        "",
        f"- Worker: {handoff.worker_url}",
        f"- Vercel site: {handoff.vercel_origin}",
        f"- URLs look real: {looks_real}",
        "",
    ]

    if handoff.warnings:
        document += ["## URL Warnings", ""]
        document += [f"- {warning}" for warning in handoff.warnings]
        document += [
            "",
            "Replace placeholder/local/test URLs before running live proof. The final audit will not treat placeholder reports as completed deployment evidence.",
            "",
        ]

    document += ["## Hugging Face Space Secrets", ""]
    document += env_section(handoff.worker_secrets, "Copy/paste secret values:")

    document += [
        "## Safety Checklist",
        "",
        "- Set the same `ACCESS_CODE` and `SECRET_KEY` on both Hugging Face and Vercel.",
        "- Set Hugging Face `CORS_ORIGINS` to the exact Vercel production origin shown above.",
        "- Set Vercel `WORKER_BASE_URL` to the exact Hugging Face worker URL shown above.",
        "- After both deployments finish, run the Vercel worker diagnostic command below before uploading a large PDF. It must show `site worker reachable from vercel` and `site worker CORS ready`.",
        "- Remove Vercel's temporary direct Hugging Face TTS fallback variables for production: `ENABLE_DIRECT_CLOUD_TTS`, `HF_API_TOKEN`, `HF_TTS_MODEL`, and `DEFAULT_VOICE_ID`.",
        "- Keep `OCR_ENGINE=tesseract`, `OCR_RENDER_ZOOM=2`, and `TESSERACT_PSM=4` for normal scanned Arabic books; use `arabic-max` only when a short sample needs the slower maximum comparison.",
        "- Do not commit this handoff; it contains the deployment `SECRET_KEY`.",
        "",
    ]

    document += [
        "## Hugging Face Docker Build Args",
        "",
        "Balanced Arabic OCR worker for the first strong-worker deployment:",
        "",
    ]
    document += env_section(handoff.recommended_build_args, "Copy/paste balanced build args:")
    document += [
        "Maximum quality worker for a GPU or larger paid/owned worker after a 5-page benchmark proves the heavier models help:",
        "",
    ]
    document += env_section(
        handoff.maximum_quality_build_args,
        "Copy/paste maximum-quality build args:",
    )

    document += ["## Vercel Environment Variables", ""]
    document += env_section(handoff.vercel_env, "Copy/paste Vercel values:")

    document += [
        "## Deploy Worker With Hugging Face CLI",
        "",
        "Create a Hugging Face Space with SDK **Docker**, then run these from the repo root. Replace `<your-hf-username>/<your-space-name>` with the Space repo id:",
        "",
    ]
    document += fenced("powershell", *handoff.huggingface_cli_commands)
    document += [
        "Set the Hugging Face Space secrets and Docker build args from the tables above in the Space settings before the final smoke test.",
        "The `SECRET_KEY` value in this handoff is generated for this deployment; keep it private and do not commit the handoff output.",
        "",
    ]

    document += [
        "## Deploy Vercel Site With CLI",
        "",
        "Run these from the repo root after the worker URL is known:",
        "",
    ]
    document += fenced("powershell", *handoff.vercel_cli_commands)
    document += [
        "If you previously tested direct Hugging Face cloud TTS on Vercel, remove those temporary variables before or after setting `WORKER_BASE_URL`. It is normal if a remove command says the variable does not exist:",
        "",
    ]
    # The cleanup block ends with a redeploy so the removals take effect.
    document += fenced("powershell", *handoff.vercel_cleanup_commands, "vercel --prod --yes")

    # Commands are listed in run order; "configureVercelWorker" is not part
    # of this list (it appears in the Vercel CLI steps instead).
    command_order = (
        "localReadiness",
        "refreshResearch",
        "licensePolicy",
        "researchSources",
        "exportWorker",
        "prepareDeployment",
        "validateEnv",
        "deploymentStatus",
        "diagnoseVercelWorker",
        "verifyLive",
        "finalAudit",
    )
    document += ["## Commands", "", "Run these from the repo root:", ""]
    document += fenced("powershell", *[handoff.commands[label] for label in command_order])
    document += [
        "The hosted preflight command checks the hosted site's `/api/worker-diagnostics` endpoint and writes `outputs\\hosted-preflight.json`. If it says `cors-blocked`, set Hugging Face `CORS_ORIGINS` to the exact Vercel production URL, keep `COOKIE_SAMESITE=none` and `COOKIE_SECURE=1`, restart the Space, redeploy Vercel, and run the preflight again.",
        "",
        "The goal audit is complete only after `outputs\\site-verification.json` proves the Vercel shell login, worker routing, large-PDF readiness, hosted provider limits, recommended stack, disabled direct cloud fallback, worker diagnostics reachability, and worker CORS readiness; `outputs\\worker-verification.json` proves the worker recommended stack plus embedded-text and scanned-OCR smoke jobs, usable extracted text, scanned OCR extraction, audio URLs, download URLs, audio bytes, download bytes, and audio file signatures; and `outputs\\live-deployment-proof.json` records `complete: true`.",
        "",
    ]

    path.write_text("\n".join(document), encoding="utf-8")
|
|
|
|
|
def main() -> None:
    """Command-line entry point: build the handoff, write it, report it.

    The Markdown file is always written to ``--out``. With ``--json`` the
    full handoff (plus the output path under ``"out"``) is printed as JSON;
    otherwise a one-line confirmation and the live-verification command are
    printed.
    """
    parser = argparse.ArgumentParser(
        description="Create a deployment handoff file with exact worker/Vercel settings and proof commands.",
    )
    parser.add_argument(
        "worker_url",
        help="Worker URL, for example https://your-space.hf.space",
    )
    parser.add_argument(
        "--origin",
        required=True,
        help="Vercel production origin, for example https://your-app.vercel.app",
    )
    parser.add_argument(
        "--code",
        default="1234",
        help="Access code for the site and worker.",
    )
    parser.add_argument(
        "--secret-key",
        help="Optional fixed cookie-signing secret. Omit to generate a random deployment secret.",
    )
    parser.add_argument(
        "--out",
        type=Path,
        default=DEFAULT_OUT,
        help="Markdown handoff destination.",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Print JSON instead of a short summary.",
    )
    options = parser.parse_args()

    destination = options.out
    handoff = build_handoff(
        options.worker_url,
        options.origin,
        options.code,
        options.secret_key,
    )
    write_markdown(destination, handoff)

    if options.json:
        # "out" comes first, followed by the handoff fields in declaration order.
        payload = {"out": str(destination)}
        payload.update(asdict(handoff))
        print(json.dumps(payload, ensure_ascii=False, indent=2))
        return

    print(f"Wrote deployment handoff to {destination}")
    print(handoff.commands["verifyLive"])


if __name__ == "__main__":
    main()
|
|
|