from __future__ import annotations
import argparse
import json
import secrets
import sys
from dataclasses import asdict, dataclass
from pathlib import Path
# Repository root: the directory two levels above this file.
ROOT_DIR = Path(__file__).resolve().parent.parent
# Put the repository root at the front of sys.path (once) before the
# `scripts.*` import below; presumably this lets the file run directly
# as a script -- confirm against how the project invokes it.
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))
from scripts.next_deployment_step import deployment_url_warnings
# Default destination of the generated Markdown handoff (see --out in main()).
DEFAULT_OUT = ROOT_DIR / "outputs" / "deployment-handoff.md"
def normalize_origin(value: str) -> str:
    """Return *value* without surrounding whitespace or trailing slashes.

    Whitespace is stripped first: a pasted URL that ends in a space or a
    newline would otherwise keep its trailing slash, because ``rstrip("/")``
    only removes slashes that are the very last characters.

    Args:
        value: An origin or base URL such as ``https://your-space.hf.space/``.

    Returns:
        The cleaned origin, e.g. ``https://your-space.hf.space``.
    """
    return value.strip().rstrip("/")
def generate_secret_key() -> str:
    """Return a fresh random URL-safe secret built from 48 random bytes."""
    entropy_bytes = 48
    return secrets.token_urlsafe(entropy_bytes)
@dataclass
class DeploymentHandoff:
    """Settings and commands that make up one deployment handoff.

    Instances are produced by ``build_handoff`` and rendered to Markdown by
    ``write_markdown``; ``main`` also dumps them as JSON via ``asdict``.
    """

    # Normalized endpoints and the shared access code.
    worker_url: str
    vercel_origin: str
    access_code: str

    # URL sanity-check outcome: ``urls_look_real`` is True when ``warnings`` is empty.
    urls_look_real: bool
    warnings: list[str]

    # Docker build-arg sets for the worker image (name -> value).
    recommended_build_args: dict[str, str]
    maximum_quality_build_args: dict[str, str]

    # Environment values for the worker and for the Vercel site (name -> value).
    worker_secrets: dict[str, str]
    vercel_env: dict[str, str]

    # Named proof/maintenance commands, keyed by a camelCase identifier.
    commands: dict[str, str]

    # Ordered command lists that are rendered as PowerShell code blocks.
    vercel_cli_commands: list[str]
    vercel_cleanup_commands: list[str]
    huggingface_cli_commands: list[str]
def build_handoff(
    worker_url: str,
    vercel_origin: str,
    access_code: str = "1234",
    secret_key: str | None = None,
) -> DeploymentHandoff:
    """Assemble the settings and commands for one worker + Vercel deployment.

    Args:
        worker_url: Worker base URL; passed through ``normalize_origin``.
        vercel_origin: Vercel production origin; passed through ``normalize_origin``.
        access_code: Access code written to both the worker and Vercel settings.
        secret_key: Secret shared by both sides. A falsy value (``None`` or
            ``""``) is replaced by ``generate_secret_key()``.

    Returns:
        A ``DeploymentHandoff`` whose ``warnings`` come from
        ``deployment_url_warnings`` and whose ``urls_look_real`` flag is True
        only when that list is empty.
    """
    worker_url = normalize_origin(worker_url)
    vercel_origin = normalize_origin(vercel_origin)
    if not secret_key:
        secret_key = generate_secret_key()
    warnings = deployment_url_warnings(worker_url, vercel_origin)

    # Docker build args: every listed installer switch is turned on with "1".
    recommended_build_args = dict.fromkeys(
        (
            "INSTALL_TAWKEED_OCR",
            "INSTALL_KATIB_OCR",
            "INSTALL_ARABIC_QWEN_OCR",
            "INSTALL_ARABIC_GLM_OCR",
            "INSTALL_BASEER_OCR",
            "INSTALL_SUPERTONIC",
        ),
        "1",
    )
    maximum_quality_build_args = dict(
        recommended_build_args,
        INSTALL_QARI_OCR="1",
        INSTALL_PADDLEOCR_VL="1",
    )

    # The same access code and secret are written to both environments.
    shared_credentials = {"ACCESS_CODE": access_code, "SECRET_KEY": secret_key}
    worker_secrets = {
        **shared_credentials,
        "CORS_ORIGINS": vercel_origin,
        "COOKIE_SAMESITE": "none",
        "COOKIE_SECURE": "1",
        "OCR_ENGINE": "tesseract",
        "OCR_RENDER_ZOOM": "2",
        "TESSERACT_PSM": "4",
        "DEFAULT_VOICE_ID": "silma-local",
        "OUTPUT_RETENTION_DAYS": "7",
        "OUTPUT_MAX_FILES": "25",
        "AUDIO_FORMAT": "mp3",
        "MP3_BITRATE": "96k",
        "SILMA_ENABLE_NORMALIZER": "0",
        "SILMA_FORCE_TASHKEEL": "0",
        "SILMA_NORMALIZE_NUMBERS": "0",
    }
    vercel_env = {**shared_credentials, "WORKER_BASE_URL": worker_url}

    # Fragments that recur across several command lines.
    url_flags = f"--worker-url {worker_url} --origin {vercel_origin}"
    code_flag = f"--code {access_code}"
    hf_metadata_report = "outputs\\hf-model-metadata.md"
    export_worker = "python scripts\\export_hf_space.py --force"
    configure_vercel_worker = (
        f"python scripts\\configure_vercel_worker.py {worker_url} "
        f"--site-url {vercel_origin} {code_flag} --verify"
    )

    commands = {
        "localReadiness": "python scripts\\prove_local_readiness.py --refresh-research",
        "refreshResearch": "python scripts\\refresh_research_evidence.py",
        "licensePolicy": "python scripts\\research_watchlist.py --check-license-policy --json",
        "researchSources": " ".join(
            (
                "python scripts\\check_research_sources.py",
                "--check-hf-metadata",
                f"--write-hf-metadata-report {hf_metadata_report}",
            )
        ),
        "exportWorker": export_worker,
        "prepareDeployment": f"python scripts\\prepare_live_deployment.py {url_flags} {code_flag}",
        "validateEnv": " ".join(
            (
                "python scripts\\validate_deployment_env.py",
                "--vercel-env outputs\\vercel-production.env",
                "--worker-env outputs\\worker-secrets.env",
                url_flags,
            )
        ),
        "deploymentStatus": f"python scripts\\deployment_status.py {url_flags} {code_flag}",
        "diagnoseVercelWorker": (
            f"python scripts\\hosted_preflight.py {vercel_origin} "
            f"{code_flag} --worker-url {worker_url}"
        ),
        "configureVercelWorker": configure_vercel_worker,
        "verifyLive": " ".join(
            (
                f"python scripts\\prove_live_deployment.py {worker_url}",
                f"--origin {vercel_origin}",
                code_flag,
                "--smoke-ocr-engine arabic",
                "--check-hf-metadata",
                f"--hf-metadata-report {hf_metadata_report}",
                "--proof-out outputs\\live-deployment-proof.json",
            )
        ),
        "finalAudit": " ".join(
            (
                "python scripts\\audit_goal_readiness.py",
                "--worker-report outputs\\worker-verification.json",
                "--site-report outputs\\site-verification.json",
            )
        ),
    }

    # One `vercel env add` line per Vercel variable, in vercel_env order.
    vercel_env_add_commands = [
        f'cmd /c "echo {value}| vercel env add {name} production"'
        for name, value in vercel_env.items()
    ]
    vercel_cli_commands = [
        "npm i -g vercel",
        "vercel login",
        "vercel link --yes",
        *vercel_env_add_commands,
        configure_vercel_worker,
        "vercel --prod --yes",
    ]
    vercel_cleanup_commands = [
        f"vercel env rm {name} production --yes"
        for name in (
            "ENABLE_DIRECT_CLOUD_TTS",
            "HF_API_TOKEN",
            "HF_TTS_MODEL",
            "DEFAULT_VOICE_ID",
        )
    ]
    huggingface_cli_commands = [
        "python -m pip install -U huggingface_hub",
        export_worker,
        "python scripts\\deploy_hf_space.py <your-hf-username>/<your-space-name> --bundle-dir outputs/huggingface-space --json",
    ]

    return DeploymentHandoff(
        worker_url=worker_url,
        vercel_origin=vercel_origin,
        access_code=access_code,
        urls_look_real=not warnings,
        warnings=warnings,
        recommended_build_args=recommended_build_args,
        maximum_quality_build_args=maximum_quality_build_args,
        worker_secrets=worker_secrets,
        vercel_env=vercel_env,
        commands=commands,
        vercel_cli_commands=vercel_cli_commands,
        vercel_cleanup_commands=vercel_cleanup_commands,
        huggingface_cli_commands=huggingface_cli_commands,
    )
def format_env_table(values: dict[str, str]) -> str:
    """Render *values* as a two-column Markdown table of code-formatted cells.

    A literal ``|`` in a key or value would end the table cell early and
    corrupt the row, so pipes are escaped as ``\\|``. GitHub Flavored Markdown
    honours that escape inside code spans within table cells.

    Args:
        values: Mapping of setting name to setting value, rendered in
            iteration order.

    Returns:
        The table as a newline-joined string with no trailing newline. An
        empty mapping yields only the header and separator rows.
    """

    def cell(text: object) -> str:
        # Escape the one character that is structural inside a table row.
        return str(text).replace("|", "\\|")

    rows = ["| Name | Value |", "| --- | --- |"]
    rows.extend(f"| `{cell(key)}` | `{cell(value)}` |" for key, value in values.items())
    return "\n".join(rows)
def format_key_value_block(values: dict[str, str]) -> str:
    """Render *values* as ``NAME=value`` lines, one per entry, in iteration order.

    Returns an empty string for an empty mapping; there is no trailing newline.
    """
    rendered = [f"{name}={setting}" for name, setting in values.items()]
    return "\n".join(rendered)
def write_markdown(path: Path, handoff: DeploymentHandoff) -> None:
    """Write *handoff* to *path* as a UTF-8 Markdown document.

    Missing parent directories of *path* are created first. The document is
    built section by section and joined with newlines; the "URL Warnings"
    section is emitted only when ``handoff.warnings`` is non-empty.

    Args:
        path: Destination file; an existing file is overwritten.
        handoff: The handoff to render.
    """

    def fenced(language: str, body: list[str]) -> list[str]:
        # A fenced code block followed by the blank line that separates sections.
        return [f"```{language}", *body, "```", ""]

    def env_section(values: dict[str, str], caption: str) -> list[str]:
        # The same mapping shown twice: as a table, then as a copy/paste block.
        return [
            format_env_table(values),
            "",
            caption,
            "",
            *fenced("text", [format_key_value_block(values)]),
        ]

    path.parent.mkdir(parents=True, exist_ok=True)

    looks_real = "yes" if handoff.urls_look_real else "no"
    doc: list[str] = [
        "# Arabic Audio Reader Deployment Handoff",
        "",
        "Use this after the Hugging Face worker URL and Vercel production URL exist.",
        "",
        "## URLs",
        "",
        f"- Worker: {handoff.worker_url}",
        f"- Vercel site: {handoff.vercel_origin}",
        f"- URLs look real: {looks_real}",
        "",
    ]

    if handoff.warnings:
        doc += ["## URL Warnings", ""]
        doc += [f"- {warning}" for warning in handoff.warnings]
        doc += [
            "",
            "Replace placeholder/local/test URLs before running live proof. The final audit will not treat placeholder reports as completed deployment evidence.",
            "",
        ]

    doc += ["## Hugging Face Space Secrets", ""]
    doc += env_section(handoff.worker_secrets, "Copy/paste secret values:")

    doc += [
        "## Safety Checklist",
        "",
        "- Set the same `ACCESS_CODE` and `SECRET_KEY` on both Hugging Face and Vercel.",
        "- Set Hugging Face `CORS_ORIGINS` to the exact Vercel production origin shown above.",
        "- Set Vercel `WORKER_BASE_URL` to the exact Hugging Face worker URL shown above.",
        "- After both deployments finish, run the Vercel worker diagnostic command below before uploading a large PDF. It must show `site worker reachable from vercel` and `site worker CORS ready`.",
        "- Remove Vercel's temporary direct Hugging Face TTS fallback variables for production: `ENABLE_DIRECT_CLOUD_TTS`, `HF_API_TOKEN`, `HF_TTS_MODEL`, and `DEFAULT_VOICE_ID`.",
        "- Keep `OCR_ENGINE=tesseract`, `OCR_RENDER_ZOOM=2`, and `TESSERACT_PSM=4` for normal scanned Arabic books; use `arabic-max` only when a short sample needs the slower maximum comparison.",
        "- Do not commit this handoff; it contains the deployment `SECRET_KEY`.",
        "",
    ]

    doc += [
        "## Hugging Face Docker Build Args",
        "",
        "Balanced Arabic OCR worker for the first strong-worker deployment:",
        "",
    ]
    doc += env_section(handoff.recommended_build_args, "Copy/paste balanced build args:")
    doc += [
        "Maximum quality worker for a GPU or larger paid/owned worker after a 5-page benchmark proves the heavier models help:",
        "",
    ]
    doc += env_section(
        handoff.maximum_quality_build_args,
        "Copy/paste maximum-quality build args:",
    )

    doc += ["## Vercel Environment Variables", ""]
    doc += env_section(handoff.vercel_env, "Copy/paste Vercel values:")

    doc += [
        "## Deploy Worker With Hugging Face CLI",
        "",
        "Create a Hugging Face Space with SDK **Docker**, then run these from the repo root. Replace `<your-hf-username>/<your-space-name>` with the Space repo id:",
        "",
    ]
    doc += fenced("powershell", handoff.huggingface_cli_commands)
    doc += [
        "Set the Hugging Face Space secrets and Docker build args from the tables above in the Space settings before the final smoke test.",
        "The `SECRET_KEY` value in this handoff is generated for this deployment; keep it private and do not commit the handoff output.",
        "",
    ]

    doc += [
        "## Deploy Vercel Site With CLI",
        "",
        "Run these from the repo root after the worker URL is known:",
        "",
    ]
    doc += fenced("powershell", handoff.vercel_cli_commands)
    doc += [
        "If you previously tested direct Hugging Face cloud TTS on Vercel, remove those temporary variables before or after setting `WORKER_BASE_URL`. It is normal if a remove command says the variable does not exist:",
        "",
    ]
    doc += fenced(
        "powershell",
        [*handoff.vercel_cleanup_commands, "vercel --prod --yes"],
    )

    # Only these command keys are listed, in this order.
    listed_commands = (
        "localReadiness",
        "refreshResearch",
        "licensePolicy",
        "researchSources",
        "exportWorker",
        "prepareDeployment",
        "validateEnv",
        "deploymentStatus",
        "diagnoseVercelWorker",
        "verifyLive",
        "finalAudit",
    )
    doc += ["## Commands", "", "Run these from the repo root:", ""]
    doc += fenced("powershell", [handoff.commands[key] for key in listed_commands])

    doc += [
        "The hosted preflight command checks the hosted site's `/api/worker-diagnostics` endpoint and writes `outputs\\hosted-preflight.json`. If it says `cors-blocked`, set Hugging Face `CORS_ORIGINS` to the exact Vercel production URL, keep `COOKIE_SAMESITE=none` and `COOKIE_SECURE=1`, restart the Space, redeploy Vercel, and run the preflight again.",
        "",
        "The goal audit is complete only after `outputs\\site-verification.json` proves the Vercel shell login, worker routing, large-PDF readiness, hosted provider limits, recommended stack, disabled direct cloud fallback, worker diagnostics reachability, and worker CORS readiness; `outputs\\worker-verification.json` proves the worker recommended stack plus embedded-text and scanned-OCR smoke jobs, usable extracted text, scanned OCR extraction, audio URLs, download URLs, audio bytes, download bytes, and audio file signatures; and `outputs\\live-deployment-proof.json` records `complete: true`.",
        "",
    ]

    path.write_text("\n".join(doc), encoding="utf-8")
def main() -> None:
    """Parse CLI arguments, write the Markdown handoff, and report the result.

    With ``--json`` the full handoff (plus the output path under ``"out"``) is
    printed as JSON; otherwise a one-line confirmation and the ``verifyLive``
    command are printed.
    """
    parser = argparse.ArgumentParser(
        description="Create a deployment handoff file with exact worker/Vercel settings and proof commands.",
    )
    parser.add_argument(
        "worker_url",
        help="Worker URL, for example https://your-space.hf.space",
    )
    parser.add_argument(
        "--origin",
        required=True,
        help="Vercel production origin, for example https://your-app.vercel.app",
    )
    parser.add_argument(
        "--code",
        default="1234",
        help="Access code for the site and worker.",
    )
    parser.add_argument(
        "--secret-key",
        help="Optional fixed cookie-signing secret. Omit to generate a random deployment secret.",
    )
    parser.add_argument(
        "--out",
        type=Path,
        default=DEFAULT_OUT,
        help="Markdown handoff destination.",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Print JSON instead of a short summary.",
    )
    args = parser.parse_args()

    handoff = build_handoff(args.worker_url, args.origin, args.code, args.secret_key)
    write_markdown(args.out, handoff)

    if not args.json:
        print(f"Wrote deployment handoff to {args.out}")
        print(handoff.commands["verifyLive"])
        return

    # "out" goes first so it leads the JSON object, ahead of the handoff fields.
    payload = {"out": str(args.out)}
    payload.update(asdict(handoff))
    print(json.dumps(payload, ensure_ascii=False, indent=2))
# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()