from __future__ import annotations import argparse import json import sys from pathlib import Path ROOT_DIR = Path(__file__).resolve().parent.parent if str(ROOT_DIR) not in sys.path: sys.path.insert(0, str(ROOT_DIR)) from scripts.next_deployment_step import deployment_url_warnings from scripts.verify_site import Check, print_table, verify_site DEFAULT_OUT = ROOT_DIR / "outputs" / "hosted-preflight.json" def next_action_for_checks(checks: list[Check], worker_url: str, site_url: str) -> str: failed = {check.name: check.detail for check in checks if not check.ok} if not failed: return ( "Hosted route is ready. Upload a 5-page Arabic sample first, then run the full book only after " "the OCR text and audio sample sound good." ) if "site worker CORS ready" in failed: return ( "Fix Hugging Face CORS before uploading: set CORS_ORIGINS to the exact Vercel URL, keep " "COOKIE_SAMESITE=none and COOKIE_SECURE=1, restart the Space, redeploy Vercel, then rerun " f"python scripts\\hosted_preflight.py {site_url} --worker-url {worker_url}." ) if "site worker reachable from vercel" in failed or "site worker diagnostics endpoint" in failed: return ( "Fix worker routing before uploading: confirm Vercel WORKER_BASE_URL is the public Hugging Face " "Space URL, confirm the Space is awake/public, redeploy Vercel, then rerun hosted preflight." ) if "site direct cloud fallback disabled" in failed: return ( "Remove Vercel direct-cloud fallback variables before production: ENABLE_DIRECT_CLOUD_TTS, " "HF_API_TOKEN, HF_TTS_MODEL, and DEFAULT_VOICE_ID. Keep WORKER_BASE_URL, then redeploy." ) if "site login" in failed or "site session authenticated" in failed: return "Fix ACCESS_CODE and SECRET_KEY matching between Vercel and the worker, then rerun hosted preflight." return "Fix the failed Vercel site checks, redeploy, then rerun hosted preflight before uploading a PDF." 
def run_hosted_preflight(
    site_url: str,
    worker_url: str,
    code: str = "1234",
    out: Path = DEFAULT_OUT,
    timeout: float = 60,
) -> dict[str, object]:
    """Run the hosted preflight and persist its outcome as a JSON report.

    Trailing slashes are stripped from both URLs first. If
    ``deployment_url_warnings`` reports anything, the site checks are skipped
    and the result is marked not ready. Otherwise ``verify_site`` runs and
    readiness is true only when every returned check is ok.

    Args:
        site_url: URL of the deployed site.
        worker_url: URL of the worker the site is expected to use.
        code: Access code forwarded to ``verify_site``.
        out: Destination of the JSON report; parent directories are created.
        timeout: Timeout forwarded to ``verify_site``.

    Returns:
        The result mapping. The ``report`` key (the report path as a string)
        is added after the file is written, so it is not part of the file.
    """
    site_url = site_url.rstrip("/")
    worker_url = worker_url.rstrip("/")

    url_problems = deployment_url_warnings(worker_url, site_url)
    if not url_problems:
        site_checks = verify_site(site_url, code, worker_url, timeout=timeout)
        is_ready = all(item.ok for item in site_checks)
        reported_warnings = []
        check_rows = [item.__dict__ for item in site_checks]
        advice = next_action_for_checks(site_checks, worker_url, site_url)
    else:
        # Suspicious URLs: do not contact the site at all.
        is_ready = False
        reported_warnings = url_problems
        check_rows = []
        advice = (
            "Replace placeholder, local, non-HTTPS, or swapped URLs with the real Vercel production URL "
            "and Hugging Face Space worker URL, then rerun hosted preflight."
        )

    # Key order is kept stable because it determines the layout of the JSON report.
    result: dict[str, object] = {
        "readyToUpload": is_ready,
        "siteUrl": site_url,
        "workerUrl": worker_url,
        "urlWarnings": reported_warnings,
        "checks": check_rows,
        "nextAction": advice,
    }

    out.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(result, ensure_ascii=False, indent=2)
    out.write_text(serialized, encoding="utf-8")

    result["report"] = str(out)
    return result


def print_result(result: dict[str, object]) -> None:
    """Print a readable summary of a mapping produced by ``run_hosted_preflight``.

    Output order: status header, site and worker URLs, optional URL warnings,
    optional check table (via ``print_table``), next action, report path.
    """
    status = "ready" if result["readyToUpload"] else "not ready"
    print(f"Hosted preflight: {status}")
    print(f"Site: {result['siteUrl']}")
    print(f"Worker: {result['workerUrl']}")

    url_warnings = result.get("urlWarnings") or []
    if url_warnings:
        print()
        print("URL warnings")
        for message in url_warnings:
            print(f"- {message}")

    check_rows = result.get("checks") or []
    if check_rows:
        print()
        # Rows are plain dicts here; rebuild Check objects for the table printer.
        table = [
            Check(str(row["name"]), bool(row["ok"]), str(row["detail"]))
            for row in check_rows
        ]
        print_table(table)

    print()
    print(f"Next action: {result['nextAction']}")
    print(f"Report: {result['report']}")


def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser used by ``main``."""
    parser = argparse.ArgumentParser(
        description="Run a quick hosted Vercel-to-worker preflight before uploading a large PDF."
    )
    parser.add_argument(
        "site_url",
        help="Vercel production URL, for example https://your-app.vercel.app",
    )
    parser.add_argument(
        "--worker-url",
        required=True,
        help="Expected Hugging Face Space worker URL.",
    )
    parser.add_argument("--code", default="1234", help="Access code used by the site.")
    parser.add_argument("--out", type=Path, default=DEFAULT_OUT, help="JSON report output path.")
    parser.add_argument("--timeout", type=float, default=60, help="HTTP timeout in seconds.")
    parser.add_argument("--json", action="store_true", help="Print JSON instead of a readable summary.")
    return parser


def main() -> None:
    """Command-line entry point: run the preflight, print it, exit 1 when not ready."""
    args = _build_arg_parser().parse_args()
    result = run_hosted_preflight(
        args.site_url,
        args.worker_url,
        code=args.code,
        out=args.out,
        timeout=args.timeout,
    )

    if not args.json:
        print_result(result)
    else:
        print(json.dumps(result, ensure_ascii=False, indent=2))

    # A non-zero exit status signals "not ready" to the calling shell.
    if not result["readyToUpload"]:
        raise SystemExit(1)


if __name__ == "__main__":
    main()