# arabic-audio-reader-worker / scripts / hosted_preflight.py
# Syncre's picture
# Deploy Arabic Audio Reader worker
# 2e1a095 verified
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
# Two levels up from this file: the directory that contains this script's
# own folder.
ROOT_DIR = Path(__file__).resolve().parent.parent
if str(ROOT_DIR) not in sys.path:
    # Prepend ROOT_DIR so the `scripts.*` imports that follow can be resolved
    # even when this file is launched directly as a script.
    sys.path.insert(0, str(ROOT_DIR))
from scripts.next_deployment_step import deployment_url_warnings
from scripts.verify_site import Check, print_table, verify_site
# Default path of the JSON report; used as the default for both the `out`
# parameter of run_hosted_preflight and the `--out` command-line option.
DEFAULT_OUT = ROOT_DIR / "outputs" / "hosted-preflight.json"
def next_action_for_checks(checks: list[Check], worker_url: str, site_url: str) -> str:
    """Pick the single follow-up instruction that matches a preflight run.

    Args:
        checks: Verification results; each item exposes ``name`` and ``ok``.
        worker_url: Worker URL echoed into the suggested rerun command.
        site_url: Site URL echoed into the suggested rerun command.

    Returns:
        The "ready" message when no check failed. Otherwise the advice for
        the first matching failure category, tried in this order: CORS,
        worker routing, direct-cloud fallback, login/session, and finally a
        generic message for any other failure.
    """
    failed_names = {entry.name for entry in checks if not entry.ok}
    if not failed_names:
        return (
            "Hosted route is ready. Upload a 5-page Arabic sample first, then run the full book only after "
            "the OCR text and audio sample sound good."
        )

    # Ordered by priority: the first rule with at least one failing check wins.
    remedies = (
        (
            ("site worker CORS ready",),
            "Fix Hugging Face CORS before uploading: set CORS_ORIGINS to the exact Vercel URL, keep "
            "COOKIE_SAMESITE=none and COOKIE_SECURE=1, restart the Space, redeploy Vercel, then rerun "
            f"python scripts\\hosted_preflight.py {site_url} --worker-url {worker_url}.",
        ),
        (
            ("site worker reachable from vercel", "site worker diagnostics endpoint"),
            "Fix worker routing before uploading: confirm Vercel WORKER_BASE_URL is the public Hugging Face "
            "Space URL, confirm the Space is awake/public, redeploy Vercel, then rerun hosted preflight.",
        ),
        (
            ("site direct cloud fallback disabled",),
            "Remove Vercel direct-cloud fallback variables before production: ENABLE_DIRECT_CLOUD_TTS, "
            "HF_API_TOKEN, HF_TTS_MODEL, and DEFAULT_VOICE_ID. Keep WORKER_BASE_URL, then redeploy.",
        ),
        (
            ("site login", "site session authenticated"),
            "Fix ACCESS_CODE and SECRET_KEY matching between Vercel and the worker, then rerun hosted preflight.",
        ),
    )
    for triggers, advice in remedies:
        if not failed_names.isdisjoint(triggers):
            return advice

    return "Fix the failed Vercel site checks, redeploy, then rerun hosted preflight before uploading a PDF."
def run_hosted_preflight(
    site_url: str,
    worker_url: str,
    code: str = "1234",
    out: Path = DEFAULT_OUT,
    timeout: float = 60,
) -> dict[str, object]:
    """Run the hosted preflight and persist its outcome as a JSON report.

    Trailing slashes are stripped from both URLs. The URLs are first passed
    to ``deployment_url_warnings``; if that reports anything, the site checks
    are skipped and the result is marked not ready. Otherwise ``verify_site``
    is run and the result is ready only when every check passed.

    Args:
        site_url: URL of the deployed site.
        worker_url: URL of the worker the site is expected to use.
        code: Access code forwarded to ``verify_site``.
        out: Destination of the JSON report; parent directories are created.
        timeout: Timeout forwarded to ``verify_site``.

    Returns:
        The result dictionary. Its ``report`` key (the report path as a
        string) is added after the file is written, so it is not part of the
        JSON on disk.
    """
    site_url = site_url.rstrip("/")
    worker_url = worker_url.rstrip("/")

    url_warnings = deployment_url_warnings(worker_url, site_url)
    if url_warnings:
        # Suspicious URLs: do not contact the site at all.
        ready = False
        serialized_checks = []
        next_action = (
            "Replace placeholder, local, non-HTTPS, or swapped URLs with the real Vercel production URL "
            "and Hugging Face Space worker URL, then rerun hosted preflight."
        )
    else:
        outcomes = verify_site(site_url, code, worker_url, timeout=timeout)
        ready = all(outcome.ok for outcome in outcomes)
        serialized_checks = [outcome.__dict__ for outcome in outcomes]
        next_action = next_action_for_checks(outcomes, worker_url, site_url)
        url_warnings = []

    # Key order is kept stable because it determines the layout of the report.
    result: dict[str, object] = {
        "readyToUpload": ready,
        "siteUrl": site_url,
        "workerUrl": worker_url,
        "urlWarnings": url_warnings,
        "checks": serialized_checks,
        "nextAction": next_action,
    }

    out.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(result, ensure_ascii=False, indent=2)
    out.write_text(payload, encoding="utf-8")

    result["report"] = str(out)
    return result
def print_result(result: dict[str, object]) -> None:
    """Print a human-readable summary of a preflight result to stdout.

    The summary has a three-line header (status, site, worker), an optional
    "URL warnings" list, an optional table of checks rendered by
    ``print_table``, and finally the next action and the report path.

    Args:
        result: Dictionary with the keys ``readyToUpload``, ``siteUrl``,
            ``workerUrl``, ``nextAction`` and ``report``; ``urlWarnings`` and
            ``checks`` are optional and may be empty.
    """
    status = "ready" if result["readyToUpload"] else "not ready"
    print(f"Hosted preflight: {status}")
    print(f"Site: {result['siteUrl']}")
    print(f"Worker: {result['workerUrl']}")

    url_warnings = result.get("urlWarnings") or []
    if url_warnings:
        # A leading empty string yields the blank separator line.
        print("", "URL warnings", *(f"- {note}" for note in url_warnings), sep="\n")

    raw_checks = result.get("checks") or []
    if raw_checks:
        print()
        rows = []
        for entry in raw_checks:
            # Rebuild Check objects from the plain dictionaries in the result.
            rows.append(Check(str(entry["name"]), bool(entry["ok"]), str(entry["detail"])))
        print_table(rows)

    print()
    print(f"Next action: {result['nextAction']}")
    print(f"Report: {result['report']}")
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the hosted preflight script."""
    parser = argparse.ArgumentParser(
        description="Run a quick hosted Vercel-to-worker preflight before uploading a large PDF."
    )
    parser.add_argument("site_url", help="Vercel production URL, for example https://your-app.vercel.app")
    parser.add_argument("--worker-url", required=True, help="Expected Hugging Face Space worker URL.")
    parser.add_argument("--code", default="1234", help="Access code used by the site.")
    parser.add_argument("--out", type=Path, default=DEFAULT_OUT, help="JSON report output path.")
    parser.add_argument("--timeout", type=float, default=60, help="HTTP timeout in seconds.")
    parser.add_argument("--json", action="store_true", help="Print JSON instead of a readable summary.")
    return parser


def main() -> None:
    """Command-line entry point: run the preflight and report the outcome.

    Prints either the raw result as JSON (``--json``) or the readable
    summary.

    Raises:
        SystemExit: With status 1 when the result is not ready to upload.
    """
    options = _build_parser().parse_args()
    outcome = run_hosted_preflight(
        options.site_url,
        options.worker_url,
        code=options.code,
        out=options.out,
        timeout=options.timeout,
    )

    if not options.json:
        print_result(outcome)
    else:
        print(json.dumps(outcome, ensure_ascii=False, indent=2))

    if outcome["readyToUpload"]:
        return
    raise SystemExit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()