File size: 5,297 Bytes
2e1a095
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path

# ROOT_DIR is the parent of the directory that contains this file. It is put
# at the front of sys.path (once) before the `scripts.*` imports that follow —
# presumably so those imports resolve when this file is run directly.
ROOT_DIR = Path(__file__).resolve().parent.parent
if str(ROOT_DIR) not in sys.path:
    sys.path.insert(0, str(ROOT_DIR))

from scripts.next_deployment_step import deployment_url_warnings
from scripts.verify_site import Check, print_table, verify_site


# Default path of the JSON report: <ROOT_DIR>/outputs/hosted-preflight.json.
DEFAULT_OUT = ROOT_DIR / "outputs" / "hosted-preflight.json"


def next_action_for_checks(checks: list[Check], worker_url: str, site_url: str) -> str:
    """Return the one remediation message that best matches the failed checks.

    Failed checks are identified by name and matched against known failure
    groups in a fixed priority order: CORS, worker routing, direct-cloud
    fallback, then login/session. The first group containing a failed check
    decides the message.

    Args:
        checks: Check results; each item exposes ``name`` and ``ok``.
        worker_url: Worker URL interpolated into the CORS rerun command.
        site_url: Site URL interpolated into the CORS rerun command.

    Returns:
        A "ready" message when nothing failed, the message of the first
        matching failure group, or a generic message when failures exist but
        none belong to a known group.
    """
    failed_names = {check.name for check in checks if not check.ok}
    if not failed_names:
        return (
            "Hosted route is ready. Upload a 5-page Arabic sample first, then run the full book only after "
            "the OCR text and audio sample sound good."
        )

    cors_advice = (
        "Fix Hugging Face CORS before uploading: set CORS_ORIGINS to the exact Vercel URL, keep "
        "COOKIE_SAMESITE=none and COOKIE_SECURE=1, restart the Space, redeploy Vercel, then rerun "
        f"python scripts\\hosted_preflight.py {site_url} --worker-url {worker_url}."
    )
    routing_advice = (
        "Fix worker routing before uploading: confirm Vercel WORKER_BASE_URL is the public Hugging Face "
        "Space URL, confirm the Space is awake/public, redeploy Vercel, then rerun hosted preflight."
    )
    fallback_advice = (
        "Remove Vercel direct-cloud fallback variables before production: ENABLE_DIRECT_CLOUD_TTS, "
        "HF_API_TOKEN, HF_TTS_MODEL, and DEFAULT_VOICE_ID. Keep WORKER_BASE_URL, then redeploy."
    )
    login_advice = (
        "Fix ACCESS_CODE and SECRET_KEY matching between Vercel and the worker, then rerun hosted preflight."
    )

    # Ordered by priority: the first rule with any failed check wins.
    rules = (
        (("site worker CORS ready",), cors_advice),
        (("site worker reachable from vercel", "site worker diagnostics endpoint"), routing_advice),
        (("site direct cloud fallback disabled",), fallback_advice),
        (("site login", "site session authenticated"), login_advice),
    )
    for trigger_names, advice in rules:
        if not failed_names.isdisjoint(trigger_names):
            return advice
    return "Fix the failed Vercel site checks, redeploy, then rerun hosted preflight before uploading a PDF."


def run_hosted_preflight(
    site_url: str,
    worker_url: str,
    code: str = "1234",
    out: Path = DEFAULT_OUT,
    timeout: float = 60,
) -> dict[str, object]:
    """Run the hosted preflight, write a JSON report, and return the result.

    Trailing slashes are stripped from both URLs. If
    ``deployment_url_warnings`` reports anything for them, the site checks are
    skipped and the result carries those warnings with an empty check list.
    Otherwise ``verify_site`` runs and the result is ready only when every
    returned check is ok.

    Args:
        site_url: URL of the site to verify.
        worker_url: URL of the worker the site is expected to use.
        code: Access code passed to ``verify_site``.
        out: Path of the JSON report; parent directories are created.
        timeout: Timeout value forwarded to ``verify_site``.

    Returns:
        The result dict with keys ``readyToUpload``, ``siteUrl``,
        ``workerUrl``, ``urlWarnings``, ``checks`` and ``nextAction``, plus a
        ``report`` key (the report path as a string) that is added after the
        file is written and is therefore not part of the file itself.
    """
    site_url = site_url.rstrip("/")
    worker_url = worker_url.rstrip("/")

    url_warnings = deployment_url_warnings(worker_url, site_url)
    if url_warnings:
        # The URLs themselves look wrong, so no site checks are run.
        is_ready = False
        serialized_checks = []
        next_action = (
            "Replace placeholder, local, non-HTTPS, or swapped URLs with the real Vercel production URL "
            "and Hugging Face Space worker URL, then rerun hosted preflight."
        )
    else:
        url_warnings = []
        site_checks = verify_site(site_url, code, worker_url, timeout=timeout)
        is_ready = all(item.ok for item in site_checks)
        serialized_checks = [item.__dict__ for item in site_checks]
        next_action = next_action_for_checks(site_checks, worker_url, site_url)

    # Key order is kept stable because it determines the layout of the JSON report.
    result: dict[str, object] = {
        "readyToUpload": is_ready,
        "siteUrl": site_url,
        "workerUrl": worker_url,
        "urlWarnings": url_warnings,
        "checks": serialized_checks,
        "nextAction": next_action,
    }

    out.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(result, ensure_ascii=False, indent=2)
    out.write_text(payload, encoding="utf-8")

    # Added only after serialization: the report does not list its own path.
    result["report"] = str(out)
    return result


def print_result(result: dict[str, object]) -> None:
    """Print a readable summary of a hosted preflight result to stdout.

    The output is a status line, the site and worker URLs, an optional
    "URL warnings" list, an optional table of checks rendered by
    ``print_table``, and finally the next action and the report path.

    Args:
        result: Result mapping. ``readyToUpload``, ``siteUrl``, ``workerUrl``,
            ``nextAction`` and ``report`` are required; ``urlWarnings`` and
            ``checks`` may be missing or empty.
    """
    status = "ready" if result["readyToUpload"] else "not ready"
    print(f"Hosted preflight: {status}")
    for label, key in (("Site", "siteUrl"), ("Worker", "workerUrl")):
        print(f"{label}: {result[key]}")

    url_warnings = result.get("urlWarnings") or []
    if url_warnings:
        print("\nURL warnings")
        for note in url_warnings:
            print(f"- {note}")

    raw_checks = result.get("checks") or []
    if raw_checks:
        print()
        # Rebuild Check objects from their dict form so print_table can render them.
        rows = []
        for entry in raw_checks:
            rows.append(Check(str(entry["name"]), bool(entry["ok"]), str(entry["detail"])))
        print_table(rows)

    print()
    print(f"Next action: {result['nextAction']}")
    print(f"Report: {result['report']}")


def main() -> None:
    """Parse command-line arguments, run the hosted preflight, and report it.

    The result is printed either as JSON (``--json``) or as a readable
    summary.

    Raises:
        SystemExit: With exit code 1 when the result is not ready to upload.
    """
    cli = argparse.ArgumentParser(
        description="Run a quick hosted Vercel-to-worker preflight before uploading a large PDF."
    )
    cli.add_argument("site_url", help="Vercel production URL, for example https://your-app.vercel.app")
    cli.add_argument("--worker-url", required=True, help="Expected Hugging Face Space worker URL.")
    cli.add_argument("--code", default="1234", help="Access code used by the site.")
    cli.add_argument("--out", type=Path, default=DEFAULT_OUT, help="JSON report output path.")
    cli.add_argument("--timeout", type=float, default=60, help="HTTP timeout in seconds.")
    cli.add_argument("--json", action="store_true", help="Print JSON instead of a readable summary.")
    opts = cli.parse_args()

    outcome = run_hosted_preflight(
        opts.site_url,
        opts.worker_url,
        code=opts.code,
        out=opts.out,
        timeout=opts.timeout,
    )

    if opts.json:
        print(json.dumps(outcome, ensure_ascii=False, indent=2))
    else:
        print_result(outcome)

    if outcome["readyToUpload"]:
        return
    # A non-zero exit status signals "not ready" to the calling shell or CI job.
    raise SystemExit(1)


# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()