# Source: arabic-audio-reader-worker/scripts/next_deployment_step.py
# Page header captured with the file: "Syncre's picture"
# Commit message: Deploy Arabic Audio Reader worker
# Commit: 088795a (verified)
from __future__ import annotations
import argparse
import json
from dataclasses import dataclass
from pathlib import Path
from urllib.parse import urlparse
# Directory two levels above this script's resolved location.
ROOT_DIR = Path(__file__).resolve().parent.parent

# JSON report files read by this script; all of them live under ROOT_DIR/outputs.
LOCAL_REPORT = ROOT_DIR.joinpath("outputs", "local-readiness.json")
SITE_REPORT = ROOT_DIR.joinpath("outputs", "site-verification.json")
WORKER_REPORT = ROOT_DIR.joinpath("outputs", "worker-verification.json")
LIVE_PROOF = ROOT_DIR.joinpath("outputs", "live-deployment-proof.json")
@dataclass
class NextStep:
    """The next action to take, as produced by ``choose_next_step``."""

    # Short machine-readable stage tag; this file uses "local", "handoff",
    # "live-proof", "complete" and "complete-ready".
    status: str
    # One-line human-readable heading for the step.
    title: str
    # Longer explanation of why the step is needed.
    detail: str
    # Command line text to show the user for this step.
    command: str
def load_json(path: Path) -> object | None:
if not path.exists():
return None
try:
return json.loads(path.read_text(encoding="utf-8"))
except Exception:
return None
def report_checks_pass(path: Path, required: set[str]) -> bool:
    """Return True when every check named in *required* passed in the report.

    The report at *path* must decode to a JSON list. Dict entries contribute
    their ``"name"`` (stringified) and the truthiness of ``"ok"``; non-dict
    entries are ignored. When a name occurs more than once, the last entry
    wins.
    """
    entries = load_json(path)
    if not isinstance(entries, list):
        return False

    outcome_by_name: dict[str, bool] = {}
    for entry in entries:
        if isinstance(entry, dict):
            outcome_by_name[str(entry.get("name", ""))] = bool(entry.get("ok"))

    for name in required:
        # A check that is absent from the report counts as failed.
        if not outcome_by_name.get(name, False):
            return False
    return True
def local_ready(path: Path = LOCAL_REPORT) -> bool:
    """Return True when the report at *path* is a JSON object with a truthy ``"ready"``."""
    report = load_json(path)
    if not isinstance(report, dict):
        return False
    return bool(report.get("ready"))
def live_proof_complete(path: Path = LIVE_PROOF) -> bool:
    """Return True when the proof at *path* is a JSON object with a truthy ``"complete"``."""
    proof = load_json(path)
    if not isinstance(proof, dict):
        return False
    return bool(proof.get("complete"))
def placeholder_or_test_url(url: str | None) -> bool:
    """Return True when *url* is missing or looks like a placeholder/test address.

    A URL is flagged when, after lower-casing and stripping, it contains one
    of the known placeholder or loopback markers anywhere in its text, or
    when its hostname is (or is a subdomain of) one of the reserved
    ``example`` names.
    """
    if not url:
        return True

    normalized = url.lower().strip()

    # Substring markers: documentation placeholders and loopback addresses.
    marker_substrings = (
        "your-space.hf.space",
        "your-vercel-app.vercel.app",
        "localhost",
        "127.0.0.1",
        "::1",
    )
    for marker in marker_substrings:
        if marker in normalized:
            return True

    # Host-based check: exact match or any subdomain of a reserved name.
    host = _hostname(normalized)
    for reserved in ("example.com", "example.org", "example.net", "example"):
        if host == reserved or host.endswith("." + reserved):
            return True
    return False
def _hostname(url: str | None) -> str:
if not url:
return ""
try:
parsed = urlparse(url.strip())
return (parsed.hostname or "").lower()
except ValueError:
return ""
def _is_https_url(url: str | None) -> bool:
if not url:
return False
try:
parsed = urlparse(url.strip())
return parsed.scheme == "https" and bool(parsed.netloc)
except ValueError:
return False
def deployment_url_warnings(worker_url: str | None, vercel_origin: str | None) -> list[str]:
    """Collect human-readable problems with the two deployment URLs.

    At most one warning is produced per URL (placeholder, then non-HTTPS,
    then wrong-service host, in that priority), followed by one more when
    both URLs are identical apart from trailing slashes. An empty list means
    no problem was detected.
    """
    problems: list[str] = []

    # Worker URL: must be real, HTTPS, and not hosted on Vercel.
    if placeholder_or_test_url(worker_url):
        problems.append("Worker URL still looks like a placeholder, local address, or test URL.")
    elif not _is_https_url(worker_url):
        problems.append("Worker URL must be a full HTTPS URL, for example https://your-space.hf.space.")
    else:
        worker_host = _hostname(worker_url)
        if worker_host == "vercel.app" or worker_host.endswith(".vercel.app"):
            problems.append(
                "Worker URL looks like a Vercel site URL. Use the Hugging Face Space or Docker worker URL."
            )

    # Vercel origin: must be real, HTTPS, and not a Hugging Face Space host.
    if placeholder_or_test_url(vercel_origin):
        problems.append("Vercel origin still looks like a placeholder, local address, or test URL.")
    elif not _is_https_url(vercel_origin):
        problems.append("Vercel origin must be a full HTTPS URL, for example https://your-app.vercel.app.")
    else:
        origin_host = _hostname(vercel_origin)
        if origin_host == "hf.space" or origin_host.endswith(".hf.space"):
            problems.append(
                "Vercel origin looks like a Hugging Face worker URL. Use the Vercel production site URL."
            )

    # The site and the worker are expected to be distinct deployments.
    both_given = bool(worker_url) and bool(vercel_origin)
    if both_given and worker_url.rstrip("/") == vercel_origin.rstrip("/"):
        problems.append("Worker URL and Vercel origin are the same URL; they should be two deployed services.")

    return problems
def deployment_urls_look_real(worker_url: str | None, vercel_origin: str | None) -> bool:
    """Return True when ``deployment_url_warnings`` reports nothing for the URL pair."""
    problems = deployment_url_warnings(worker_url, vercel_origin)
    return len(problems) == 0
def looks_like_test_artifact(payload: object) -> bool:
    """Return True when a dict payload mentions pytest or placeholder hosts.

    The payload is serialised to JSON and searched case-insensitively, so
    markers are found in keys as well as in nested values. Non-dict payloads
    are never treated as test artifacts.
    """
    if isinstance(payload, dict):
        serialized = json.dumps(payload, ensure_ascii=False).lower()
        telltales = ("pytest-", "your-space.hf.space", "your-vercel-app.vercel.app")
        return any(marker in serialized for marker in telltales)
    return False
def live_proof_matches_urls(path: Path, worker_url: str | None, vercel_origin: str | None) -> bool:
    """Return True when the saved proof is complete and was made for these URLs.

    The proof must be a JSON object with a truthy ``"complete"``, must not
    look like a test artifact, and its ``"workerUrl"`` / ``"origin"`` must
    equal the expected URLs once trailing slashes are removed. The expected
    URLs themselves must also pass the "looks real" validation.
    """
    proof = load_json(path)
    if not isinstance(proof, dict):
        return False
    if not proof.get("complete"):
        return False
    if looks_like_test_artifact(proof):
        return False

    # Normalise both sides the same way: None -> "", trailing "/" dropped.
    wanted_worker = str(worker_url or "").rstrip("/")
    wanted_origin = str(vercel_origin or "").rstrip("/")
    if not deployment_urls_look_real(wanted_worker, wanted_origin):
        return False

    recorded_worker = str(proof.get("workerUrl") or "").rstrip("/")
    recorded_origin = str(proof.get("origin") or "").rstrip("/")
    return (recorded_worker, recorded_origin) == (wanted_worker, wanted_origin)
def choose_next_step(
    worker_url: str | None = None,
    vercel_origin: str | None = None,
    code: str = "1234",
    local_report: Path = LOCAL_REPORT,
    site_report: Path = SITE_REPORT,
    worker_report: Path = WORKER_REPORT,
    live_proof: Path = LIVE_PROOF,
) -> NextStep:
    """Decide which deployment step should be run next.

    The decision is made in this order:

    1. ``"local"`` when the local readiness report is not ready.
    2. ``"handoff"`` when the supplied URLs do not look like real deployments.
    3. If a complete live proof file exists: ``"complete"`` when the site and
       worker reports pass and the proof matches the URLs, otherwise
       ``"live-proof"`` (re-run the proof).
    4. ``"live-proof"`` when either the site or worker report is failing.
    5. ``"complete-ready"`` otherwise (run the final audit).

    Args:
        worker_url: URL of the deployed worker.
        vercel_origin: URL of the deployed Vercel site.
        code: Access code inserted into the live-proof command.
        local_report: Path of the local readiness report.
        site_report: Path of the site verification report.
        worker_report: Path of the worker verification report.
        live_proof: Path of the combined live proof file.

    Returns:
        The ``NextStep`` describing what to do and the command to run.
    """
    required_worker_checks = {
        "recommended stack documented",
        "smoke upload accepted",
        "smoke job complete",
        "smoke usable text",
        "smoke audio url",
        "smoke download url",
        "smoke audio bytes",
        "smoke audio file signature",
        "smoke download bytes",
        "smoke download file signature",
        "scanned smoke upload accepted",
        "scanned smoke job complete",
        "scanned smoke usable text",
        "scanned smoke OCR extraction",
        "scanned smoke audio url",
        "scanned smoke download url",
        "scanned smoke audio bytes",
        "scanned smoke audio file signature",
        "scanned smoke download bytes",
        "scanned smoke download file signature",
    }
    required_site_checks = {
        "site login",
        "site platform vercel",
        "site worker configured",
        "site large PDF ready",
        "site production worker ready",
        "site hosted limits documented",
        "site recommended stack documented",
        "site direct cloud fallback disabled",
        "site worker diagnostics endpoint",
        "site worker reachable from vercel",
        "site worker CORS ready",
    }

    if not local_ready(local_report):
        return NextStep(
            status="local",
            title="Prove local/repo readiness",
            detail="Run this before deploying so research, packaging, and local checks are current.",
            command=(
                "python scripts\\prove_local_readiness.py --refresh-research --check-key-links "
                "--check-hf-metadata --hf-metadata-report outputs\\hf-model-metadata.md"
            ),
        )

    if not deployment_urls_look_real(worker_url, vercel_origin):
        return NextStep(
            status="handoff",
            title="Create deployment handoff with real URLs",
            detail=(
                "Replace placeholder/test URLs with the real Hugging Face Space URL and Vercel production URL, "
                "then generate exact env vars and proof commands."
            ),
            command=(
                "python scripts\\deployment_handoff.py https://your-space.hf.space "
                "--origin https://your-vercel-app.vercel.app --code 1234"
            ),
        )

    # Both URLs passed validation above, so neither is None/empty here.
    # The same live-proof command is offered by two of the branches below.
    live_proof_command = (
        f"python scripts\\prove_live_deployment.py {worker_url.rstrip('/')} "
        f"--origin {vercel_origin.rstrip('/')} --code {code} "
        "--smoke-ocr-engine arabic "
        "--check-hf-metadata --hf-metadata-report outputs\\hf-model-metadata.md "
        "--proof-out outputs\\live-deployment-proof.json"
    )

    site_ok = report_checks_pass(site_report, required_site_checks)
    worker_ok = report_checks_pass(worker_report, required_worker_checks)
    reports_ok = site_ok and worker_ok

    if live_proof_complete(live_proof):
        if reports_ok and live_proof_matches_urls(live_proof, worker_url, vercel_origin):
            return NextStep(
                status="complete",
                title="Deployment proof is complete",
                detail="The combined live proof report says the deployed Vercel site and worker satisfy the goal audit.",
                command=f"Get-Content {live_proof}",
            )
        # A proof file exists but is stale or was made for other URLs.
        return NextStep(
            status="live-proof",
            title="Re-run live proof against real deployed URLs",
            detail=(
                "A complete live proof file exists, but the URLs or saved reports do not prove the current real "
                "Vercel site and Hugging Face worker. Re-run the live proof with the real deployed URLs."
            ),
            command=live_proof_command,
        )

    if reports_ok:
        return NextStep(
            status="complete-ready",
            title="Run final audit",
            detail="Both live reports exist and contain the required checks. The final audit should be complete.",
            command=(
                "python scripts\\audit_goal_readiness.py "
                "--worker-report outputs\\worker-verification.json "
                "--site-report outputs\\site-verification.json"
            ),
        )

    return NextStep(
        status="live-proof",
        title="Verify live Vercel site and OCR/TTS worker",
        detail="This writes site and worker reports, including embedded-text and scanned Arabic OCR smoke tests with audio/download proof.",
        command=live_proof_command,
    )
def main() -> None:
    """Parse the command line, pick the next step, and print it.

    With ``--json`` the step is printed as an indented JSON object; otherwise
    the title, the detail, a blank line, and the command are printed.
    """
    cli = argparse.ArgumentParser(
        description="Print the next command needed to finish the Arabic audio deployment proof."
    )
    cli.add_argument("--worker-url", help="Live worker URL, for example https://your-space.hf.space")
    cli.add_argument("--origin", help="Live Vercel origin, for example https://your-app.vercel.app")
    cli.add_argument("--code", default="1234", help="Access code for live verification.")
    cli.add_argument("--json", action="store_true", help="Print JSON.")
    options = cli.parse_args()

    step = choose_next_step(
        worker_url=options.worker_url,
        vercel_origin=options.origin,
        code=options.code,
    )

    if options.json:
        print(json.dumps(vars(step), indent=2))
    else:
        # The empty string produces the blank line between detail and command.
        print(step.title, step.detail, "", step.command, sep="\n")


if __name__ == "__main__":
    main()