| from __future__ import annotations |
|
|
| import argparse |
| import hashlib |
| import json |
| import re |
| from dataclasses import dataclass |
| from pathlib import Path |
| from typing import Literal |
|
|
|
|
# Default project root used by every check: the parent of the directory that
# contains this file.
ROOT_DIR = Path(__file__).resolve().parent.parent
# The only outcomes a check may report; any "FAIL" makes main() exit with status 1.
Status = Literal["PASS", "WARN", "FAIL"]
# File name of the hash manifest that is looked up inside the exported bundle.
EXPORT_MANIFEST_NAME = ".export-manifest.json"
|
|
|
|
@dataclass
class Check:
    """One preflight result row, as appended by ``add`` and printed by ``print_table``."""

    # Group label the result is reported under (for example "Files" or "Vercel").
    category: str
    # Short label describing what was checked.
    name: str
    # Outcome of the check: "PASS", "WARN" or "FAIL".
    status: Status
    # Free-form text printed after the name (a path, a marker, a hint, ...).
    detail: str
|
|
|
|
def add(checks: list[Check], category: str, name: str, status: Status, detail: str) -> None:
    """Build a ``Check`` from the given fields and append it to ``checks`` in place."""
    entry = Check(category=category, name=name, status=status, detail=detail)
    checks.append(entry)
|
|
|
|
def load_json(path: Path) -> dict:
    """Read ``path`` as UTF-8 JSON and return its top-level object.

    This is deliberately best-effort: callers treat an empty dict as
    "nothing usable", so ``{}`` is returned when the file is missing or
    unreadable, is not valid UTF-8 or JSON, or when the top-level JSON value
    is not an object (a list, string, number, ...).

    Args:
        path: Location of the JSON document.

    Returns:
        The decoded JSON object, or ``{}`` on any of the failures above.
    """
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: missing/unreadable file. ValueError: covers both
        # UnicodeDecodeError and json.JSONDecodeError. RecursionError:
        # pathologically nested input.
        return {}
    # Honour the declared return type so callers can safely use ``.get``.
    return payload if isinstance(payload, dict) else {}
|
|
|
|
def dockerfile_copy_sources(dockerfile: str) -> list[str]:
    """Return the build-context source paths named by ``COPY`` instructions.

    Each line is inspected on its own (line continuations are not joined).
    For every ``COPY`` instruction all arguments except the last one (the
    destination) are collected.

    Handled forms:
      * ``COPY src... dest`` (shell form),
      * ``COPY ["src", ..., "dest"]`` (exec/JSON form),
      * leading ``--flag`` options such as ``--chown=...`` are skipped,
      * ``COPY --from=...`` lines are ignored entirely, because their sources
        come from another build stage or image rather than the build context,
      * the instruction keyword is matched case-insensitively.

    Args:
        dockerfile: Full text of a Dockerfile.

    Returns:
        Source paths in the order they appear; may contain duplicates.
    """
    sources: list[str] = []
    for raw_line in dockerfile.splitlines():
        tokens = raw_line.split(None, 1)
        if len(tokens) < 2 or tokens[0].upper() != "COPY":
            continue
        remainder = tokens[1].strip()

        # Peel off leading "--option" flags so they are not mistaken for paths.
        from_other_stage = False
        while remainder.startswith("--"):
            pieces = remainder.split(None, 1)
            if pieces[0].startswith("--from="):
                from_other_stage = True
            remainder = pieces[1] if len(pieces) > 1 else ""
        if from_other_stage:
            continue

        arguments: list[str] | None = None
        if remainder.startswith("["):
            # Exec form: a JSON array of strings; fall back to whitespace
            # splitting if it does not parse as one.
            try:
                decoded = json.loads(remainder)
            except ValueError:
                decoded = None
            if isinstance(decoded, list) and all(isinstance(item, str) for item in decoded):
                arguments = decoded
        if arguments is None:
            arguments = remainder.split()

        # A usable COPY needs at least one source and one destination.
        if len(arguments) < 2:
            continue
        sources.extend(arguments[:-1])
    return sources
|
|
|
|
def referenced_scripts(dockerfile: str) -> set[str]:
    """Collect the distinct ``scripts/<name>.sh`` paths mentioned in the Dockerfile text."""
    shell_script = re.compile(r"scripts/[A-Za-z0-9_.-]+\.sh")
    return {match.group(0) for match in shell_script.finditer(dockerfile)}
|
|
|
|
def file_sha256(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is read in 1 MiB pieces so large files are never held in memory
    in full.
    """
    piece_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while piece := stream.read(piece_size):
            hasher.update(piece)
    return hasher.hexdigest()
|
|
|
|
def should_manifest_copy(path: Path) -> bool:
    """Decide whether ``path`` belongs in the export manifest.

    A path is rejected when it is, or lies anywhere inside, a tool cache
    directory (``__pycache__``, ``.pytest_cache``, ``.ruff_cache``) or when it
    is a compiled Python artifact (``.pyc``, ``.pyo``, ``.pyd``).

    Every path component is inspected, not only the final name, so files
    that merely live inside a cache directory (for example
    ``scripts/.pytest_cache/README.md``) are rejected as well.

    Args:
        path: File or directory path to classify.

    Returns:
        ``True`` if the path should be included, ``False`` otherwise.
    """
    cache_directories = {"__pycache__", ".pytest_cache", ".ruff_cache"}
    compiled_suffixes = {".pyc", ".pyo", ".pyd"}
    if any(part in cache_directories for part in path.parts):
        return False
    return path.suffix not in compiled_suffixes
|
|
|
|
def expected_export_manifest(root: Path | None = None) -> dict[str, str]:
    """Compute the manifest (relative path -> SHA-256) the export should contain.

    Covers, in this order: the known requirements files that exist, the
    worker Dockerfile (hashed from ``Dockerfile.worker`` but recorded under
    the key ``"Dockerfile"``), and every file under ``app``, ``api``,
    ``docs``, ``static`` and ``scripts`` that ``should_manifest_copy`` accepts.

    Args:
        root: Project root to scan; ``ROOT_DIR`` is used when omitted.

    Returns:
        Mapping of POSIX-style relative paths to hex digests.
    """
    base_dir = root or ROOT_DIR
    requirement_names = (
        "requirements.txt",
        "requirements-silma.txt",
        "requirements-supertonic.txt",
        "requirements-paddleocr.txt",
        "requirements-paddleocr-vl.txt",
        "requirements-qari-ocr.txt",
        "requirements-tawkeed-ocr.txt",
        "requirements-katib-ocr.txt",
        "requirements-arabic-qwen-ocr.txt",
        "requirements-arabic-glm-ocr.txt",
        "requirements-baseer-ocr.txt",
    )
    manifest: dict[str, str] = {}
    for name in requirement_names:
        candidate = base_dir / name
        if candidate.exists():
            manifest[name] = file_sha256(candidate)

    worker_dockerfile = base_dir / "Dockerfile.worker"
    if worker_dockerfile.exists():
        # Recorded under the name "Dockerfile", not "Dockerfile.worker".
        manifest["Dockerfile"] = file_sha256(worker_dockerfile)

    for folder in ("app", "api", "docs", "static", "scripts"):
        tree = base_dir / folder
        if not tree.exists():
            continue
        for entry in sorted(tree.rglob("*")):
            if not entry.is_file() or not should_manifest_copy(entry):
                continue
            manifest[entry.relative_to(base_dir).as_posix()] = file_sha256(entry)
    return manifest
|
|
|
|
def check_required_files(root: Path = ROOT_DIR) -> list[Check]:
    """Report, for each required project path, whether it exists under ``root``.

    Every entry yields one "Files" check: PASS/"exists" when the path is
    present, FAIL/"missing" otherwise.
    """
    expected_paths = (
        "app/main.py",
        "api/index.py",
        "static/index.html",
        "static/app.js",
        "static/styles.css",
        "requirements.txt",
        "vercel.json",
        "Dockerfile.worker",
        "requirements-silma.txt",
        "requirements-supertonic.txt",
        "requirements-paddleocr.txt",
        "requirements-paddleocr-vl.txt",
        "requirements-qari-ocr.txt",
        "requirements-tawkeed-ocr.txt",
        "requirements-katib-ocr.txt",
        "requirements-arabic-qwen-ocr.txt",
        "requirements-arabic-glm-ocr.txt",
        "requirements-baseer-ocr.txt",
        "scripts/setup_silma.sh",
        "scripts/setup_supertonic.sh",
        "scripts/setup_paddleocr.sh",
        "scripts/setup_paddleocr_vl.sh",
        "scripts/setup_qari_ocr.sh",
        "scripts/setup_tawkeed_ocr.sh",
        "scripts/setup_katib_ocr.sh",
        "scripts/setup_arabic_qwen_ocr.sh",
        "scripts/setup_arabic_glm_ocr.sh",
        "scripts/setup_baseer_ocr.sh",
        "scripts/qari_ocr_extract.py",
        "scripts/tawkeed_ocr_extract.py",
        "scripts/katib_ocr_extract.py",
        "scripts/arabic_qwen_ocr_extract.py",
        "scripts/arabic_glm_ocr_extract.py",
        "scripts/baseer_ocr_extract.py",
        "scripts/audit_goal_readiness.py",
        "scripts/prove_local_readiness.py",
        "scripts/prove_live_deployment.py",
        "scripts/configure_vercel_worker.py",
        "scripts/finish_live_deployment.py",
        "scripts/deployment_handoff.py",
        "scripts/prepare_live_deployment.py",
        "scripts/validate_deployment_env.py",
        "scripts/hosted_preflight.py",
        "scripts/verify_site.py",
        "scripts/verify_worker.py",
        "scripts/preflight_check.py",
        "scripts/check_research_sources.py",
        "scripts/research_watchlist.py",
        "scripts/refresh_research_evidence.py",
        "scripts/export_tts_sample.py",
        "scripts/export_ocr_sample_images.py",
        "scripts/score_voice_listening.py",
        "scripts/score_tts_preprocessor.py",
        "scripts/score_external_ocr.py",
        "scripts/model_promotion_gate.py",
        "scripts/next_deployment_step.py",
        "scripts/deployment_status.py",
        "scripts/check_test_environment.py",
        "docs/live-deployment-checklist.md",
        "docs/father-user-guide.md",
        "docs/source-evidence.md",
        "docs/huggingface-model-metadata.md",
        "docs/research-watchlist.md",
        "docs/recommended-free-stack.md",
        "docs/recommended-decision-card.md",
        "docs/recommended-decision-card.json",
    )
    results: list[Check] = []
    for relative in expected_paths:
        present = (root / relative).exists()
        if present:
            add(results, "Files", relative, "PASS", "exists")
        else:
            add(results, "Files", relative, "FAIL", "missing")
    return results
|
|
|
|
def check_vercel(root: Path = ROOT_DIR) -> list[Check]:
    """Validate the parts of ``vercel.json`` that this project depends on.

    Checks performed (all in the "Vercel" category):
      * the config can be loaded as a non-empty JSON object (FAIL otherwise,
        and no further checks are run),
      * ``builds`` and ``functions`` are not both present (FAIL if they are),
      * some rewrite has destination ``/api/index.py`` (FAIL otherwise),
      * ``functions["api/index.py"].maxDuration`` is an int >= 60 (WARN
        otherwise).

    Unexpected JSON shapes (a non-object config, a non-list ``rewrites``, a
    non-object ``functions`` or function entry) are treated as "not
    configured" and produce FAIL/WARN results instead of raising.

    Args:
        root: Project root that contains ``vercel.json``.

    Returns:
        The list of check results.
    """
    checks: list[Check] = []
    config_path = root / "vercel.json"
    config = load_json(config_path)
    if not isinstance(config, dict):
        # A top-level list/string is valid JSON but not a usable Vercel config.
        config = {}
    add(checks, "Vercel", "config readable", "PASS" if config else "FAIL", str(config_path))
    if not config:
        return checks

    conflict = "builds" in config and "functions" in config
    add(
        checks,
        "Vercel",
        "functions/builds conflict",
        "FAIL" if conflict else "PASS",
        "do not use builds and functions together" if conflict else "ok",
    )

    rewrites = config.get("rewrites") or []
    # Only a list of objects can contain a matching rewrite; anything else fails the check.
    rewrite_entries = rewrites if isinstance(rewrites, list) else []
    rewrite_ok = any(
        isinstance(item, dict) and item.get("destination") == "/api/index.py"
        for item in rewrite_entries
    )
    add(checks, "Vercel", "FastAPI rewrite", "PASS" if rewrite_ok else "FAIL", json.dumps(rewrites))

    functions = config.get("functions")
    api_function = functions.get("api/index.py") if isinstance(functions, dict) else None
    max_duration = api_function.get("maxDuration", 0) if isinstance(api_function, dict) else 0
    add(
        checks,
        "Vercel",
        "function maxDuration",
        "PASS" if isinstance(max_duration, int) and max_duration >= 60 else "WARN",
        str(max_duration),
    )
    return checks
|
|
|
|
def check_worker(root: Path = ROOT_DIR) -> list[Check]:
    """Inspect ``Dockerfile.worker`` by searching its text for expected content.

    Produces "Worker" checks for: the base image string, required apt
    packages, expected environment settings, optional-install build args,
    conditional invocation of optional setup scripts, and the existence
    under ``root`` of every COPY source and every referenced shell script.
    A missing Dockerfile yields a single FAIL and nothing else.
    """
    results: list[Check] = []
    worker_file = root / "Dockerfile.worker"
    if not worker_file.exists():
        add(results, "Worker", "Dockerfile.worker", "FAIL", "missing")
        return results
    content = worker_file.read_text(encoding="utf-8")

    base_status = "PASS" if "python:3.10" in content else "WARN"
    add(results, "Worker", "base image", base_status, "Python 3.10 is expected")

    for package in ("tesseract-ocr-ara", "espeak-ng", "ffmpeg"):
        status = "PASS" if package in content else "FAIL"
        add(results, "Worker", f"apt package {package}", status, package)

    expected_env = (
        "WORK_DIR",
        "DATABASE_PATH",
        "OCR_ENGINE=tesseract",
        "OCR_RENDER_ZOOM=2",
        "TESSERACT_PSM=4",
        "AUDIO_FORMAT=mp3",
    )
    for env_key in expected_env:
        status = "PASS" if env_key in content else "WARN"
        add(results, "Worker", f"env {env_key}", status, env_key)

    build_arg_defaults = (
        "INSTALL_QARI_OCR=0",
        "INSTALL_TAWKEED_OCR=0",
        "INSTALL_KATIB_OCR=0",
        "INSTALL_ARABIC_QWEN_OCR=0",
        "INSTALL_ARABIC_GLM_OCR=0",
        "INSTALL_BASEER_OCR=0",
        "INSTALL_PADDLEOCR_VL=0",
        "INSTALL_SUPERTONIC=0",
    )
    for assignment in build_arg_defaults:
        declared = f"ARG {assignment}" in content
        add(
            results,
            "Worker",
            f"optional build {assignment}",
            "PASS" if declared else "WARN",
            "lets stronger workers install heavy OCR sidecars",
        )

    # (setup script, suffix of the INSTALL_* build arg that must guard it)
    guarded_scripts = (
        ("scripts/setup_supertonic.sh", "SUPERTONIC"),
        ("scripts/setup_qari_ocr.sh", "QARI_OCR"),
        ("scripts/setup_tawkeed_ocr.sh", "TAWKEED_OCR"),
        ("scripts/setup_katib_ocr.sh", "KATIB_OCR"),
        ("scripts/setup_arabic_qwen_ocr.sh", "ARABIC_QWEN_OCR"),
        ("scripts/setup_arabic_glm_ocr.sh", "ARABIC_GLM_OCR"),
        ("scripts/setup_baseer_ocr.sh", "BASEER_OCR"),
        ("scripts/setup_paddleocr_vl.sh", "PADDLEOCR_VL"),
    )
    for setup_script, arg_suffix in guarded_scripts:
        guard = f'if [ "$INSTALL_{arg_suffix}" = "1" ]'
        guarded = setup_script in content and guard in content
        add(
            results,
            "Worker",
            f"conditional {setup_script}",
            "PASS" if guarded else "WARN",
            "optional heavy OCR install is controlled by build arg",
        )

    for source in dockerfile_copy_sources(content):
        present = (root / source).exists()
        add(
            results,
            "Worker",
            f"COPY source {source}",
            "PASS" if present else "FAIL",
            "exists" if present else "missing",
        )

    for script in sorted(referenced_scripts(content)):
        present = (root / script).exists()
        add(
            results,
            "Worker",
            f"referenced {script}",
            "PASS" if present else "FAIL",
            "exists" if present else "missing",
        )
    return results
|
|
|
|
def check_ignore_files(root: Path = ROOT_DIR) -> list[Check]:
    """Check ``.gitignore``, ``.dockerignore`` and ``.vercelignore`` under ``root``.

    For each ignore file three "Ignore" checks are produced (all WARN on
    failure): the file is present and non-empty, its text mentions ``.env``,
    and it has a line equal to ``outputs`` (a trailing ``/`` is allowed).

    Additionally, ``.vercelignore`` must not exclude ``app``, ``api``,
    ``static`` or ``requirements.txt`` (FAIL if it does). Leading and
    trailing slashes are ignored for this comparison, so ``app/`` and
    ``/app`` are detected as excluding ``app`` just like a bare ``app`` line.

    Args:
        root: Project root that holds the ignore files.

    Returns:
        The list of check results.
    """
    checks: list[Check] = []
    contents: dict[str, str] = {}
    for name in (".gitignore", ".dockerignore", ".vercelignore"):
        ignore_path = root / name
        # A missing file is treated exactly like an empty one.
        contents[name] = ignore_path.read_text(encoding="utf-8") if ignore_path.exists() else ""

    for name, content in contents.items():
        add(checks, "Ignore", name, "PASS" if content else "WARN", "present" if content else "missing")
        add(checks, "Ignore", f"{name} excludes .env", "PASS" if ".env" in content else "WARN", "secrets should not deploy")
        excludes_outputs = any(line.strip().rstrip("/") == "outputs" for line in content.splitlines())
        add(
            checks,
            "Ignore",
            f"{name} excludes outputs",
            "PASS" if excludes_outputs else "WARN",
            "generated handoffs/audio should not deploy or commit",
        )

    # Normalise "app/", "/app" and "app" to the same entry before comparing.
    vercel_entries = {line.strip().strip("/") for line in contents[".vercelignore"].splitlines()}
    for required in ["app", "api", "static", "requirements.txt"]:
        excluded = required in vercel_entries
        add(checks, "Ignore", f"Vercel keeps {required}", "FAIL" if excluded else "PASS", "not excluded" if not excluded else "excluded")
    return checks
|
|
|
|
def check_deployment_handoff(root: Path = ROOT_DIR) -> list[Check]:
    """Verify that ``scripts/deployment_handoff.py`` contains the expected text markers.

    Each marker yields one "Handoff" check: PASS (detail = the marker) when
    the substring occurs in the script, FAIL (detail = "missing marker: ...")
    otherwise. A missing script is read as empty text, so every marker fails.
    """
    script = root / "scripts" / "deployment_handoff.py"
    source = ""
    if script.exists():
        source = script.read_text(encoding="utf-8", errors="replace")

    expected_markers = (
        ("deployment prep command", "prepare_live_deployment.py"),
        ("Vercel CLI production deploy", "vercel --prod --yes"),
        ("Vercel worker env command", "vercel env add WORKER_BASE_URL production"),
        ("Hugging Face deploy helper", "deploy_hf_space.py"),
        ("Hugging Face Space bundle path", "outputs/huggingface-space"),
        ("deployment status command", "deployment_status.py --worker-url"),
        ("Vercel worker diagnostic command", "hosted_preflight.py"),
        ("Vercel worker CORS diagnostic", "site worker CORS ready"),
        ("live proof command", "prove_live_deployment.py"),
        ("worker verification report", "worker-verification.json"),
        ("copy paste secret block", "Copy/paste secret values"),
        ("copy paste build args", "Copy/paste balanced build args"),
        ("copy paste Vercel env", "Copy/paste Vercel values"),
        ("Vercel direct cloud fallback cleanup", "vercel env rm ENABLE_DIRECT_CLOUD_TTS production --yes"),
        ("Vercel Hugging Face token cleanup", "vercel env rm HF_API_TOKEN production --yes"),
        ("Vercel Hugging Face model cleanup", "vercel env rm HF_TTS_MODEL production --yes"),
        ("Vercel default voice cleanup", "vercel env rm DEFAULT_VOICE_ID production --yes"),
    )
    results: list[Check] = []
    for label, marker in expected_markers:
        if marker in source:
            add(results, "Handoff", label, "PASS", marker)
        else:
            add(results, "Handoff", label, "FAIL", f"missing marker: {marker}")
    return results
|
|
|
|
def check_deployment_quickstart(root: Path = ROOT_DIR) -> list[Check]:
    """Verify that ``outputs/deployment-quickstart.md`` contains the expected markers.

    If the quickstart file does not exist, a single WARN is returned that
    points at ``scripts/prepare_live_deployment.py``. Otherwise each marker
    yields one "Quickstart" check: PASS when the substring occurs in the file
    (detail = the text that was found), FAIL otherwise (detail =
    "missing marker: ...").

    The worker bundle path marker is accepted with either path separator
    (``outputs\\huggingface-space`` or ``outputs/huggingface-space``) so the
    check does not depend on the platform the quickstart was written on.

    Args:
        root: Project root that contains the ``outputs`` directory.

    Returns:
        The list of check results.
    """
    checks: list[Check] = []
    path = root / "outputs" / "deployment-quickstart.md"
    if not path.exists():
        add(checks, "Quickstart", "deployment quickstart", "WARN", "run scripts/prepare_live_deployment.py")
        return checks
    text = path.read_text(encoding="utf-8", errors="replace")
    markers = {
        "Hugging Face new Space link": "https://huggingface.co/new-space",
        "Hugging Face Docker docs link": "https://huggingface.co/docs/hub/main/en/spaces-sdks-docker",
        "Vercel new project link": "https://vercel.com/new",
        "Vercel FastAPI docs link": "https://vercel.com/docs/frameworks/backend/fastapi",
        "handoff command": "deployment_handoff.py",
        "hosted preflight command": "hosted_preflight.py",
        "hosted preflight report": "hosted-preflight.json",
        "worker CORS ready reminder": "site worker CORS ready",
        "worker bundle path": "outputs\\huggingface-space",
        "final proof warning": "live-deployment-proof.json",
        "safety checklist": "Safety Checklist",
        "shared secret reminder": "same generated `SECRET_KEY`",
        "worker URL reminder": "`WORKER_BASE_URL` at the exact Hugging Face Space URL",
        "direct cloud fallback disabled": "direct Hugging Face TTS disabled",
        "temporary cloud variable cleanup": "ENABLE_DIRECT_CLOUD_TTS",
        "current next command": "Current Next Command",
    }
    # Equivalent spellings that also satisfy a marker (same path, other separator).
    alternates = {"outputs\\huggingface-space": ("outputs/huggingface-space",)}
    for name, marker in markers.items():
        candidates = (marker, *alternates.get(marker, ()))
        found = next((candidate for candidate in candidates if candidate in text), None)
        add(
            checks,
            "Quickstart",
            name,
            "PASS" if found is not None else "FAIL",
            found if found is not None else f"missing marker: {marker}",
        )
    return checks
|
|
|
|
def check_env_validator(root: Path = ROOT_DIR) -> list[Check]:
    """Verify that ``scripts/validate_deployment_env.py`` contains the expected markers.

    Each marker yields one "Env validator" check: PASS (detail = the marker)
    when the substring occurs in the script, FAIL (detail =
    "missing marker: ...") otherwise. A missing script is read as empty text.
    """
    validator = root / "scripts" / "validate_deployment_env.py"
    source = ""
    if validator.exists():
        source = validator.read_text(encoding="utf-8", errors="replace")

    expected_markers = (
        ("validator rejects Vercel as worker URL", "WORKER_BASE_URL is not Vercel site"),
        ("validator prefers Hugging Face Space worker", "WORKER_BASE_URL uses free worker host"),
        ("validator rejects wildcard CORS", "CORS_ORIGINS has no wildcard"),
        ("validator rejects non-Vercel CORS origins", "CORS_ORIGINS are Vercel origins"),
    )
    results: list[Check] = []
    for label, marker in expected_markers:
        if marker in source:
            add(results, "Env validator", label, "PASS", marker)
        else:
            add(results, "Env validator", label, "FAIL", f"missing marker: {marker}")
    return results
|
|
|
|
def check_app_contract(root: Path = ROOT_DIR) -> list[Check]:
    """Verify that app, UI, site verifier and goal audit sources contain the expected markers.

    Four files are read (``app/main.py``, ``static/app.js``,
    ``scripts/verify_site.py``, ``scripts/audit_goal_readiness.py``); a
    missing file counts as empty text. Each marker yields one "App contract"
    check: PASS (detail = the marker) when the substring occurs in its file,
    FAIL (detail = "missing marker: ...") otherwise.
    """

    def read_source(*parts: str) -> str:
        # Missing files are treated as empty so their markers simply fail.
        target = root.joinpath(*parts)
        if not target.exists():
            return ""
        return target.read_text(encoding="utf-8", errors="replace")

    backend = read_source("app", "main.py")
    frontend = read_source("static", "app.js")
    site_verifier = read_source("scripts", "verify_site.py")
    goal_audit = read_source("scripts", "audit_goal_readiness.py")

    expectations = (
        ("health exposes productionReady", backend, '"productionReady"'),
        ("health exposes nextAction", backend, '"nextAction"'),
        ("health blocks direct cloud fallback readiness", backend, "not direct_cloud_fallback"),
        ("ui shows deployment next action", frontend, "deploymentStatus.nextAction"),
        ("ui warns when productionReady false", frontend, "deploymentStatus.productionReady === false"),
        ("site verifier checks productionReady", site_verifier, "site production worker ready"),
        ("site verifier checks worker diagnostics", site_verifier, "site worker reachable from vercel"),
        ("site verifier checks worker CORS", site_verifier, "site worker CORS ready"),
        ("goal audit requires productionReady proof", goal_audit, "site production worker ready"),
        ("goal audit requires worker diagnostics proof", goal_audit, "site worker reachable from vercel"),
        ("goal audit requires worker CORS proof", goal_audit, "site worker CORS ready"),
    )
    results: list[Check] = []
    for label, haystack, marker in expectations:
        if marker in haystack:
            add(results, "App contract", label, "PASS", marker)
        else:
            add(results, "App contract", label, "FAIL", f"missing marker: {marker}")
    return results
|
|
|
|
def check_hf_space_export(root: Path = ROOT_DIR) -> list[Check]:
    """Validate the exported bundle at ``outputs/huggingface-space`` under ``root``.

    If the bundle directory does not exist, a single WARN is returned.
    Otherwise "HF Space" checks are produced, in this order, for:
      * presence of every required bundle entry,
      * agreement between the bundle manifest and ``expected_export_manifest``
        (missing or changed entries fail; extra entries are only counted),
      * the default OCR model names appearing in both ``app/main.py`` and the
        matching extract script,
      * the Arabic-Qwen runner markers and its requirements entry,
      * the health/UI/verifier contract markers,
      * absence of entries that must not be exported.
    """
    results: list[Check] = []
    bundle = root / "outputs" / "huggingface-space"
    if not bundle.exists():
        add(results, "HF Space", "export bundle", "WARN", "run scripts/export_hf_space.py --force")
        return results

    def bundle_text(*parts: str) -> str:
        # Text of a bundle file, or "" when the file is absent.
        target = bundle.joinpath(*parts)
        if not target.exists():
            return ""
        return target.read_text(encoding="utf-8", errors="replace")

    required_entries = (
        "Dockerfile",
        "README.md",
        ".dockerignore",
        EXPORT_MANIFEST_NAME,
        "requirements.txt",
        "requirements-silma.txt",
        "requirements-supertonic.txt",
        "requirements-paddleocr.txt",
        "requirements-paddleocr-vl.txt",
        "requirements-qari-ocr.txt",
        "requirements-tawkeed-ocr.txt",
        "requirements-katib-ocr.txt",
        "requirements-arabic-qwen-ocr.txt",
        "requirements-arabic-glm-ocr.txt",
        "requirements-baseer-ocr.txt",
        ".export-complete",
        "app/main.py",
        "api/index.py",
        "static/index.html",
        "scripts/setup_silma.sh",
        "scripts/setup_supertonic.sh",
        "scripts/setup_paddleocr.sh",
        "scripts/setup_paddleocr_vl.sh",
        "scripts/setup_qari_ocr.sh",
        "scripts/setup_tawkeed_ocr.sh",
        "scripts/setup_katib_ocr.sh",
        "scripts/setup_arabic_qwen_ocr.sh",
        "scripts/setup_arabic_glm_ocr.sh",
        "scripts/setup_baseer_ocr.sh",
        "scripts/qari_ocr_extract.py",
        "scripts/tawkeed_ocr_extract.py",
        "scripts/katib_ocr_extract.py",
        "scripts/arabic_qwen_ocr_extract.py",
        "scripts/arabic_glm_ocr_extract.py",
        "scripts/baseer_ocr_extract.py",
        "scripts/audit_goal_readiness.py",
        "scripts/prove_local_readiness.py",
        "scripts/prove_live_deployment.py",
        "scripts/deployment_handoff.py",
        "scripts/prepare_live_deployment.py",
        "scripts/validate_deployment_env.py",
        "scripts/verify_site.py",
        "scripts/check_research_sources.py",
        "scripts/research_watchlist.py",
        "scripts/refresh_research_evidence.py",
        "scripts/export_tts_sample.py",
        "scripts/export_ocr_sample_images.py",
        "scripts/score_voice_listening.py",
        "scripts/score_tts_preprocessor.py",
        "scripts/score_external_ocr.py",
        "scripts/model_promotion_gate.py",
        "scripts/next_deployment_step.py",
        "scripts/deployment_status.py",
        "docs/live-deployment-checklist.md",
        "docs/father-user-guide.md",
        "docs/source-evidence.md",
        "docs/huggingface-model-metadata.md",
        "docs/research-watchlist.md",
        "docs/recommended-free-stack.md",
        "docs/recommended-decision-card.md",
        "docs/recommended-decision-card.json",
    )
    forbidden_entries = (".env", "uploads", "outputs", "data", "test_pdfs", ".venv", ".venv-silma", ".venv-ocr")

    for relative in required_entries:
        present = (bundle / relative).exists()
        add(
            results,
            "HF Space",
            f"bundle has {relative}",
            "PASS" if present else "FAIL",
            "exists" if present else "missing",
        )

    # Compare the hashes recorded in the bundle with hashes of the current sources.
    recorded = load_json(bundle / EXPORT_MANIFEST_NAME)
    recorded_files = recorded.get("files") if isinstance(recorded, dict) else None
    expected = expected_export_manifest(root)
    if not isinstance(recorded_files, dict):
        add(results, "HF Space", "bundle manifest matches source", "FAIL", "missing or invalid manifest")
    else:
        missing = sorted(key for key in expected if key not in recorded_files)
        changed = sorted(key for key, digest in expected.items() if recorded_files.get(key) != digest)
        extra = sorted(key for key in recorded_files if key not in expected)
        # Extra entries are reported in the detail but do not fail the check.
        in_sync = not missing and not changed
        add(
            results,
            "HF Space",
            "bundle manifest matches source",
            "PASS" if in_sync else "FAIL",
            f"missing={len(missing)} changed={len(changed)} extra={len(extra)}",
        )

    main_source = bundle_text("app", "main.py")
    ui_source = bundle_text("static", "app.js")
    site_verifier = bundle_text("scripts", "verify_site.py")
    qwen_runner = bundle_text("scripts", "arabic_qwen_ocr_extract.py")
    qwen_requirements = bundle_text("requirements-arabic-qwen-ocr.txt")

    def add_default_check(label: str, default: str, runner_source: str) -> None:
        # The default model name must appear in the app and in its extract script.
        uses_default = default in main_source and default in runner_source
        add(results, "HF Space", label, "PASS" if uses_default else "FAIL", default)

    add_default_check(
        "bundle uses QARI-OCR 0.4 default",
        "Qari-OCR-0.4.0-VL-4B-Instruct",
        bundle_text("scripts", "qari_ocr_extract.py"),
    )
    add_default_check(
        "bundle uses Tawkeed Arabic OCR default",
        "tawkeed-sa/tawkeed-ocr",
        bundle_text("scripts", "tawkeed_ocr_extract.py"),
    )
    add_default_check(
        "bundle uses KATIB Arabic OCR default",
        "Katib-Qwen3.5-0.8B-0.1",
        bundle_text("scripts", "katib_ocr_extract.py"),
    )
    add_default_check(
        "bundle uses Arabic-Qwen3.5 OCR default",
        "Arabic-Qwen3.5-OCR-v4",
        qwen_runner,
    )

    runner_markers = ("Qwen3_5ForConditionalGeneration", "process_vision_info", "trust_remote_code=True")
    has_runner = all(marker in qwen_runner for marker in runner_markers) and "qwen-vl-utils" in qwen_requirements
    add(
        results,
        "HF Space",
        "bundle has Arabic-Qwen3.5 model-specific runner",
        "PASS" if has_runner else "FAIL",
        "Qwen3_5 loader plus qwen-vl-utils",
    )

    add_default_check(
        "bundle uses Arabic-GLM OCR default",
        "Arabic-GLM-OCR-v2",
        bundle_text("scripts", "arabic_glm_ocr_extract.py"),
    )
    add_default_check(
        "bundle uses Baseer OCR default",
        "Baseer-OCR-V1.0",
        bundle_text("scripts", "baseer_ocr_extract.py"),
    )

    contract_expectations = (
        ("bundle health exposes productionReady", main_source, '"productionReady"'),
        ("bundle health exposes nextAction", main_source, '"nextAction"'),
        ("bundle health rejects direct cloud fallback readiness", main_source, "not direct_cloud_fallback"),
        ("bundle UI shows deployment next action", ui_source, "deploymentStatus.nextAction"),
        ("bundle UI warns when productionReady false", ui_source, "deploymentStatus.productionReady === false"),
        ("bundle verifier checks productionReady", site_verifier, "site production worker ready"),
        ("bundle verifier checks worker diagnostics", site_verifier, "site worker reachable from vercel"),
        ("bundle verifier checks worker CORS", site_verifier, "site worker CORS ready"),
    )
    for label, haystack, marker in contract_expectations:
        if marker in haystack:
            add(results, "HF Space", label, "PASS", marker)
        else:
            add(results, "HF Space", label, "FAIL", f"missing marker: {marker}")

    for relative in forbidden_entries:
        leaked = (bundle / relative).exists()
        add(
            results,
            "HF Space",
            f"bundle excludes {relative}",
            "FAIL" if leaked else "PASS",
            "present" if leaked else "excluded",
        )
    return results
|
|
|
|
def collect_checks(root: Path = ROOT_DIR) -> list[Check]:
    """Run every check group against ``root`` and return all results in run order."""
    stages = (
        check_required_files,
        check_vercel,
        check_worker,
        check_ignore_files,
        check_deployment_handoff,
        check_deployment_quickstart,
        check_env_validator,
        check_app_contract,
        check_hf_space_export,
    )
    collected: list[Check] = []
    for stage in stages:
        collected += stage(root)
    return collected
|
|
|
|
def summarize(checks: list[Check]) -> dict[str, object]:
    """Build the JSON-ready summary of a check run.

    Returns a dict with ``ready`` (true when nothing failed), ``counts``
    (number of results per status) and ``checks`` (each result's attribute
    dict, in input order).
    """
    tally = dict.fromkeys(("PASS", "WARN", "FAIL"), 0)
    for item in checks:
        tally[item.status] = tally[item.status] + 1
    nothing_failed = tally["FAIL"] == 0
    rows = [vars(item) for item in checks]
    return {"ready": nothing_failed, "counts": tally, "checks": rows}
|
|
|
|
def print_table(checks: list[Check]) -> None:
    """Print one line per check: status, category and name in padded columns, then the detail."""
    for item in checks:
        columns = (
            format(item.status, "<4"),
            format(item.category, "<8"),
            format(item.name, "<36"),
            format(item.detail),
        )
        print(" ".join(columns))
|
|
|
|
def main() -> None:
    """Command-line entry point: run all checks, print them, exit 1 on any FAIL.

    With ``--json`` the summary from ``summarize`` is printed as indented
    JSON; otherwise the compact table from ``print_table`` is printed.
    """
    parser = argparse.ArgumentParser(description="Check Vercel and Docker worker deployment files before deploying.")
    parser.add_argument("--json", action="store_true", help="Print JSON instead of a compact table.")
    options = parser.parse_args()

    results = collect_checks()
    if not options.json:
        print_table(results)
    else:
        print(json.dumps(summarize(results), indent=2))

    has_failure = any(item.status == "FAIL" for item in results)
    if has_failure:
        raise SystemExit(1)


if __name__ == "__main__":
    main()
|
|