"""Gate OCR/TTS/preprocessor candidates before promoting them.

The gate combines a score report (JSON produced by a scoring script) with a
license check and four manually confirmed flags. A candidate is "promotion
ready" only when no check is FAIL or WARN; otherwise it stays benchmark-only.
"""

from __future__ import annotations

import argparse
import json
import re
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any

PERMISSIVE_LICENSE_MARKERS = ("apache-2.0", "mit", "bsd", "openrail")
RESTRICTED_LICENSE_MARKERS = (
    "cc-by-nc",
    "non-commercial",
    "noncommercial",
    "llama",
    "gemma",
    "gpl",
    "license not declared",
    "not established",
    "check model card",
    "other",
)

# Permissive markers must not be glued to other letters, otherwise "mit" would
# match inside words such as "limited" or "permitted" and wave through a
# proprietary license. Digits and punctuation are allowed as neighbours so
# identifiers like "0bsd", "bsd-3-clause" or "openrail-m" still match.
_PERMISSIVE_LICENSE_PATTERN = re.compile(
    r"(?<![a-z])(?:"
    + "|".join(re.escape(marker) for marker in PERMISSIVE_LICENSE_MARKERS)
    + r")(?![a-z])"
)


@dataclass(frozen=True)
class GateCheck:
    """Outcome of one gate check."""

    name: str  # short identifier of the check, e.g. "license" or "score"
    status: str  # "PASS", "WARN" or "FAIL"
    detail: str  # human-readable explanation shown in the report


def load_json(path: Path) -> dict[str, Any]:
    """Read a score report from *path* and return it as a dict.

    Raises:
        FileNotFoundError: if *path* does not exist.
        ValueError: if the file's top-level JSON value is not an object
            (``json.JSONDecodeError``, a ``ValueError`` subclass, is raised
            for malformed JSON).
    """
    if not path.exists():
        raise FileNotFoundError(f"Score JSON not found: {path}")
    payload = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(payload, dict):
        raise ValueError(f"Score JSON must contain an object: {path}")
    return payload


def license_status(license_text: str, *, allow_restricted: bool = False) -> GateCheck:
    """Classify *license_text* as PASS, WARN or FAIL.

    Restricted markers are checked first and matched as plain substrings, so
    anything that merely mentions one of them is treated as restricted.
    Permissive markers are matched only when they are not embedded in a longer
    word. With ``allow_restricted=True`` restricted and unrecognized licenses
    PASS instead of FAIL/WARN; an empty license always FAILs.
    """
    normalized = license_text.strip().lower()
    if not normalized:
        return GateCheck("license", "FAIL", "missing license")
    if any(marker in normalized for marker in RESTRICTED_LICENSE_MARKERS):
        status = "PASS" if allow_restricted else "FAIL"
        return GateCheck("license", status, f"restricted or unclear: {license_text}")
    if _PERMISSIVE_LICENSE_PATTERN.search(normalized):
        return GateCheck("license", "PASS", license_text)
    status = "PASS" if allow_restricted else "WARN"
    return GateCheck("license", status, f"unrecognized license: {license_text}")


def _mapping(value: Any) -> dict[str, Any]:
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def score_status(payload: dict[str, Any], kind: str) -> GateCheck:
    """Turn a score report into the "score" gate check.

    The report counts as promotion-ready when either ``promotionReady`` or
    ``ready`` is truthy. For ``kind == "ocr"`` the report must additionally
    have a truthy ``comparison.beatsBaseline``. Sections that are missing or
    are not JSON objects are treated as empty instead of raising.
    """
    promotion_ready = bool(payload.get("promotionReady") or payload.get("ready"))
    best = _mapping(payload.get("best"))
    if kind == "ocr":
        comparison = _mapping(payload.get("comparison"))
        if not promotion_ready:
            return GateCheck("score", "FAIL", "OCR score report is not promotion-ready")
        if not comparison.get("beatsBaseline"):
            return GateCheck("score", "FAIL", "OCR candidate does not beat the wired baseline")
        return GateCheck(
            "score",
            "PASS",
            f"best={best.get('label', '-')} quality={best.get('quality', '-')} "
            f"delta={comparison.get('scoreDelta', '-')}",
        )
    if not promotion_ready:
        return GateCheck("score", "FAIL", f"{kind} score report is not promotion-ready")
    score = best.get("weightedScore", "-")
    label = best.get("voiceId") or best.get("label") or "-"
    return GateCheck("score", "PASS", f"best={label} weighted={score}")


def bool_check(name: str, ok: bool, detail: str) -> GateCheck:
    """Build a PASS/FAIL check from a boolean confirmation."""
    return GateCheck(name, "PASS" if ok else "FAIL", detail)


def evaluate_promotion(
    *,
    candidate_name: str,
    kind: str,
    license_text: str,
    score_payload: dict[str, Any],
    same_sample: bool,
    runtime_ok: bool,
    privacy_ok: bool,
    human_reviewed: bool,
    allow_restricted_license: bool = False,
) -> dict[str, Any]:
    """Run every gate check and return the decision as a JSON-friendly dict.

    Returns a dict with ``ready`` (True only when no check is FAIL or WARN),
    ``candidate``, ``kind``, ``checks`` (each check as a plain dict) and
    ``summary`` ("promotion ready" or "keep benchmark-only").
    """
    checks = [
        score_status(score_payload, kind),
        bool_check("same sample", same_sample, "same pages/text used for baseline and candidate"),
        license_status(license_text, allow_restricted=allow_restricted_license),
        bool_check("runtime", runtime_ok, "worker can handle model size, speed, cold starts, and memory"),
        bool_check("privacy/deployment", privacy_ok, "no unsafe external upload path or unclear service/API terms"),
        bool_check("human review", human_reviewed, "Arabic text/audio manually reviewed for meaning, order, and comfort"),
    ]
    # A WARN blocks promotion just like a FAIL.
    ready = all(check.status not in ("FAIL", "WARN") for check in checks)
    return {
        "ready": ready,
        "candidate": candidate_name,
        "kind": kind,
        "checks": [asdict(check) for check in checks],
        "summary": "promotion ready" if ready else "keep benchmark-only",
    }


def markdown_value(value: Any) -> str:
    """Return ``str(value)``, or "-" when *value* is None or an empty string."""
    if value is None or value == "":
        return "-"
    return str(value)


def _table_cell(value: Any) -> str:
    """Format *value* for a Markdown table cell.

    Line breaks are flattened to spaces and pipes are backslash-escaped so
    free-form text (for example the license string) cannot break the row.
    """
    flattened = " ".join(markdown_value(value).splitlines())
    return flattened.replace("|", "\\|") or "-"


def render_report(payload: dict[str, Any]) -> str:
    """Render an ``evaluate_promotion`` result as a Markdown document."""
    lines = [
        "# Model Promotion Gate",
        "",
        f"Candidate: {markdown_value(payload.get('candidate'))}",
        f"Type: {markdown_value(payload.get('kind'))}",
        f"Decision: {markdown_value(payload.get('summary'))}",
        "",
        "| Check | Status | Detail |",
        "| --- | --- | --- |",
    ]
    for check in payload.get("checks", []):
        cells = [_table_cell(check.get(key)) for key in ("name", "status", "detail")]
        lines.append("| " + " | ".join(cells) + " |")
    lines.extend(
        [
            "",
            "## Rule",
            "",
            "Promote only when the candidate wins on the exact same Arabic sample, the license is acceptable, the worker can run it, privacy/deployment terms are clear, and a human review confirms the Arabic text or audio is comfortable and faithful.",
            "Anything else stays benchmark-only.",
        ]
    )
    return "\n".join(lines).rstrip() + "\n"


def write_report(path: Path, payload: dict[str, Any]) -> None:
    """Write the Markdown report for *payload* to *path* as UTF-8.

    Missing parent directories are created.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(render_report(payload), encoding="utf-8")


def main(argv: list[str] | None = None) -> None:
    """Command-line entry point.

    Args:
        argv: argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Raises:
        SystemExit: with code 1 when the candidate is not promotion-ready.
    """
    parser = argparse.ArgumentParser(description="Gate OCR/TTS candidates before promoting them into the Arabic audio reader stack.")
    parser.add_argument("--candidate-name", required=True)
    parser.add_argument("--kind", choices=["ocr", "tts", "preprocessor"], required=True)
    parser.add_argument("--license", required=True, dest="license_text")
    parser.add_argument("--score-json", type=Path, required=True, help="JSON output from score_external_ocr.py, score_voice_listening.py, or score_tts_preprocessor.py.")
    parser.add_argument("--same-sample", action="store_true", help="Confirm the candidate and baseline used the exact same pages/text.")
    parser.add_argument("--runtime-ok", action="store_true", help="Confirm the target worker can run the candidate acceptably.")
    parser.add_argument("--privacy-ok", action="store_true", help="Confirm external service/API or local deployment terms are acceptable.")
    parser.add_argument("--human-reviewed", action="store_true", help="Confirm Arabic text/audio was manually reviewed.")
    parser.add_argument("--allow-restricted-license", action="store_true", help="Allow restricted licenses for personal-only experiments.")
    parser.add_argument("--write-report", type=Path)
    parser.add_argument("--json", action="store_true")
    args = parser.parse_args(argv)

    payload = evaluate_promotion(
        candidate_name=args.candidate_name,
        kind=args.kind,
        license_text=args.license_text,
        score_payload=load_json(args.score_json),
        same_sample=args.same_sample,
        runtime_ok=args.runtime_ok,
        privacy_ok=args.privacy_ok,
        human_reviewed=args.human_reviewed,
        allow_restricted_license=args.allow_restricted_license,
    )
    if args.write_report:
        write_report(args.write_report, payload)
        # Recorded after writing, so the path appears in the JSON output only.
        payload["reportPath"] = str(args.write_report)
    if args.json:
        print(json.dumps(payload, ensure_ascii=False, indent=2))
    else:
        print(f"{payload['candidate']}: {payload['summary']}")
        for check in payload["checks"]:
            print(f"- {check['status']} {check['name']}: {check['detail']}")
    if not payload["ready"]:
        raise SystemExit(1)


if __name__ == "__main__":
    main()