| from __future__ import annotations |
|
|
| import argparse |
| import json |
| from dataclasses import asdict, dataclass |
| from pathlib import Path |
| from typing import Any |
|
|
|
|
# Lower-case fragments looked up inside the normalized (stripped, lower-cased)
# license string by ``license_status``.

# Markers that let a license pass the gate outright.
PERMISSIVE_LICENSE_MARKERS = (
    "apache-2.0",
    "mit",
    "bsd",
    "openrail",
)

# Markers that flag a license as restricted or unclear. ``license_status``
# consults these before the permissive markers, so a restricted hit wins.
RESTRICTED_LICENSE_MARKERS = (
    # Explicit non-commercial wording.
    "cc-by-nc",
    "non-commercial",
    "noncommercial",
    # License families treated as restricted by this gate.
    "llama",
    "gemma",
    "gpl",
    # Placeholder phrases meaning the license is not actually known.
    "license not declared",
    "not established",
    "check model card",
    "other",
)
|
|
|
|
@dataclass(frozen=True)
class GateCheck:
    """Immutable result of a single promotion-gate check."""

    # Short label identifying the check, e.g. "license" or "score".
    name: str
    # Verdict string; the checks in this file emit "PASS", "WARN" or "FAIL".
    status: str
    # Human-readable explanation shown next to the verdict.
    detail: str
|
|
|
|
def load_json(path: Path) -> dict[str, Any]:
    """Read a score report from *path* and return its top-level JSON object.

    Raises:
        FileNotFoundError: if *path* does not exist.
        ValueError: if the document parses but its top level is not an object.
            Malformed JSON propagates as ``json.JSONDecodeError``.
    """
    if not path.exists():
        raise FileNotFoundError(f"Score JSON not found: {path}")

    with path.open("r", encoding="utf-8") as handle:
        document = json.load(handle)

    if isinstance(document, dict):
        return document
    raise ValueError(f"Score JSON must contain an object: {path}")
|
|
|
|
def _has_standalone_marker(text: str, marker: str) -> bool:
    """Return True when *marker* occurs in *text* without an ASCII letter on either side."""
    import re  # function-scope import keeps this block self-contained

    # Digits and punctuation may touch the marker ("0bsd", "mit-0",
    # "bsd-3-clause", "creativeml-openrail-m"); letters may not ("permitted").
    return re.search(rf"(?<![a-z]){re.escape(marker)}(?![a-z])", text) is not None


def license_status(license_text: str, *, allow_restricted: bool = False) -> GateCheck:
    """Classify a declared license string for the promotion gate.

    The text is stripped and lower-cased, then evaluated in this order:

    1. Empty text -> ``FAIL`` ("missing license").
    2. Any ``RESTRICTED_LICENSE_MARKERS`` entry found -> ``FAIL``, or ``PASS``
       when *allow_restricted* is set.
    3. Any ``PERMISSIVE_LICENSE_MARKERS`` entry found as a standalone token
       -> ``PASS``.
    4. Anything else -> ``WARN``, or ``PASS`` when *allow_restricted* is set.

    Args:
        license_text: License name or identifier as supplied by the caller.
        allow_restricted: Let restricted and unrecognized licenses pass.

    Returns:
        A ``GateCheck`` named "license".
    """
    normalized = license_text.strip().lower()
    if not normalized:
        return GateCheck("license", "FAIL", "missing license")

    # Restricted markers deliberately stay plain substring matches: a false
    # hit only blocks a promotion, and variants such as "lgpl", "agpl" or
    # "llama3" must still be caught.
    if any(marker in normalized for marker in RESTRICTED_LICENSE_MARKERS):
        status = "PASS" if allow_restricted else "FAIL"
        return GateCheck("license", status, f"restricted or unclear: {license_text}")

    # Permissive markers must stand alone. A plain substring test let "mit"
    # match inside words like "permitted" or "limited", so a text such as
    # "commercial use not permitted" was approved.
    if any(_has_standalone_marker(normalized, marker) for marker in PERMISSIVE_LICENSE_MARKERS):
        return GateCheck("license", "PASS", license_text)

    status = "PASS" if allow_restricted else "WARN"
    return GateCheck("license", status, f"unrecognized license: {license_text}")
|
|
|
|
def score_status(payload: dict[str, Any], kind: str) -> GateCheck:
    """Turn a score report into the "score" gate check.

    The report must be flagged ready through a truthy ``promotionReady`` or
    ``ready`` key. For ``kind == "ocr"`` the report's ``comparison`` must also
    have a truthy ``beatsBaseline``. The PASS detail summarizes the ``best``
    entry of the report.

    Args:
        payload: Parsed score report.
        kind: Candidate type; "ocr" gets the baseline comparison, every other
            value is reported with its weighted score.

    Returns:
        A ``GateCheck`` named "score" with status "PASS" or "FAIL".
    """
    is_ocr = kind == "ocr"

    flagged_ready = payload.get("promotionReady") or payload.get("ready")
    if not flagged_ready:
        reason = (
            "OCR score report is not promotion-ready"
            if is_ocr
            else f"{kind} score report is not promotion-ready"
        )
        return GateCheck("score", "FAIL", reason)

    top = payload.get("best") or {}

    if not is_ocr:
        weighted = top.get("weightedScore", "-")
        winner = top.get("voiceId") or top.get("label") or "-"
        return GateCheck("score", "PASS", f"best={winner} weighted={weighted}")

    versus = payload.get("comparison") or {}
    if not versus.get("beatsBaseline"):
        return GateCheck("score", "FAIL", "OCR candidate does not beat the wired baseline")

    summary = (
        f"best={top.get('label', '-')} "
        f"quality={top.get('quality', '-')} "
        f"delta={versus.get('scoreDelta', '-')}"
    )
    return GateCheck("score", "PASS", summary)
|
|
|
|
def bool_check(name: str, ok: bool, detail: str) -> GateCheck:
    """Build a check called *name* that passes when *ok* is truthy and fails otherwise."""
    if ok:
        return GateCheck(name, "PASS", detail)
    return GateCheck(name, "FAIL", detail)
|
|
|
|
def evaluate_promotion(
    *,
    candidate_name: str,
    kind: str,
    license_text: str,
    score_payload: dict[str, Any],
    same_sample: bool,
    runtime_ok: bool,
    privacy_ok: bool,
    human_reviewed: bool,
    allow_restricted_license: bool = False,
) -> dict[str, Any]:
    """Run every gate check for a candidate and assemble the decision.

    Args:
        candidate_name: Name echoed back under the "candidate" key.
        kind: Candidate type, forwarded to ``score_status``.
        license_text: Declared license, forwarded to ``license_status``.
        score_payload: Parsed score report.
        same_sample: Manual confirmation for the "same sample" check.
        runtime_ok: Manual confirmation for the "runtime" check.
        privacy_ok: Manual confirmation for the "privacy/deployment" check.
        human_reviewed: Manual confirmation for the "human review" check.
        allow_restricted_license: Forwarded to ``license_status``.

    Returns:
        A dict with "ready", "candidate", "kind", "checks" (each check as a
        plain dict) and "summary". The candidate is ready only when no check
        ended in FAIL or WARN.
    """
    results: list[GateCheck] = [
        score_status(score_payload, kind),
        bool_check("same sample", same_sample, "same pages/text used for baseline and candidate"),
        license_status(license_text, allow_restricted=allow_restricted_license),
    ]
    manual_confirmations = (
        ("runtime", runtime_ok, "worker can handle model size, speed, cold starts, and memory"),
        ("privacy/deployment", privacy_ok, "no unsafe external upload path or unclear service/API terms"),
        ("human review", human_reviewed, "Arabic text/audio manually reviewed for meaning, order, and comfort"),
    )
    results.extend(bool_check(label, confirmed, note) for label, confirmed, note in manual_confirmations)

    # A warning blocks promotion just like a failure does.
    is_ready = not any(result.status in ("FAIL", "WARN") for result in results)

    if is_ready:
        verdict = "promotion ready"
    else:
        verdict = "keep benchmark-only"

    return {
        "ready": is_ready,
        "candidate": candidate_name,
        "kind": kind,
        "checks": [asdict(result) for result in results],
        "summary": verdict,
    }
|
|
|
|
def markdown_value(value: Any) -> str:
    """Return ``str(value)``, or "-" as a placeholder for ``None`` and the empty string."""
    return "-" if value is None or value == "" else str(value)
|
|
|
|
def write_report(path: Path, payload: dict[str, Any]) -> None:
    """Write the gate decision in *payload* to *path* as a Markdown report.

    The report holds the candidate, type and decision, one table row per entry
    of ``payload["checks"]``, and the fixed promotion rule. Missing or empty
    values are rendered as "-". Parent directories are created as needed and
    the file is written as UTF-8 with a single trailing newline.

    Args:
        path: Destination file.
        payload: Decision dict with "candidate", "kind", "summary" and
            "checks" (dicts with "name", "status", "detail").
    """

    def _cell(value: Any) -> str:
        # A raw "|" would end the cell and a line break would end the row, so
        # free-form text (license strings, labels from the score report) has
        # to be neutralized before it goes into the table.
        text = markdown_value(value).replace("|", "\\|")
        return " ".join(text.splitlines()) if "\n" in text or "\r" in text else text

    lines = [
        "# Model Promotion Gate",
        "",
        f"Candidate: {markdown_value(payload.get('candidate'))}",
        f"Type: {markdown_value(payload.get('kind'))}",
        f"Decision: {markdown_value(payload.get('summary'))}",
        "",
        "| Check | Status | Detail |",
        "| --- | --- | --- |",
    ]
    for check in payload.get("checks", []):
        cells = [_cell(check.get(key)) for key in ("name", "status", "detail")]
        lines.append("| " + " | ".join(cells) + " |")
    lines.extend(
        [
            "",
            "## Rule",
            "",
            "Promote only when the candidate wins on the exact same Arabic sample, the license is acceptable, the worker can run it, privacy/deployment terms are clear, and a human review confirms the Arabic text or audio is comfortable and faithful.",
            "Anything else stays benchmark-only.",
        ]
    )
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8")
|
|
|
|
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the promotion gate."""
    parser = argparse.ArgumentParser(
        description="Gate OCR/TTS candidates before promoting them into the Arabic audio reader stack."
    )
    parser.add_argument("--candidate-name", required=True)
    parser.add_argument("--kind", choices=["ocr", "tts", "preprocessor"], required=True)
    parser.add_argument("--license", required=True, dest="license_text")
    parser.add_argument(
        "--score-json",
        type=Path,
        required=True,
        help="JSON output from score_external_ocr.py, score_voice_listening.py, or score_tts_preprocessor.py.",
    )

    # Manual confirmations: each flag defaults to False and must be passed
    # explicitly for its check to pass.
    confirmation_flags = (
        ("--same-sample", "Confirm the candidate and baseline used the exact same pages/text."),
        ("--runtime-ok", "Confirm the target worker can run the candidate acceptably."),
        ("--privacy-ok", "Confirm external service/API or local deployment terms are acceptable."),
        ("--human-reviewed", "Confirm Arabic text/audio was manually reviewed."),
        ("--allow-restricted-license", "Allow restricted licenses for personal-only experiments."),
    )
    for flag, help_text in confirmation_flags:
        parser.add_argument(flag, action="store_true", help=help_text)

    parser.add_argument("--write-report", type=Path)
    parser.add_argument("--json", action="store_true")
    return parser


def main() -> None:
    """Run the promotion gate from the command line.

    Prints the decision as JSON (``--json``) or as a short text summary,
    optionally writes the Markdown report (``--write-report``), and exits with
    status 1 when the candidate is not promotion-ready.
    """
    args = _build_parser().parse_args()

    decision = evaluate_promotion(
        candidate_name=args.candidate_name,
        kind=args.kind,
        license_text=args.license_text,
        score_payload=load_json(args.score_json),
        same_sample=args.same_sample,
        runtime_ok=args.runtime_ok,
        privacy_ok=args.privacy_ok,
        human_reviewed=args.human_reviewed,
        allow_restricted_license=args.allow_restricted_license,
    )

    report_target = args.write_report
    if report_target:
        write_report(report_target, decision)
        # Recorded after writing, so the path shows up in the printed output
        # but not in the report file.
        decision["reportPath"] = str(report_target)

    if args.json:
        print(json.dumps(decision, ensure_ascii=False, indent=2))
    else:
        print(f"{decision['candidate']}: {decision['summary']}")
        for entry in decision["checks"]:
            print(f"- {entry['status']} {entry['name']}: {entry['detail']}")

    if not decision["ready"]:
        raise SystemExit(1)


if __name__ == "__main__":
    main()
|
|