Spaces:

Syncre
/

arabic-audio-reader-worker

Running

App Files Files Community

arabic-audio-reader-worker / scripts /model_promotion_gate.py

Syncre

Deploy Arabic Audio Reader worker

2e1a095 verified 1 day ago

raw

history blame contribute delete

7.7 kB

	from __future__ import annotations

	import argparse
	import json
	import re
	from dataclasses import asdict, dataclass
	from pathlib import Path
	from typing import Any


	# Lower-case fragments that license_status() looks for in a license string.
	PERMISSIVE_LICENSE_MARKERS = (
	    "apache-2.0",
	    "mit",
	    "bsd",
	    "openrail",
	)

	# A hit from this tuple marks the license as restricted or unclear;
	# license_status() consults it before the permissive markers.
	RESTRICTED_LICENSE_MARKERS = (
	    # Non-commercial wording.
	    "cc-by-nc",
	    "non-commercial",
	    "noncommercial",
	    # Presumably the Llama / Gemma model-family terms -- confirm with the owner.
	    "llama",
	    "gemma",
	    # As a substring this also hits longer identifiers that contain "gpl".
	    "gpl",
	    # Phrases that read as "license unknown" rather than as a real identifier.
	    "license not declared",
	    "not established",
	    "check model card",
	    "other",
	)


	@dataclass(frozen=True)
	class GateCheck:
	    """Result of a single promotion-gate check.

	    Instances are immutable. The checks in this file set ``status`` to
	    ``"PASS"``, ``"WARN"`` or ``"FAIL"``.
	    """

	    # Short label of the check, e.g. "license" or "score".
	    name: str
	    # Verdict string for the check.
	    status: str
	    # Human-readable explanation printed next to the verdict.
	    detail: str


	def load_json(path: Path) -> dict[str, Any]:
	    """Read the UTF-8 file at *path* and return its top-level JSON object.

	    Raises:
	        FileNotFoundError: *path* does not exist.
	        ValueError: the parsed document is not a JSON object. Errors raised
	            by ``json.loads`` for malformed input propagate unchanged.
	    """
	    if path.exists():
	        parsed = json.loads(path.read_text(encoding="utf-8"))
	        if isinstance(parsed, dict):
	            return parsed
	        raise ValueError(f"Score JSON must contain an object: {path}")
	    raise FileNotFoundError(f"Score JSON not found: {path}")


	def _contains_license_token(text: str, marker: str) -> bool:
	    """Return True when *marker* occurs in *text* with no ASCII letter touching either end."""
	    pattern = rf"(?<![a-z]){re.escape(marker)}(?![a-z])"
	    return re.search(pattern, text) is not None


	def license_status(license_text: str, *, allow_restricted: bool = False) -> GateCheck:
	    """Classify a license string for the promotion gate.

	    The text is stripped and lower-cased, then checked in this order:

	    1. Empty text -> FAIL ("missing license").
	    2. Any RESTRICTED_LICENSE_MARKERS entry found as a substring -> FAIL,
	       or PASS when *allow_restricted* is set.
	    3. Any PERMISSIVE_LICENSE_MARKERS entry found as a stand-alone token
	       -> PASS.
	    4. Anything else -> WARN, or PASS when *allow_restricted* is set.

	    Args:
	        license_text: License name or identifier as supplied by the caller.
	        allow_restricted: Accept restricted and unrecognized licenses.

	    Returns:
	        A GateCheck named "license".
	    """
	    normalized = license_text.strip().lower()
	    if not normalized:
	        return GateCheck("license", "FAIL", "missing license")

	    # Restricted markers stay plain substring matches: a false hit here only
	    # makes the gate stricter.
	    if any(marker in normalized for marker in RESTRICTED_LICENSE_MARKERS):
	        status = "PASS" if allow_restricted else "FAIL"
	        return GateCheck("license", status, f"restricted or unclear: {license_text}")

	    # Permissive markers must not be embedded in a longer word; a bare
	    # substring test lets "mit" match "permitted", "limited" or "commit" and
	    # would wave an unrelated license through.
	    if any(_contains_license_token(normalized, marker) for marker in PERMISSIVE_LICENSE_MARKERS):
	        return GateCheck("license", "PASS", license_text)

	    status = "PASS" if allow_restricted else "WARN"
	    return GateCheck("license", status, f"unrecognized license: {license_text}")


	def score_status(payload: dict[str, Any], kind: str) -> GateCheck:
	    """Derive the "score" gate check from a score-report payload.

	    The report counts as promotion-ready when its ``promotionReady`` or
	    ``ready`` entry is truthy. For ``kind == "ocr"`` the ``comparison``
	    section must additionally have a truthy ``beatsBaseline``. Every other
	    kind passes on readiness alone.

	    Args:
	        payload: Parsed score report.
	        kind: Candidate type; only ``"ocr"`` gets the baseline comparison.

	    Returns:
	        A GateCheck named "score" with status PASS or FAIL.
	    """

	    def section(key: str) -> dict[str, Any]:
	        # A missing, null or non-object section is read as empty so that a
	        # malformed report yields a normal gate result instead of an
	        # AttributeError on ``.get``.
	        value = payload.get(key)
	        return value if isinstance(value, dict) else {}

	    is_ocr = kind == "ocr"
	    if not (payload.get("promotionReady") or payload.get("ready")):
	        subject = "OCR" if is_ocr else kind
	        return GateCheck("score", "FAIL", f"{subject} score report is not promotion-ready")

	    best = section("best")
	    if is_ocr:
	        comparison = section("comparison")
	        if not comparison.get("beatsBaseline"):
	            return GateCheck("score", "FAIL", "OCR candidate does not beat the wired baseline")
	        return GateCheck(
	            "score",
	            "PASS",
	            f"best={best.get('label', '-')} quality={best.get('quality', '-')} delta={comparison.get('scoreDelta', '-')}",
	        )

	    score = best.get("weightedScore", "-")
	    label = best.get("voiceId") or best.get("label") or "-"
	    return GateCheck("score", "PASS", f"best={label} weighted={score}")


	def bool_check(name: str, ok: bool, detail: str) -> GateCheck:
	    """Wrap a yes/no confirmation as a GateCheck: PASS when *ok* is truthy, FAIL otherwise."""
	    if ok:
	        return GateCheck(name, "PASS", detail)
	    return GateCheck(name, "FAIL", detail)


	def evaluate_promotion(
	    *,
	    candidate_name: str,
	    kind: str,
	    license_text: str,
	    score_payload: dict[str, Any],
	    same_sample: bool,
	    runtime_ok: bool,
	    privacy_ok: bool,
	    human_reviewed: bool,
	    allow_restricted_license: bool = False,
	) -> dict[str, Any]:
	    """Run all gate checks for one candidate and summarize the decision.

	    The candidate is ready only when no check ends in FAIL or WARN.

	    Returns:
	        A dict with the keys ``ready``, ``candidate``, ``kind``, ``checks``
	        (one dict per GateCheck, in evaluation order) and ``summary``.
	    """
	    results = [
	        score_status(score_payload, kind),
	        bool_check("same sample", same_sample, "same pages/text used for baseline and candidate"),
	        license_status(license_text, allow_restricted=allow_restricted_license),
	    ]
	    # The remaining checks are plain operator confirmations.
	    confirmations = (
	        ("runtime", runtime_ok, "worker can handle model size, speed, cold starts, and memory"),
	        ("privacy/deployment", privacy_ok, "no unsafe external upload path or unclear service/API terms"),
	        ("human review", human_reviewed, "Arabic text/audio manually reviewed for meaning, order, and comfort"),
	    )
	    results.extend(bool_check(label, confirmed, note) for label, confirmed, note in confirmations)

	    blocking = [result for result in results if result.status in ("FAIL", "WARN")]
	    ready = not blocking
	    summary = "promotion ready" if ready else "keep benchmark-only"
	    return {
	        "ready": ready,
	        "candidate": candidate_name,
	        "kind": kind,
	        "checks": [asdict(result) for result in results],
	        "summary": summary,
	    }


	def markdown_value(value: Any) -> str:
	    """Return ``str(value)``, or ``"-"`` as a placeholder for ``None`` and the empty string."""
	    is_blank = value is None or value == ""
	    return "-" if is_blank else str(value)


	def write_report(path: Path, payload: dict[str, Any]) -> None:
	    """Write the gate decision in *payload* to *path* as a Markdown report.

	    The report has a heading, the candidate / type / decision lines, one
	    table row per entry of ``payload["checks"]`` and the fixed promotion
	    rule. Missing parent directories are created and the file is written as
	    UTF-8 with a single trailing newline.

	    Args:
	        path: Destination file; overwritten if it exists.
	        payload: Result dict as returned by ``evaluate_promotion``.
	    """

	    def cell(value: Any) -> str:
	        # A raw "|" or a line break inside a value would split the table row,
	        # so flatten line breaks and escape pipes.
	        text = markdown_value(value)
	        return " ".join(text.splitlines()).replace("|", "\\|")

	    lines = [
	        "# Model Promotion Gate",
	        "",
	        f"Candidate: {markdown_value(payload.get('candidate'))}",
	        f"Type: {markdown_value(payload.get('kind'))}",
	        f"Decision: {markdown_value(payload.get('summary'))}",
	        "",
	        # Column delimiters must be unescaped pipes; a backslash-escaped pipe
	        # is literal text in Markdown and would not form a table.
	        "| Check | Status | Detail |",
	        "| --- | --- | --- |",
	    ]
	    for check in payload.get("checks", []):
	        row = " | ".join(cell(check.get(key)) for key in ("name", "status", "detail"))
	        lines.append(f"| {row} |")
	    lines.extend(
	        [
	            "",
	            "## Rule",
	            "",
	            "Promote only when the candidate wins on the exact same Arabic sample, the license is acceptable, the worker can run it, privacy/deployment terms are clear, and a human review confirms the Arabic text or audio is comfortable and faithful.",
	            "Anything else stays benchmark-only.",
	        ]
	    )
	    path.parent.mkdir(parents=True, exist_ok=True)
	    path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8")


	def _build_parser() -> argparse.ArgumentParser:
	    """Create the command-line parser for the promotion gate."""
	    parser = argparse.ArgumentParser(description="Gate OCR/TTS candidates before promoting them into the Arabic audio reader stack.")
	    parser.add_argument("--candidate-name", required=True)
	    parser.add_argument("--kind", choices=["ocr", "tts", "preprocessor"], required=True)
	    parser.add_argument("--license", required=True, dest="license_text")
	    parser.add_argument("--score-json", type=Path, required=True, help="JSON output from score_external_ocr.py, score_voice_listening.py, or score_tts_preprocessor.py.")
	    # Operator confirmations: each is a plain on/off switch.
	    confirmation_flags = (
	        ("--same-sample", "Confirm the candidate and baseline used the exact same pages/text."),
	        ("--runtime-ok", "Confirm the target worker can run the candidate acceptably."),
	        ("--privacy-ok", "Confirm external service/API or local deployment terms are acceptable."),
	        ("--human-reviewed", "Confirm Arabic text/audio was manually reviewed."),
	        ("--allow-restricted-license", "Allow restricted licenses for personal-only experiments."),
	    )
	    for flag, help_text in confirmation_flags:
	        parser.add_argument(flag, action="store_true", help=help_text)
	    parser.add_argument("--write-report", type=Path)
	    parser.add_argument("--json", action="store_true")
	    return parser


	def main() -> None:
	    """Evaluate one candidate from the command line.

	    Prints the decision as JSON (``--json``) or as a short text summary,
	    optionally writes the Markdown report, and exits with status 1 when the
	    candidate is not promotion-ready.
	    """
	    args = _build_parser().parse_args()

	    result = evaluate_promotion(
	        candidate_name=args.candidate_name,
	        kind=args.kind,
	        license_text=args.license_text,
	        score_payload=load_json(args.score_json),
	        same_sample=args.same_sample,
	        runtime_ok=args.runtime_ok,
	        privacy_ok=args.privacy_ok,
	        human_reviewed=args.human_reviewed,
	        allow_restricted_license=args.allow_restricted_license,
	    )

	    report_path = args.write_report
	    if report_path:
	        write_report(report_path, result)
	        # Added after writing, so the path shows up only in the printed JSON.
	        result["reportPath"] = str(report_path)

	    if args.json:
	        print(json.dumps(result, ensure_ascii=False, indent=2))
	    else:
	        output_lines = [f"{result['candidate']}: {result['summary']}"]
	        output_lines += [f"- {item['status']} {item['name']}: {item['detail']}" for item in result["checks"]]
	        for line in output_lines:
	            print(line)

	    if not result["ready"]:
	        raise SystemExit(1)


	if __name__ == "__main__":
	    main()