bbkdevops's picture
download
raw
7.19 kB
"""Auditable claim-readiness gates for TinyMind."""
from __future__ import annotations
from dataclasses import dataclass
import json
from pathlib import Path
from typing import Any, Mapping
# Measurement categories that evaluate_claim_readiness looks up in
# evidence["measurements"]; each one must be a mapping with ``passed is True``.
REQUIRED_CLAIM_CATEGORIES = (
    "quality", "size", "context",
    "stability", "speed", "quantization",
)

# Keys that evaluate_claim_readiness looks up in evidence["artifacts"]; each
# value must be a path string that exists on disk.
REQUIRED_ARTIFACTS = (
    "checkpoint", "int4_artifact",
    "dataset_manifest", "objective_report",
)
@dataclass(frozen=True)
class ClaimVerdict:
    """Immutable result record of a claim-readiness evaluation."""

    # Whether the gated (world-best) claim may be made.
    world_best_claim_allowed: bool
    # Names of the claims that the evidence supports.
    allowed_claims: list[str]
    # Names of the claims that were requested but are not supported.
    blocked_claims: list[str]
    # Identifiers of the evidence items found missing.
    missing: list[str]

    def to_dict(self) -> dict:
        """Return the verdict as a plain dict keyed by field name.

        The list values are the same objects held by the instance, not copies.
        """
        field_names = (
            "world_best_claim_allowed",
            "allowed_claims",
            "blocked_claims",
            "missing",
        )
        return {name: getattr(self, name) for name in field_names}
def _artifact_exists(value: Any) -> bool:
if not isinstance(value, str) or not value.strip():
return False
return Path(value).exists()
def _rank1_comparison_ok(row: Mapping[str, Any]) -> bool:
required = ("leaderboard", "metric", "model_score", "best_baseline_score", "rank", "source_url", "as_of")
if not all(row.get(key) not in (None, "") for key in required):
return False
if not str(row["source_url"]).startswith(("https://", "http://")):
return False
try:
return int(row["rank"]) == 1 and float(row["model_score"]) >= float(row["best_baseline_score"])
except (TypeError, ValueError):
return False
def evaluate_claim_readiness(evidence: Mapping[str, Any]) -> dict:
    """Check *evidence* against the claim gates and return the verdict dict.

    The gates are:

    * every category in ``REQUIRED_CLAIM_CATEGORIES`` has a mapping under
      ``evidence["measurements"]`` whose ``passed`` value is exactly ``True``;
    * every name in ``REQUIRED_ARTIFACTS`` maps, under
      ``evidence["artifacts"]``, to a path accepted by ``_artifact_exists``;
    * ``evidence["comparisons"]`` contains at least three rows accepted by
      ``_rank1_comparison_ok``.

    Sections of the wrong type are treated as empty rather than raising.

    Returns:
        The ``ClaimVerdict.to_dict()`` mapping with keys
        ``world_best_claim_allowed``, ``allowed_claims``, ``blocked_claims``
        and ``missing``.
    """
    measurements = evidence.get("measurements", {})
    if not isinstance(measurements, Mapping):
        measurements = {}
    artifacts = evidence.get("artifacts", {})
    if not isinstance(artifacts, Mapping):
        artifacts = {}
    comparisons = evidence.get("comparisons", [])
    if not isinstance(comparisons, list):
        comparisons = []

    # Any non-empty measurements mapping counts as a local smoke run, whether
    # or not the individual categories passed.
    local_smoke = bool(measurements)

    # Gaps in locally produced evidence: measurements first, then artifacts.
    local_missing: list[str] = []
    for category in REQUIRED_CLAIM_CATEGORIES:
        row = measurements.get(category)
        if not isinstance(row, Mapping) or row.get("passed") is not True:
            local_missing.append(f"measurement:{category}")
    for name in REQUIRED_ARTIFACTS:
        if not _artifact_exists(artifacts.get(name)):
            local_missing.append(f"artifact:{name}")

    missing: list[str] = list(local_missing)

    # NOTE(review): rows are counted individually; identical rows repeated
    # three times satisfy this gate. Confirm whether distinct leaderboards
    # should be required.
    rank1 = [
        row
        for row in comparisons
        if isinstance(row, Mapping) and _rank1_comparison_ok(row)
    ]
    if len(rank1) < 3:
        missing.append("comparisons:at_least_3_rank1_external")

    world_best_allowed = not missing

    allowed: list[str] = ["local_smoke_verified"] if local_smoke else []
    if local_smoke and not local_missing:
        allowed.append("local_train_eval_complete")

    # An explicit null scope is treated like an absent one; str(None) would
    # otherwise produce the literal claim name "None".
    raw_scope = evidence.get("claim_scope")
    if raw_scope is None:
        claim_scope = "world_best"
    else:
        claim_scope = str(raw_scope).strip() or "world_best"

    blocked: list[str] = []
    if world_best_allowed:
        allowed.append(claim_scope)
    else:
        blocked.append(claim_scope)

    return ClaimVerdict(
        world_best_claim_allowed=world_best_allowed,
        allowed_claims=allowed,
        blocked_claims=blocked,
        missing=missing,
    ).to_dict()
def build_claim_dossier(evidence: Mapping[str, Any]) -> dict:
    """Assemble the claim dossier for *evidence*.

    The dossier bundles the verdict from ``evaluate_claim_readiness`` with the
    raw measurements, comparisons and artifacts taken from *evidence*, plus a
    ``rules`` section describing the gates.
    """
    verdict = evaluate_claim_readiness(evidence)

    rules = {
        "required_categories": list(REQUIRED_CLAIM_CATEGORIES),
        "required_artifacts": list(REQUIRED_ARTIFACTS),
        "external_rank1_comparisons_required": 3,
        "no_world_best_claim_without_saved_json_or_csv_metrics": True,
    }

    dossier: dict = {"schema_version": "tinymind-claim-dossier-v1"}
    dossier["model_name"] = evidence.get("model_name", "TinyMind")
    # NOTE(review): the fallback here is "unspecified", while
    # evaluate_claim_readiness falls back to "world_best" for the claim name.
    dossier["claim_scope"] = evidence.get("claim_scope", "unspecified")
    dossier["as_of"] = evidence.get("as_of")
    dossier["verdict"] = verdict
    dossier["measurements"] = evidence.get("measurements", {})
    dossier["comparisons"] = evidence.get("comparisons", [])
    dossier["artifacts"] = evidence.get("artifacts", {})
    dossier["rules"] = rules
    return dossier
def _load_json(path: str | Path) -> dict:
return json.loads(Path(path).read_text(encoding="utf-8"))
def _markdown(dossier: Mapping[str, Any]) -> str:
verdict = dossier["verdict"]
status = "ALLOWED" if verdict["world_best_claim_allowed"] else "BLOCKED"
lines = [
"# TinyMind Claim Dossier",
"",
f"- Model: {dossier.get('model_name')}",
f"- Claim scope: {dossier.get('claim_scope')}",
f"- As of: {dossier.get('as_of')}",
f"- World-best claim: {status}",
"",
"## Allowed Claims",
]
for claim in verdict["allowed_claims"]:
lines.append(f"- {claim}")
if not verdict["allowed_claims"]:
lines.append("- None")
lines.extend(["", "## Blocked Claims"])
for claim in verdict["blocked_claims"]:
lines.append(f"- {claim}")
if not verdict["blocked_claims"]:
lines.append("- None")
lines.extend(["", "## Missing Evidence"])
for item in verdict["missing"]:
lines.append(f"- {item}")
if not verdict["missing"]:
lines.append("- None")
lines.extend(["", "## Measurements"])
measurements = dossier.get("measurements", {})
if isinstance(measurements, Mapping):
for category in REQUIRED_CLAIM_CATEGORIES:
row = measurements.get(category, {})
passed = row.get("passed") if isinstance(row, Mapping) else None
score = row.get("score") if isinstance(row, Mapping) else None
artifact = row.get("artifact") if isinstance(row, Mapping) else None
lines.append(f"- {category}: passed={passed}, score={score}, artifact={artifact}")
lines.extend(["", "## External Comparisons"])
comparisons = dossier.get("comparisons", [])
if isinstance(comparisons, list) and comparisons:
for row in comparisons:
if not isinstance(row, Mapping):
continue
lines.append(
"- "
f"{row.get('leaderboard')}: rank={row.get('rank')}, "
f"metric={row.get('metric')}, model={row.get('model_score')}, "
f"baseline={row.get('best_baseline_score')}, "
f"as_of={row.get('as_of')}, source={row.get('source_url')}"
)
else:
lines.append("- None")
lines.extend(
[
"",
"## Rule",
"No best-in-world claim is permitted unless every required category passes, required artifacts exist, and at least three dated external rank-1 comparisons are saved.",
"",
]
)
return "\n".join(lines)
def write_claim_dossier(evidence_path: str | Path, markdown_path: str | Path) -> dict:
    """Build the dossier from an evidence file and write it as Markdown and JSON.

    Args:
        evidence_path: JSON evidence file to read.
        markdown_path: Destination of the Markdown report; missing parent
            directories are created. The JSON copy is written next to it
            under the same name with the suffix replaced by ``.json``.

    Returns:
        The dossier dict that was written.
    """
    source = Path(evidence_path)
    markdown_file = Path(markdown_path)

    evidence = _load_json(source)
    dossier = build_claim_dossier(evidence)

    markdown_file.parent.mkdir(parents=True, exist_ok=True)
    markdown_file.write_text(_markdown(dossier), encoding="utf-8")

    # Keys are sorted in the JSON copy.
    serialized = json.dumps(dossier, ensure_ascii=False, indent=2, sort_keys=True)
    markdown_file.with_suffix(".json").write_text(serialized, encoding="utf-8")
    return dossier

Xet Storage Details

Size:
7.19 kB
·
Xet hash:
7cc11334468f2dff8c85f388d8f8f25be6c3383695d4d25dc3a8722da65b6265

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.