Buckets:
| """Auditable claim-readiness gates for TinyMind.""" | |
| from __future__ import annotations | |
| from dataclasses import dataclass | |
| import json | |
| from pathlib import Path | |
| from typing import Any, Mapping | |
# Measurement categories that must each report ``passed`` as True before the
# local evidence is considered complete.
REQUIRED_CLAIM_CATEGORIES = ("quality", "size", "context", "stability", "speed", "quantization")
# Artifact keys whose values must be paths that exist on disk.
REQUIRED_ARTIFACTS = ("checkpoint", "int4_artifact", "dataset_manifest", "objective_report")
@dataclass
class ClaimVerdict:
    """Outcome of a claim-readiness evaluation.

    The class is a dataclass so that it can be constructed with keyword
    arguments; without the decorator the annotated fields would not produce
    an ``__init__`` and keyword construction would raise ``TypeError``.
    """

    # Whether the headline claim may be made.
    world_best_claim_allowed: bool
    # Claim labels that the evidence supports.
    allowed_claims: list[str]
    # Claim labels that the evidence does not support.
    blocked_claims: list[str]
    # Identifiers of the evidence items that were absent or failed.
    missing: list[str]

    def to_dict(self) -> dict:
        """Return the verdict as a plain dictionary keyed by field name."""
        return {
            "world_best_claim_allowed": self.world_best_claim_allowed,
            "allowed_claims": self.allowed_claims,
            "blocked_claims": self.blocked_claims,
            "missing": self.missing,
        }
| def _artifact_exists(value: Any) -> bool: | |
| if not isinstance(value, str) or not value.strip(): | |
| return False | |
| return Path(value).exists() | |
| def _rank1_comparison_ok(row: Mapping[str, Any]) -> bool: | |
| required = ("leaderboard", "metric", "model_score", "best_baseline_score", "rank", "source_url", "as_of") | |
| if not all(row.get(key) not in (None, "") for key in required): | |
| return False | |
| if not str(row["source_url"]).startswith(("https://", "http://")): | |
| return False | |
| try: | |
| return int(row["rank"]) == 1 and float(row["model_score"]) >= float(row["best_baseline_score"]) | |
| except (TypeError, ValueError): | |
| return False | |
def evaluate_claim_readiness(evidence: Mapping[str, Any]) -> dict:
    """Decide which claims the supplied evidence supports.

    Reads the ``measurements``, ``artifacts``, ``comparisons`` and
    ``claim_scope`` entries of *evidence*. Entries of an unexpected type are
    treated as empty. The result is the dictionary form of a
    ``ClaimVerdict``.

    The headline claim is allowed only when every required measurement
    category has ``passed`` set to True, every required artifact exists, and
    at least three comparison rows pass ``_rank1_comparison_ok``.
    """
    measurements = evidence.get("measurements", {})
    if not isinstance(measurements, Mapping):
        measurements = {}
    has_measurements = bool(measurements)

    def _category_passed(category: str) -> bool:
        entry = measurements.get(category)
        return isinstance(entry, Mapping) and entry.get("passed") is True

    # Gaps in the locally produced evidence: measurements first, then artifacts.
    local_gaps = [
        f"measurement:{category}"
        for category in REQUIRED_CLAIM_CATEGORIES
        if not _category_passed(category)
    ]

    artifacts = evidence.get("artifacts", {})
    if not isinstance(artifacts, Mapping):
        artifacts = {}
    local_gaps += [
        f"artifact:{name}"
        for name in REQUIRED_ARTIFACTS
        if not _artifact_exists(artifacts.get(name))
    ]

    all_gaps = list(local_gaps)

    comparisons = evidence.get("comparisons", [])
    if not isinstance(comparisons, list):
        comparisons = []
    verified_rows = [
        entry
        for entry in comparisons
        if isinstance(entry, Mapping) and _rank1_comparison_ok(entry)
    ]
    if len(verified_rows) < 3:
        all_gaps.append("comparisons:at_least_3_rank1_external")

    headline_ok = not all_gaps

    granted: list[str] = []
    if has_measurements:
        granted.append("local_smoke_verified")
        if not local_gaps:
            granted.append("local_train_eval_complete")

    denied: list[str] = []
    scope = str(evidence.get("claim_scope", "world_best")).strip() or "world_best"
    # The requested scope lands in exactly one of the two lists.
    (granted if headline_ok else denied).append(scope)

    verdict = ClaimVerdict(
        world_best_claim_allowed=headline_ok,
        allowed_claims=granted,
        blocked_claims=denied,
        missing=all_gaps,
    )
    return verdict.to_dict()
def build_claim_dossier(evidence: Mapping[str, Any]) -> dict:
    """Assemble the full dossier dictionary for *evidence*.

    The dossier contains the verdict from ``evaluate_claim_readiness``, the
    raw measurements, comparisons and artifacts copied from *evidence*, and
    a ``rules`` section recording the requirements that were applied.
    """
    verdict = evaluate_claim_readiness(evidence)

    rules = {
        "required_categories": list(REQUIRED_CLAIM_CATEGORIES),
        "required_artifacts": list(REQUIRED_ARTIFACTS),
        "external_rank1_comparisons_required": 3,
        "no_world_best_claim_without_saved_json_or_csv_metrics": True,
    }

    dossier: dict = {"schema_version": "tinymind-claim-dossier-v1"}
    dossier["model_name"] = evidence.get("model_name", "TinyMind")
    dossier["claim_scope"] = evidence.get("claim_scope", "unspecified")
    dossier["as_of"] = evidence.get("as_of")
    dossier["verdict"] = verdict
    dossier["measurements"] = evidence.get("measurements", {})
    dossier["comparisons"] = evidence.get("comparisons", [])
    dossier["artifacts"] = evidence.get("artifacts", {})
    dossier["rules"] = rules
    return dossier
| def _load_json(path: str | Path) -> dict: | |
| return json.loads(Path(path).read_text(encoding="utf-8")) | |
| def _markdown(dossier: Mapping[str, Any]) -> str: | |
| verdict = dossier["verdict"] | |
| status = "ALLOWED" if verdict["world_best_claim_allowed"] else "BLOCKED" | |
| lines = [ | |
| "# TinyMind Claim Dossier", | |
| "", | |
| f"- Model: {dossier.get('model_name')}", | |
| f"- Claim scope: {dossier.get('claim_scope')}", | |
| f"- As of: {dossier.get('as_of')}", | |
| f"- World-best claim: {status}", | |
| "", | |
| "## Allowed Claims", | |
| ] | |
| for claim in verdict["allowed_claims"]: | |
| lines.append(f"- {claim}") | |
| if not verdict["allowed_claims"]: | |
| lines.append("- None") | |
| lines.extend(["", "## Blocked Claims"]) | |
| for claim in verdict["blocked_claims"]: | |
| lines.append(f"- {claim}") | |
| if not verdict["blocked_claims"]: | |
| lines.append("- None") | |
| lines.extend(["", "## Missing Evidence"]) | |
| for item in verdict["missing"]: | |
| lines.append(f"- {item}") | |
| if not verdict["missing"]: | |
| lines.append("- None") | |
| lines.extend(["", "## Measurements"]) | |
| measurements = dossier.get("measurements", {}) | |
| if isinstance(measurements, Mapping): | |
| for category in REQUIRED_CLAIM_CATEGORIES: | |
| row = measurements.get(category, {}) | |
| passed = row.get("passed") if isinstance(row, Mapping) else None | |
| score = row.get("score") if isinstance(row, Mapping) else None | |
| artifact = row.get("artifact") if isinstance(row, Mapping) else None | |
| lines.append(f"- {category}: passed={passed}, score={score}, artifact={artifact}") | |
| lines.extend(["", "## External Comparisons"]) | |
| comparisons = dossier.get("comparisons", []) | |
| if isinstance(comparisons, list) and comparisons: | |
| for row in comparisons: | |
| if not isinstance(row, Mapping): | |
| continue | |
| lines.append( | |
| "- " | |
| f"{row.get('leaderboard')}: rank={row.get('rank')}, " | |
| f"metric={row.get('metric')}, model={row.get('model_score')}, " | |
| f"baseline={row.get('best_baseline_score')}, " | |
| f"as_of={row.get('as_of')}, source={row.get('source_url')}" | |
| ) | |
| else: | |
| lines.append("- None") | |
| lines.extend( | |
| [ | |
| "", | |
| "## Rule", | |
| "No best-in-world claim is permitted unless every required category passes, required artifacts exist, and at least three dated external rank-1 comparisons are saved.", | |
| "", | |
| ] | |
| ) | |
| return "\n".join(lines) | |
def write_claim_dossier(evidence_path: str | Path, markdown_path: str | Path) -> dict:
    """Build a dossier from an evidence file and save it in two formats.

    The Markdown report is written to *markdown_path* (parent directories
    are created as needed) and the JSON form is written next to it, using
    the same path with its suffix replaced by ``.json``. Both files are
    UTF-8 encoded.

    Returns:
        The dossier dictionary that was written.
    """
    source = Path(evidence_path)
    markdown_file = Path(markdown_path)

    dossier = build_claim_dossier(_load_json(source))

    markdown_file.parent.mkdir(parents=True, exist_ok=True)
    markdown_file.write_text(_markdown(dossier), encoding="utf-8")

    json_file = markdown_file.with_suffix(".json")
    serialized = json.dumps(dossier, ensure_ascii=False, indent=2, sort_keys=True)
    json_file.write_text(serialized, encoding="utf-8")

    return dossier
Xet Storage Details
- Size:
- 7.19 kB
- Xet hash:
- 7cc11334468f2dff8c85f388d8f8f25be6c3383695d4d25dc3a8722da65b6265
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.