Buckets:

bbkdevops
/

unicosys-hypergraph-bucket

Files

xet

bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /evaluation /claims.py

bbkdevops

about 1 month ago

download

raw

7.19 kB

	"""Auditable claim-readiness gates for TinyMind."""

	from __future__ import annotations

	from dataclasses import dataclass
	import json
	from pathlib import Path
	from typing import Any, Mapping


	# Measurement categories that evaluate_claim_readiness() requires; each one
	# must be a mapping whose "passed" entry is exactly True.
	REQUIRED_CLAIM_CATEGORIES = ("quality", "size", "context", "stability", "speed", "quantization")

	# Artifact keys that evaluate_claim_readiness() requires; each value must be
	# a path string that exists on disk.
	REQUIRED_ARTIFACTS = ("checkpoint", "int4_artifact", "dataset_manifest", "objective_report")


	@dataclass(frozen=True)
	class ClaimVerdict:
	    """Frozen record of a claim-readiness decision.

	    The dataclass forbids rebinding its fields, but the list fields
	    themselves remain ordinary mutable lists.
	    """

	    # True when no required evidence is missing.
	    world_best_claim_allowed: bool
	    # Claim labels the evidence supports.
	    allowed_claims: list[str]
	    # Claim labels the evidence does not support.
	    blocked_claims: list[str]
	    # Identifiers of the evidence items that were absent or failed.
	    missing: list[str]

	    def to_dict(self) -> dict:
	        """Return the verdict as a plain dict keyed by field name.

	        The list values are the instance's own list objects, not copies.
	        """
	        return dict(
	            world_best_claim_allowed=self.world_best_claim_allowed,
	            allowed_claims=self.allowed_claims,
	            blocked_claims=self.blocked_claims,
	            missing=self.missing,
	        )


	def _artifact_exists(value: Any) -> bool:
	    """Return True when *value* is a non-blank string naming an existing path.

	    Args:
	        value: Candidate artifact location taken from the evidence mapping.
	            Anything that is not a ``str``, or that is empty or whitespace
	            only, is reported as missing.

	    Returns:
	        ``True`` if the path exists (file or directory), ``False`` otherwise,
	        including when the path cannot be checked at all.
	    """
	    if not isinstance(value, str) or not value.strip():
	        return False
	    try:
	        return Path(value).exists()
	    except (OSError, ValueError):
	        # Depending on the Python version, exists() may propagate errors
	        # such as an over-long file name. An artifact whose presence cannot
	        # be verified must count as missing rather than crash the gate.
	        return False


	def _rank1_comparison_ok(row: Mapping[str, Any]) -> bool:
	    """Return True when *row* is a complete, sourced, rank-1 comparison.

	    A row qualifies only if all of the following hold:

	    * every required field is present and is neither ``None`` nor ``""``;
	    * ``source_url`` starts with ``https://`` or ``http://``;
	    * ``rank`` is numerically exactly 1 (``1``, ``"1"`` or ``1.0``);
	      booleans and fractional ranks such as ``1.9`` are rejected;
	    * ``model_score`` and ``best_baseline_score`` convert to finite floats
	      and the model score is greater than or equal to the baseline.

	    Args:
	        row: One entry of the evidence ``comparisons`` list.

	    Returns:
	        ``True`` if the row passes every check, ``False`` otherwise. Values
	        that cannot be converted never raise; they make the row fail.
	    """
	    import math  # local import keeps this block self-contained

	    required = ("leaderboard", "metric", "model_score", "best_baseline_score", "rank", "source_url", "as_of")
	    if any(row.get(key) in (None, "") for key in required):
	        return False
	    if not str(row["source_url"]).startswith(("https://", "http://")):
	        return False

	    rank = row["rank"]
	    if isinstance(rank, bool):
	        # int(True) == 1, but a boolean is not a leaderboard position.
	        return False
	    try:
	        rank_value = int(rank)
	        if float(rank) != rank_value:
	            # int() truncates, so 1.9 would otherwise pass as rank 1.
	            return False
	        model_score = float(row["model_score"])
	        baseline_score = float(row["best_baseline_score"])
	    except (TypeError, ValueError, OverflowError):
	        # OverflowError covers int(float("inf")) and oversized values.
	        return False

	    if not (math.isfinite(model_score) and math.isfinite(baseline_score)):
	        # inf/nan are not measurements and must not satisfy the gate.
	        return False
	    return rank_value == 1 and model_score >= baseline_score


	def evaluate_claim_readiness(evidence: Mapping[str, Any]) -> dict:
	    """Decide which claims the supplied evidence supports.

	    Checks performed, in the order their failures appear in ``missing``:

	    1. every category in ``REQUIRED_CLAIM_CATEGORIES`` has a measurement
	       mapping whose ``passed`` value is exactly ``True``;
	    2. every name in ``REQUIRED_ARTIFACTS`` maps to an existing path;
	    3. at least three entries of ``comparisons`` pass
	       ``_rank1_comparison_ok``.

	    Sections of the wrong type (``measurements``/``artifacts`` not a
	    mapping, ``comparisons`` not a list) are treated as empty.

	    Args:
	        evidence: Evidence mapping; may contain ``measurements``,
	            ``artifacts``, ``comparisons`` and ``claim_scope``.

	    Returns:
	        The ``ClaimVerdict.to_dict()`` form of the decision, with keys
	        ``world_best_claim_allowed``, ``allowed_claims``, ``blocked_claims``
	        and ``missing``.
	    """
	    measurements = evidence.get("measurements", {})
	    if not isinstance(measurements, Mapping):
	        measurements = {}
	    artifacts = evidence.get("artifacts", {})
	    if not isinstance(artifacts, Mapping):
	        artifacts = {}
	    comparisons = evidence.get("comparisons", [])
	    if not isinstance(comparisons, list):
	        comparisons = []

	    # Evidence that can be produced locally: measurements, then artifacts.
	    local_missing: list[str] = []
	    for category in REQUIRED_CLAIM_CATEGORIES:
	        row = measurements.get(category)
	        if not isinstance(row, Mapping) or row.get("passed") is not True:
	            local_missing.append(f"measurement:{category}")
	    for name in REQUIRED_ARTIFACTS:
	        if not _artifact_exists(artifacts.get(name)):
	            local_missing.append(f"artifact:{name}")

	    missing = list(local_missing)
	    rank1 = [row for row in comparisons if isinstance(row, Mapping) and _rank1_comparison_ok(row)]
	    if len(rank1) < 3:
	        missing.append("comparisons:at_least_3_rank1_external")

	    world_best_allowed = not missing

	    allowed: list[str] = []
	    if measurements:
	        # Any non-empty measurements mapping counts as a local smoke run.
	        allowed.append("local_smoke_verified")
	        if not local_missing:
	            allowed.append("local_train_eval_complete")

	    # A JSON null scope must fall back to the default rather than become the
	    # literal claim "None", which reads like the report's empty-list marker.
	    raw_scope = evidence.get("claim_scope")
	    if raw_scope is None:
	        claim_scope = "world_best"
	    else:
	        claim_scope = str(raw_scope).strip() or "world_best"

	    blocked: list[str] = []
	    if world_best_allowed:
	        allowed.append(claim_scope)
	    else:
	        blocked.append(claim_scope)

	    return ClaimVerdict(
	        world_best_claim_allowed=world_best_allowed,
	        allowed_claims=allowed,
	        blocked_claims=blocked,
	        missing=missing,
	    ).to_dict()


	def build_claim_dossier(evidence: Mapping[str, Any]) -> dict:
	    """Assemble the claim dossier for *evidence*.

	    The dossier combines the verdict from ``evaluate_claim_readiness`` with
	    the raw ``measurements``, ``comparisons`` and ``artifacts`` sections of
	    the evidence (passed through unchanged) and a ``rules`` entry describing
	    the gate's requirements.

	    Args:
	        evidence: Evidence mapping; ``model_name`` defaults to ``"TinyMind"``
	            and ``claim_scope`` to ``"unspecified"`` when absent.

	    Returns:
	        A dict tagged with schema version ``tinymind-claim-dossier-v1``.
	    """
	    verdict = evaluate_claim_readiness(evidence)

	    rules = {
	        "required_categories": list(REQUIRED_CLAIM_CATEGORIES),
	        "required_artifacts": list(REQUIRED_ARTIFACTS),
	        "external_rank1_comparisons_required": 3,
	        "no_world_best_claim_without_saved_json_or_csv_metrics": True,
	    }

	    dossier: dict = {"schema_version": "tinymind-claim-dossier-v1"}
	    dossier["model_name"] = evidence.get("model_name", "TinyMind")
	    dossier["claim_scope"] = evidence.get("claim_scope", "unspecified")
	    dossier["as_of"] = evidence.get("as_of")
	    dossier["verdict"] = verdict
	    dossier["measurements"] = evidence.get("measurements", {})
	    dossier["comparisons"] = evidence.get("comparisons", [])
	    dossier["artifacts"] = evidence.get("artifacts", {})
	    dossier["rules"] = rules
	    return dossier


	def _load_json(path: str | Path) -> dict:
	    """Read the UTF-8 text file at *path* and return its parsed JSON content.

	    The top-level value is returned as parsed; no check is made that it is
	    a JSON object.

	    Raises:
	        OSError: If the file cannot be read.
	        json.JSONDecodeError: If the content is not valid JSON.
	    """
	    return json.loads(Path(path).read_text(encoding="utf-8"))


	def _markdown(dossier: Mapping[str, Any]) -> str:
	    """Render a claim dossier as a Markdown report.

	    Every section always has a visible body: when a section has nothing to
	    list (including a ``measurements`` value that is not a mapping, or a
	    ``comparisons`` list with no mapping rows) it shows ``- None``.

	    Args:
	        dossier: Mapping with a required ``verdict`` entry (holding
	            ``world_best_claim_allowed``, ``allowed_claims``,
	            ``blocked_claims`` and ``missing``) and optional ``model_name``,
	            ``claim_scope``, ``as_of``, ``measurements`` and ``comparisons``.

	    Returns:
	        The report as newline-joined text ending with a trailing newline.

	    Raises:
	        KeyError: If ``verdict`` or one of its four entries is absent.
	    """
	    verdict = dossier["verdict"]
	    status = "ALLOWED" if verdict["world_best_claim_allowed"] else "BLOCKED"
	    lines = [
	        "# TinyMind Claim Dossier",
	        "",
	        f"- Model: {dossier.get('model_name')}",
	        f"- Claim scope: {dossier.get('claim_scope')}",
	        f"- As of: {dossier.get('as_of')}",
	        f"- World-best claim: {status}",
	    ]

	    def add_section(heading: str, bullets: list[str]) -> None:
	        # Blank line, heading, then the bullets or the empty-section marker.
	        lines.extend(["", heading])
	        lines.extend(bullets or ["- None"])

	    add_section("## Allowed Claims", [f"- {claim}" for claim in verdict["allowed_claims"]])
	    add_section("## Blocked Claims", [f"- {claim}" for claim in verdict["blocked_claims"]])
	    add_section("## Missing Evidence", [f"- {item}" for item in verdict["missing"]])

	    measurement_bullets: list[str] = []
	    measurements = dossier.get("measurements", {})
	    if isinstance(measurements, Mapping):
	        for category in REQUIRED_CLAIM_CATEGORIES:
	            row = measurements.get(category, {})
	            if not isinstance(row, Mapping):
	                # A malformed row is shown with every field as None.
	                row = {}
	            measurement_bullets.append(
	                f"- {category}: passed={row.get('passed')}, "
	                f"score={row.get('score')}, artifact={row.get('artifact')}"
	            )
	    add_section("## Measurements", measurement_bullets)

	    comparison_bullets: list[str] = []
	    comparisons = dossier.get("comparisons", [])
	    if isinstance(comparisons, list):
	        for row in comparisons:
	            if not isinstance(row, Mapping):
	                continue
	            comparison_bullets.append(
	                "- "
	                f"{row.get('leaderboard')}: rank={row.get('rank')}, "
	                f"metric={row.get('metric')}, model={row.get('model_score')}, "
	                f"baseline={row.get('best_baseline_score')}, "
	                f"as_of={row.get('as_of')}, source={row.get('source_url')}"
	            )
	    add_section("## External Comparisons", comparison_bullets)

	    lines.extend(
	        [
	            "",
	            "## Rule",
	            "No best-in-world claim is permitted unless every required category passes, required artifacts exist, and at least three dated external rank-1 comparisons are saved.",
	            "",
	        ]
	    )
	    return "\n".join(lines)


	def write_claim_dossier(evidence_path: str | Path, markdown_path: str | Path) -> dict:
	    """Build the dossier for an evidence file and save it as Markdown and JSON.

	    The Markdown report is written to *markdown_path* (parent directories
	    are created as needed). The JSON form is written next to it under the
	    same name with a ``.json`` suffix. If *markdown_path* itself already
	    ends in ``.json``, the JSON file is named by appending a further
	    ``.json`` so that it cannot overwrite the Markdown report.

	    Args:
	        evidence_path: Location of the UTF-8 JSON evidence file.
	        markdown_path: Destination of the Markdown report.

	    Returns:
	        The dossier dict that was written.
	    """
	    evidence_file = Path(evidence_path)
	    out = Path(markdown_path)
	    dossier = build_claim_dossier(_load_json(evidence_file))
	    out.parent.mkdir(parents=True, exist_ok=True)
	    out.write_text(_markdown(dossier), encoding="utf-8")
	    if out.suffix.lower() == ".json":
	        # with_suffix(".json") would resolve to the Markdown file itself
	        # (also on case-insensitive filesystems) and clobber it.
	        json_path = out.with_name(out.name + ".json")
	    else:
	        json_path = out.with_suffix(".json")
	    json_path.write_text(json.dumps(dossier, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
	    return dossier

Xet Storage Details

Size: 7.19 kB
Xet hash: 7cc11334468f2dff8c85f388d8f8f25be6c3383695d4d25dc3a8722da65b6265

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.