Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /evaluation /compact_intelligence.py
| """Compact intelligence dossier: quality per parameter across dimensions.""" | |
| from __future__ import annotations | |
| from datetime import datetime, timezone | |
| import json | |
| from pathlib import Path | |
# Size yardsticks the dossier compares the local parameter count against.
# Each row carries a model label, its parameter count, and a scope tag.
REFERENCE_MODELS = [
    {"model": label, "params": count, "scope": scope}
    for label, count, scope in (
        ("sshleifer/tiny-gpt2", 102_714, "size_reference"),
        ("distilgpt2", 81_912_576, "size_reference"),
        ("gpt2", 124_439_808, "size_reference"),
        ("7B class", 7_000_000_000, "world_size_class"),
        ("70B class", 70_000_000_000, "world_size_class"),
    )
]
| def _load(path: str | Path) -> dict: | |
| return json.loads(Path(path).read_text(encoding="utf-8")) | |
| def _score_from_loss(loss: float) -> float: | |
| return 100.0 / (1.0 + max(loss, 0.0)) | |
def build_compact_intelligence_dossier(
    out_dir: str | Path,
    knowledge_report: str | Path,
    bitsharp_report: str | Path,
    logic_report: str | Path | None = None,
    official_report: str | Path | None = None,
    *,
    params: int = 139_186,
) -> dict:
    """Build the compact intelligence dossier and write it as JSON and Markdown.

    Scores from the supplied report files are combined into per-dimension
    values, averaged into a holistic score, and related to the parameter
    count. The result is written to ``compact_intelligence_dossier.json`` and
    ``compact_intelligence_dossier.md`` inside *out_dir*, which is created if
    it does not exist.

    Args:
        out_dir: Directory that receives the two output files.
        knowledge_report: JSON report providing ``train_eval.eval_loss``,
            ``natural_gate.score`` and, optionally,
            ``dashboard.summary_scores``.
        bitsharp_report: JSON report providing ``after.bit_error_proxy``.
        logic_report: Optional JSON report with an ``accuracy`` field. When it
            is not given or the file does not exist, the accuracy is 0.0.
        official_report: Optional JSON report with
            ``results.mmlu_pro.accuracy``. When it is not given or the file
            does not exist, the accuracy is 0.0.
        params: Parameter count of the model being described. The default is
            the size of the local trained model as measured by
            official_hard_eval (139,186 params).

    Returns:
        The dossier dictionary, including the ``json_path`` and
        ``markdown_path`` of the files that were written.

    Raises:
        ValueError: If *params* is not positive.
        KeyError: If a required field is missing from the knowledge or
            bitsharp report.
    """
    # params is used as a divisor below, so reject zero and negative counts.
    if params <= 0:
        raise ValueError(f"params must be a positive parameter count, got {params!r}")

    knowledge = _load(knowledge_report)
    bit = _load(bitsharp_report)

    # Optional reports fall back to a zero score when absent.
    if logic_report and Path(logic_report).exists():
        logic = _load(logic_report)
    else:
        logic = {"accuracy": 0.0}
    if official_report and Path(official_report).exists():
        official = _load(official_report)
    else:
        official = {"results": {"mmlu_pro": {"accuracy": 0.0}}}

    # Accept a missing or null "dashboard" / "summary_scores" entry.
    dashboard = (knowledge.get("dashboard") or {}).get("summary_scores") or {}

    dimensions = {
        "local_pure_loss_quality": _score_from_loss(float(knowledge["train_eval"]["eval_loss"])),
        "bit_exactness": 100.0 * (1.0 - float(bit["after"]["bit_error_proxy"])),
        "natural_answer_style": 100.0 * float(knowledge["natural_gate"]["score"]),
        "knowledge_mmlu_pro_smoke": float(dashboard.get("knowledge", 0.0)),
        "instruction_following_smoke": float(dashboard.get("instruction", 0.0)),
        "translation_smoke": float(dashboard.get("translation", 0.0)),
        "logic_eval": 100.0 * float(logic.get("accuracy", 0.0)),
        "official_mmlu_pro_smoke": 100.0
        * float(official.get("results", {}).get("mmlu_pro", {}).get("accuracy", 0.0)),
    }
    holistic = sum(dimensions.values()) / len(dimensions)

    dossier = {
        "schema_version": "tinymind-compact-intelligence-v1",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "model": "TinyMind PureField/ReGenesis + Tools",
        "params": params,
        "dimensions": dimensions,
        "holistic_score": holistic,
        "score_per_million_params": holistic / (params / 1_000_000),
        "reference_models": [
            {
                **row,
                "relative_params": row["params"] / params,
                "measured_here": False,
            }
            for row in REFERENCE_MODELS
        ],
        "claim_gate": {
            "can_claim_smarter_than_larger_models": False,
            "reason": "Needs measured external comparable scores across all dimensions; current instruction and logic axes are not high enough.",
            "must_improve": [key for key, value in dimensions.items() if value < 50.0],
        },
        "world_best_claim_allowed": False,
    }

    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    json_path = out / "compact_intelligence_dossier.json"
    md_path = out / "compact_intelligence_dossier.md"
    # The output paths are stored in the dossier before it is serialized.
    dossier["json_path"] = str(json_path)
    dossier["markdown_path"] = str(md_path)
    json_path.write_text(
        json.dumps(dossier, ensure_ascii=False, indent=2, sort_keys=True),
        encoding="utf-8",
    )
    md_path.write_text(_markdown(dossier), encoding="utf-8")
    return dossier
| def _markdown(dossier: dict) -> str: | |
| lines = [ | |
| "# TinyMind Compact Intelligence Dossier", | |
| "", | |
| f"- Params: {dossier['params']:,}", | |
| f"- Holistic score: {dossier['holistic_score']:.2f}", | |
| f"- Score per million params: {dossier['score_per_million_params']:.2f}", | |
| f"- Claim smarter than larger models: {dossier['claim_gate']['can_claim_smarter_than_larger_models']}", | |
| "", | |
| "## Dimensions", | |
| "", | |
| ] | |
| for key, value in dossier["dimensions"].items(): | |
| lines.append(f"- {key}: {value:.2f}") | |
| lines.extend(["", "## Must Improve", ""]) | |
| for key in dossier["claim_gate"]["must_improve"]: | |
| lines.append(f"- {key}") | |
| return "\n".join(lines) + "\n" | |
Xet Storage Details
- Size:
- 4.63 kB
- Xet hash:
- 69aad9f3bc62dd717b77ba84969c88dd29df8814c730b40c7b9468c7e5742862
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.