Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /evaluation /long_memory.py
| """Long-memory evidence helpers for TinyMind PureField.""" | |
| from __future__ import annotations | |
| import json | |
| import math | |
| from pathlib import Path | |
| from typing import Iterable, Mapping | |
| def _shape_numel(shape: list[int]) -> int: | |
| total = 1 | |
| for value in shape: | |
| total *= int(value) | |
| return total | |
def compute_memory_efficiency(rows: Iterable[Mapping], dtype_bytes: int = 4) -> dict:
    """Summarise recurrent-memory size across context-length smoke rows.

    Each row must provide ``context_tokens`` and may provide ``memory_shape``,
    ``logits_finite`` and ``local_window_tokens``.  Rows are processed in
    ascending ``context_tokens`` order.

    Args:
        rows: Measurement rows.  The iterable is consumed exactly once.
        dtype_bytes: Size in bytes of one memory-state element.

    Returns:
        A dict with the claim text, the ``constant_memory`` and ``finite_all``
        flags, the measured contexts, the memory shape and byte size taken
        from the smallest-context row, per-token byte figures and the
        ``max_context / local_window_max`` proxy ratio.

    Raises:
        ValueError: If ``rows`` is empty.
        KeyError: If a row has no ``context_tokens`` entry.
    """
    data = sorted((dict(row) for row in rows), key=lambda row: int(row["context_tokens"]))
    if not data:
        raise ValueError("expected at least one context smoke row")

    # A missing (or None) shape is normalised to [] for the size arithmetic,
    # but it is tracked separately so it cannot count as evidence below.
    raw_shapes = [row.get("memory_shape") for row in data]
    shape_reported = all(raw is not None for raw in raw_shapes)
    shapes = [list(raw) if raw is not None else [] for raw in raw_shapes]

    contexts = [int(row["context_tokens"]) for row in data]
    # A row without ``logits_finite`` is treated as not finite.
    finite_all = all(bool(row.get("logits_finite", False)) for row in data)
    same_shape = all(shape == shapes[0] for shape in shapes)

    # The claim needs every row to report a shape, all shapes to match, and
    # all logits to be finite.  Rows that omit the shape would otherwise all
    # compare equal as [] and assert the claim without any measurement.
    constant_memory = shape_reported and same_shape and finite_all

    # Byte size is taken from the first row after sorting (smallest context).
    state_bytes = _shape_numel(shapes[0]) * int(dtype_bytes)
    min_context = contexts[0]
    max_context = contexts[-1]
    local_window_max = max(int(row.get("local_window_tokens", 0)) for row in data)

    return {
        "claim": "TinyMind PureField recurrent memory state remains constant over measured context lengths.",
        "world_best_claim": False,
        "constant_memory": constant_memory,
        "contexts": contexts,
        "min_context_tokens": min_context,
        "max_context_tokens": max_context,
        "memory_shape": shapes[0],
        "state_bytes": state_bytes,
        "dtype_bytes": int(dtype_bytes),
        "local_window_max": local_window_max,
        # max(..., 1) guards the divisions against zero-valued contexts/windows.
        "state_bytes_per_token_at_min_context": state_bytes / max(min_context, 1),
        "state_bytes_per_token_at_max_context": state_bytes / max(max_context, 1),
        "compression_ratio_vs_full_kv_proxy": max_context / max(local_window_max, 1),
        "finite_all": finite_all,
    }
| def _markdown(report: Mapping) -> str: | |
| lines = [ | |
| "# TinyMind Long-Memory Evidence", | |
| "", | |
| f"- Claim: {report['claim']}", | |
| "- World-best claim: not asserted", | |
| f"- Constant recurrent memory: {report['constant_memory']}", | |
| f"- Measured contexts: {report['contexts']}", | |
| f"- Max context tokens: {report['max_context_tokens']}", | |
| f"- Memory shape: {report['memory_shape']}", | |
| f"- State bytes: {report['state_bytes']}", | |
| f"- State bytes/token at max context: {report['state_bytes_per_token_at_max_context']:.6f}", | |
| f"- Local window max: {report['local_window_max']}", | |
| f"- Full-KV proxy compression ratio: {report['compression_ratio_vs_full_kv_proxy']:.2f}x", | |
| "", | |
| "This report proves a measured local property only. External rank-1 comparison is still required before any best-in-world claim.", | |
| "", | |
| ] | |
| return "\n".join(lines) | |
def write_long_memory_report(
    rows: Iterable[Mapping],
    json_path: str | Path,
    dtype_bytes: int = 4,
) -> dict:
    """Write the long-memory report as JSON plus a sibling Markdown file.

    The Markdown file uses the same path as ``json_path`` with its suffix
    replaced by ``.md``.  Missing parent directories are created.

    Args:
        rows: Measurement rows, forwarded to ``compute_memory_efficiency``.
        json_path: Destination of the JSON report.
        dtype_bytes: Size in bytes of one memory-state element.

    Returns:
        The report dict extended with ``json_path`` and ``markdown_path``
        (both as strings).

    Raises:
        ValueError: If ``rows`` is empty (raised by
            ``compute_memory_efficiency``).
    """
    # Build the report and derive both paths before touching the filesystem,
    # so invalid input fails without leaving a stray output directory behind.
    report = compute_memory_efficiency(rows, dtype_bytes=dtype_bytes)
    out = Path(json_path)
    markdown_path = out.with_suffix(".md")

    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
    markdown_path.write_text(_markdown(report), encoding="utf-8")
    return {"json_path": str(out), "markdown_path": str(markdown_path), **report}
Xet Storage Details
- Size:
- 3.39 kB
- Xet hash:
- e54a6973cf534cc740da1cb895a01dbdedf8c04e5a44962584cf60c678b3ceeb
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.