Buckets:

bbkdevops
/

unicosys-hypergraph-bucket

Files

xet

bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /evaluation /long_memory.py

bbkdevops

about 1 month ago

download

raw

3.39 kB

	"""Long-memory evidence helpers for TinyMind PureField."""

	from __future__ import annotations

	import json
	import math
	from pathlib import Path
	from typing import Iterable, Mapping


	def _shape_numel(shape: list[int]) -> int:
	total = 1
	for value in shape:
	total *= int(value)
	return total


	def compute_memory_efficiency(rows: Iterable[Mapping], dtype_bytes: int = 4) -> dict:
	    """Summarise recurrent-state memory use over a set of context smoke rows.

	    Each row is copied into a plain ``dict`` and the rows are ordered by
	    ``int(row["context_tokens"])``. Per row, the following keys are read:

	    * ``context_tokens`` (required) -- context length of the measurement.
	    * ``memory_shape`` (optional, defaults to ``[]``) -- shape of the state.
	    * ``logits_finite`` (optional, defaults to ``False``).
	    * ``local_window_tokens`` (optional, defaults to ``0``).

	    Args:
	        rows: Measurement rows; consumed exactly once.
	        dtype_bytes: Bytes per state element used to size the state.

	    Returns:
	        A report dict. ``constant_memory`` is true only when every row has
	        the same memory shape as the smallest-context row *and* every row
	        reports finite logits. ``state_bytes`` is sized from the shape of
	        the smallest-context row.

	    Raises:
	        ValueError: If ``rows`` is empty.
	        KeyError: If a row has no ``context_tokens`` entry.
	    """
	    ordered = [dict(row) for row in rows]
	    ordered.sort(key=lambda entry: int(entry["context_tokens"]))
	    if not ordered:
	        raise ValueError("expected at least one context smoke row")

	    # Already ascending, so the first/last entries are the min/max contexts.
	    contexts = [int(entry["context_tokens"]) for entry in ordered]
	    smallest_context, largest_context = contexts[0], contexts[-1]

	    shapes = [list(entry.get("memory_shape", [])) for entry in ordered]
	    reference_shape = shapes[0]
	    shapes_match = all(shape == reference_shape for shape in shapes)
	    finite_all = all(bool(entry.get("logits_finite", False)) for entry in ordered)

	    state_bytes = _shape_numel(reference_shape) * int(dtype_bytes)
	    widest_window = max(int(entry.get("local_window_tokens", 0)) for entry in ordered)

	    report = {
	        "claim": "TinyMind PureField recurrent memory state remains constant over measured context lengths.",
	        "world_best_claim": False,
	        "constant_memory": shapes_match and finite_all,
	        "contexts": contexts,
	        "min_context_tokens": smallest_context,
	        "max_context_tokens": largest_context,
	        "memory_shape": reference_shape,
	        "state_bytes": state_bytes,
	        "dtype_bytes": int(dtype_bytes),
	        "local_window_max": widest_window,
	        # Denominators are clamped to 1 so zero-valued rows cannot divide by zero.
	        "state_bytes_per_token_at_min_context": state_bytes / max(smallest_context, 1),
	        "state_bytes_per_token_at_max_context": state_bytes / max(largest_context, 1),
	        "compression_ratio_vs_full_kv_proxy": largest_context / max(widest_window, 1),
	        "finite_all": finite_all,
	    }
	    return report


	def _markdown(report: Mapping) -> str:
	lines = [
	"# TinyMind Long-Memory Evidence",
	"",
	f"- Claim: {report['claim']}",
	"- World-best claim: not asserted",
	f"- Constant recurrent memory: {report['constant_memory']}",
	f"- Measured contexts: {report['contexts']}",
	f"- Max context tokens: {report['max_context_tokens']}",
	f"- Memory shape: {report['memory_shape']}",
	f"- State bytes: {report['state_bytes']}",
	f"- State bytes/token at max context: {report['state_bytes_per_token_at_max_context']:.6f}",
	f"- Local window max: {report['local_window_max']}",
	f"- Full-KV proxy compression ratio: {report['compression_ratio_vs_full_kv_proxy']:.2f}x",
	"",
	"This report proves a measured local property only. External rank-1 comparison is still required before any best-in-world claim.",
	"",
	]
	return "\n".join(lines)


	def write_long_memory_report(
	    rows: Iterable[Mapping],
	    json_path: str | Path,
	    dtype_bytes: int = 4,
	) -> dict:
	    """Compute the long-memory report and write it as JSON plus Markdown.

	    The JSON file is written to ``json_path`` (UTF-8, 2-space indent, sorted
	    keys, non-ASCII characters kept as-is). The Markdown summary is written
	    next to it, at the same path with its suffix replaced by ``.md``.
	    Missing parent directories are created.

	    Args:
	        rows: Measurement rows forwarded to ``compute_memory_efficiency``.
	        json_path: Destination of the JSON report.
	        dtype_bytes: Bytes per state element, forwarded unchanged.

	    Returns:
	        The report dict extended with ``json_path`` and ``markdown_path``
	        (both as strings).

	    Raises:
	        ValueError: If ``json_path`` already ends in ``.md`` (the Markdown
	            file would overwrite the JSON), or if ``rows`` is empty.
	    """
	    out = Path(json_path)
	    markdown_path = out.with_suffix(".md")
	    if markdown_path == out:
	        # Both outputs would land on the same file and the Markdown write
	        # would silently clobber the JSON that was just written.
	        raise ValueError(f"json_path must not use the .md suffix: {out}")

	    # Build the report before touching the filesystem so that invalid rows
	    # do not leave freshly created directories behind.
	    report = compute_memory_efficiency(rows, dtype_bytes=dtype_bytes)

	    out.parent.mkdir(parents=True, exist_ok=True)
	    out.write_text(
	        json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True),
	        encoding="utf-8",
	    )
	    markdown_path.write_text(_markdown(report), encoding="utf-8")
	    return {"json_path": str(out), "markdown_path": str(markdown_path), **report}

Xet Storage Details

Size: 3.39 kB
Xet hash: e54a6973cf534cc740da1cb895a01dbdedf8c04e5a44962584cf60c678b3ceeb

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.