Buckets:

bbkdevops
/

unicosys-hypergraph-bucket

Files

xet

bbkdevops/unicosys-hypergraph-bucket / tinymind-native-8b-remote-handoff /bundle /evaluation /native_8b_target.py

bbkdevops

about 1 month ago

download

raw

7.67 kB

	from __future__ import annotations

	from dataclasses import asdict, dataclass
	from datetime import datetime, timezone
	import json
	import math
	from pathlib import Path
	from typing import Any


	@dataclass(frozen=True)
	class Native8BProfile:
	    # Immutable description of one training profile: model-shape settings,
	    # optimizer settings, and descriptive labels. Instances are serialized
	    # with dataclasses.asdict() into the report and rendered into a training
	    # CLI command line by _train_command().

	    # Profile identifier; used as the run sub-directory under --out-dir and
	    # to look up the "axiom_regenesis_8b_target" entry in the report builder.
	    name: str
	    # Forwarded as --layers; multiplies the per-layer parameter estimate.
	    layers: int
	    # Forwarded as --dim; the width `d` used throughout the parameter estimate.
	    dim: int
	    # Forwarded as --lanes; also bounds the estimator's memory_slots (4..16).
	    lanes: int
	    # Forwarded as --seq-len; contributes a seq_len * dim term to the estimate.
	    seq_len: int
	    # Forwarded as --vocab-size; contributes vocab * dim terms to the estimate.
	    vocab_size: int
	    # Forwarded as --virtual-dim; not read by the parameter estimator.
	    virtual_dim: int
	    # Forwarded as --basis-rank; used in the estimator's "axiom" term.
	    basis_rank: int
	    # Forwarded as --facets; used in the estimator's "axiom" term.
	    facets: int
	    # Forwarded as --train-batch-size.
	    train_batch_size: int
	    # Forwarded as --learning-rate.
	    learning_rate: float
	    # Forwarded as --max-steps.
	    max_steps: int
	    # Tier label; the built-in profiles use "local", "local_ceiling" and
	    # "remote_required".
	    tier: str
	    # Free-text description of what the profile is for.
	    purpose: str


	def _estimate_axiom_regenesis_params(profile: Native8BProfile) -> dict[str, Any]:
	d = profile.dim
	layers = profile.layers
	vocab = profile.vocab_size
	lanes = profile.lanes
	memory_slots = max(4, min(16, lanes))
	memory_rank = max(8, min(64, d // 4))
	regen_rank = 4
	regen_top_k = 4

	top_level = (vocab * d) + (profile.seq_len * d) + (d * vocab) + (d * 4)
	axiom = (2 * d * profile.basis_rank) + (profile.facets * profile.basis_rank)
	local_exact = 4 * d * d
	memory = (
	2 * (d * memory_slots * memory_rank + memory_slots * memory_rank)
	+ (d * memory_slots + memory_slots)
	+ (memory_rank * d)
	)
	router = (d * lanes + lanes) + (lanes * d)
	regen = (
	vocab * d
	+ 2 * d
	+ 2 * (d * regen_rank * d)
	+ (2 * d * d)
	+ (d * d + d)
	)
	ffn = (d * (2 * d) + (2 * d)) + ((2 * d) * d + d)
	norms = 3 * d
	per_layer = axiom + local_exact + memory + router + regen + ffn + norms
	self_assess = (d * (2 * d) + (2 * d)) + ((2 * d) * d + d) + (d * 4 + 4)
	total = top_level + layers * per_layer + self_assess
	int4_gib = total * 0.5 / (1024**3)
	bf16_gib = total * 2 / (1024**3)
	return {
	"estimated_parameters": int(total),
	"estimated_parameters_b": total / 1_000_000_000,
	"per_layer_parameters": int(per_layer),
	"memory_slots": memory_slots,
	"memory_rank": memory_rank,
	"regen_top_k": regen_top_k,
	"regen_rank": regen_rank,
	"estimated_weight_gib": {
	"int4": int4_gib,
	"bf16": bf16_gib,
	},
	"training_feasibility": {
	"rtx_3090_full_train": False,
	"rtx_3090_inference_int4_possible_after_runtime_export": int4_gib < 18.0,
	"remote_gpu_training_required": total > 1_000_000_000,
	},
	}


	def _profiles() -> list[Native8BProfile]:
	    """Return the built-in profiles, ordered from smallest to largest.

	    The three entries are the local bridge profile, the local-ceiling
	    profile, and the remote-only 8B-class target.
	    """
	    columns = (
	        "name",
	        "layers",
	        "dim",
	        "lanes",
	        "seq_len",
	        "vocab_size",
	        "virtual_dim",
	        "basis_rank",
	        "facets",
	        "train_batch_size",
	        "learning_rate",
	        "max_steps",
	        "tier",
	        "purpose",
	    )
	    # One row per profile; values line up with `columns` above.
	    rows = (
	        (
	            "local_bridge_3090_proven",
	            24, 192, 12, 192, 512, 32_768, 48, 12,
	            4, 5e-5, 1_800,
	            "local",
	            "Continue the proven 25.96M native checkpoint until raw language stops breaking.",
	        ),
	        (
	            "local_ceiling_3090",
	            36, 512, 32, 384, 512, 262_144, 96, 24,
	            1, 3e-5, 800,
	            "local_ceiling",
	            "Find the largest local native capacity that still trains on a 24GB 3090 without fake claims.",
	        ),
	        (
	            "axiom_regenesis_8b_target",
	            48, 2816, 64, 1024, 4096, 1_048_576, 256, 64,
	            1, 1.2e-5, 50_000,
	            "remote_required",
	            "Actual 8B-class TinyMind-native target. Train on Colab/HF multi-GPU before any quality claim.",
	        ),
	    )
	    return [Native8BProfile(**dict(zip(columns, row))) for row in rows]


	def _train_command(profile: Native8BProfile, dataset: str, out_dir: str) -> str:
	return (
	"python -m train.cli native-axiom-regenesis-train "
	f"--dataset {dataset} "
	f"--out-dir {out_dir}/{profile.name} "
	f"--max-steps {profile.max_steps} --eval-records 256 --limit-records 10000 "
	f"--dim {profile.dim} --layers {profile.layers} --lanes {profile.lanes} "
	f"--seq-len {profile.seq_len} --vocab-size {profile.vocab_size} --tokenizer-mode char_v1 "
	f"--virtual-dim {profile.virtual_dim} --basis-rank {profile.basis_rank} --facets {profile.facets} "
	f"--learning-rate {profile.learning_rate} --train-batch-size {profile.train_batch_size} --device cuda"
	)


	def build_native_8b_target_report(
	    out_dir: str | Path,
	    *,
	    dataset: str = "reports/omni_round_curriculum_xl_latest/omni_round_curriculum.jsonl",
	) -> dict[str, Any]:
	    """Write the native-8B target artifacts into ``out_dir`` and return the report.

	    Three files are written (UTF-8), creating ``out_dir`` if needed:

	    * ``axiom_regenesis_8b_target_config.json`` - the enriched target profile.
	    * ``native_8b_training_commands.ps1`` - one training command per profile.
	    * ``native_8b_target_report.json`` - the full report returned by this call.

	    Args:
	        out_dir: Directory that receives the artifacts.
	        dataset: Dataset path embedded in every training command and in the report.

	    Returns:
	        The report dict, including ``json_path`` pointing at the written report.

	    Raises:
	        StopIteration: If no profile is named ``axiom_regenesis_8b_target``.
	    """
	    out = Path(out_dir)
	    out.mkdir(parents=True, exist_ok=True)

	    # Attach the size estimate and the ready-to-run command to each profile.
	    enriched = []
	    for profile in _profiles():
	        payload = asdict(profile)
	        payload["estimate"] = _estimate_axiom_regenesis_params(profile)
	        payload["train_command"] = _train_command(profile, dataset, "reports/native_8b_target_runs")
	        enriched.append(payload)

	    target = next(item for item in enriched if item["name"] == "axiom_regenesis_8b_target")
	    target_config_path = out / "axiom_regenesis_8b_target_config.json"
	    target_config_path.write_text(
	        json.dumps(target, ensure_ascii=False, indent=2, sort_keys=True) + "\n",
	        encoding="utf-8",
	    )

	    commands_path = out / "native_8b_training_commands.ps1"
	    commands_path.write_text(
	        "\n".join(
	            [
	                "# TinyMind native 8B target commands. Run local profiles first; remote_required needs Colab/HF GPU.",
	                *(item["train_command"] for item in enriched),
	                "",
	            ]
	        ),
	        encoding="utf-8",
	    )

	    # Build the "layers x dim" summary from the profiles themselves. The
	    # previous hard-coded text said "48x3456" while the target profile is
	    # 48 layers x dim 2816, so the report contradicted its own config.
	    stage_labels = {
	        "local": "proven",
	        "local_ceiling": "local ceiling",
	        "remote_required": "8B remote target",
	    }
	    capacity_expansion = " -> ".join(
	        f"{item['layers']}x{item['dim']} {stage_labels.get(item['tier'], item['tier'])}"
	        for item in enriched
	    )

	    target_params = target["estimate"]["estimated_parameters_b"]
	    report = {
	        "schema": "tinymind.native_8b_target.v1",
	        "created_at": datetime.now(timezone.utc).isoformat(),
	        "dataset": dataset,
	        "strategy": {
	            "not_data_only": True,
	            "capacity_expansion": capacity_expansion,
	            "decoder_objective": "repeat unlikelihood + entropy floor + constrained deterministic generation",
	            "memory": "ReGenesis/Evidence Ledger path keeps historical KV from scaling with context",
	            "teacher_use": "Codex-like behavior can be distilled from operational traces; no Codex weights are copied.",
	        },
	        "profiles": enriched,
	        "artifacts": {
	            "target_config_path": str(target_config_path),
	            "commands_path": str(commands_path),
	        },
	        "claim_gate": {
	            "native_8b_target_config_created": True,
	            # True only when the estimate lands in the 7.5B-8.8B window.
	            "estimated_8b_class": bool(7.5 <= target_params <= 8.8),
	            "actual_8b_checkpoint_exists": False,
	            "rtx3090_full_train_claim_allowed": False,
	            "quality_above_larger_models_claim_allowed": False,
	            "leaderboard_safe_claim_allowed": False,
	            "world_best_claim_allowed": False,
	            "reason": "This creates the real 8B target path and commands. Quality claims require completed remote training plus external/raw benchmark evidence.",
	        },
	    }
	    path = out / "native_8b_target_report.json"
	    report["json_path"] = str(path)
	    path.write_text(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True) + "\n", encoding="utf-8")
	    return report

Xet Storage Details

Size: 7.67 kB
Xet hash: 021933b07423e5a873c99bbb3c0ad30f988d10079574ef4d65772360dd3f3dfb

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.