Buckets:

bbkdevops
/

unicosys-hypergraph-bucket

Files

xet

bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /evaluation /model_sizing.py

bbkdevops

about 1 month ago

download

raw

4.23 kB

	"""Model sizing and RTX 3090 preflight helpers."""

	from __future__ import annotations

	from datetime import datetime, timezone
	import json
	from pathlib import Path

	from model.config import OmegaConfig, purefield_config


	def dense_param_estimate(cfg: OmegaConfig) -> int:
	    """Return a rough parameter count for a dense model described by ``cfg``.

	    The estimate is the embedding table plus a fixed per-layer cost
	    multiplied by the number of layers.

	    Args:
	        cfg: Configuration exposing ``vocab_size``, ``dim``, ``ffn_mult``
	            and ``n_layers``.

	    Returns:
	        The estimated parameter count, truncated to ``int``.
	    """
	    dim_sq = cfg.dim * cfg.dim
	    embedding = cfg.vocab_size * cfg.dim
	    # Per layer: four dim x dim matrices, two dim x (dim * ffn_mult)
	    # matrices, and 4 * dim extra parameters.
	    # NOTE(review): presumably attention projections, FFN up/down and
	    # norm/bias vectors -- confirm against the model definition.
	    per_layer = 4 * dim_sq + 2 * dim_sq * cfg.ffn_mult + cfg.dim * 4
	    # int() guards against a non-integer ffn_mult producing a float total.
	    return int(embedding + per_layer * cfg.n_layers)


	def purefield_param_estimate(cfg: OmegaConfig) -> int:
	    """Return a rough parameter count for the PureField variant of ``cfg``.

	    The total is the sum of five terms: the embedding table, a block of
	    shared weights counted once, a low-rank adapter term counted once per
	    layer, an optional regen-KV term, and a norms/output-head term.

	    Args:
	        cfg: Configuration exposing ``dim``, ``memory_ranks``,
	            ``timescale_count``, ``low_rank``, ``vocab_size``, ``n_layers``,
	            ``regen_kv_rank``, ``regen_kv_enabled`` and
	            ``tie_word_embeddings``.

	    Returns:
	        The estimated parameter count, truncated to ``int``.
	    """
	    dim = cfg.dim
	    mem_ranks = cfg.memory_ranks
	    timescales = cfg.timescale_count
	    low_rank = cfg.low_rank

	    embedding = cfg.vocab_size * dim

	    # Weights counted once, independent of n_layers.
	    shared = (
	        (dim * mem_ranks) * 2
	        + dim * dim
	        + dim * timescales * 2
	        + mem_ranks * timescales
	        + (dim * 3) * dim
	    )

	    # Each (a + b) term is scaled by low_rank.
	    # NOTE(review): presumably the two factors of a rank-`low_rank`
	    # adapter over an a x b shared matrix -- confirm against the model code.
	    adapter_per_layer = low_rank * (
	        (dim + mem_ranks) * 2
	        + (dim + dim)
	        + (dim + timescales) * 2
	        + (mem_ranks + timescales)
	        + (dim * 3 + dim)
	    )

	    # The regen-KV term only counts when enabled; its rank is floored at 1.
	    if cfg.regen_kv_enabled:
	        regen = dim * max(cfg.regen_kv_rank, 1) * 4
	    else:
	        regen = 0

	    # 2 * dim per layer, plus a dim x vocab output head unless the head
	    # is tied to the embedding table.
	    untied_head = 0 if cfg.tie_word_embeddings else 1
	    norms_head = cfg.n_layers * dim * 2 + dim * cfg.vocab_size * untied_head

	    return int(
	        embedding + shared + adapter_per_layer * cfg.n_layers + regen + norms_head
	    )


	def vram_estimate(params: int) -> dict:
	    """Convert a parameter count into memory estimates and feasibility flags.

	    Sizes are computed as ``params * bytes_per_param / 1024**3``, using
	    4 bytes (fp32), 2 bytes (bf16), 0.5 bytes (int4) and 16 bytes per
	    parameter for the Adam training minimum.

	    Args:
	        params: Number of model parameters.

	    Returns:
	        A dict with the input ``params``, the four size estimates, and two
	        booleans: whether the Adam training estimate is below 22 and
	        whether the raw int4 weight estimate is below 18.
	    """
	    gib = 1024**3
	    fp32_gb = params * 4 / gib
	    bf16_gb = params * 2 / gib
	    int4_gb = params * 0.5 / gib
	    # NOTE(review): 16 bytes/param presumably covers fp32 weights,
	    # gradients and both Adam moments -- confirm the intended breakdown.
	    adam_gb = params * 16 / gib
	    return {
	        "params": params,
	        "fp32_weights_gb": fp32_gb,
	        "bf16_weights_gb": bf16_gb,
	        "int4_raw_weights_gb": int4_gb,
	        "adam_training_min_gb": adam_gb,
	        # Strict comparisons: landing exactly on a threshold is infeasible.
	        "rtx_3090_24gb_dense_full_train_feasible": adam_gb < 22,
	        "rtx_3090_24gb_int4_or_adapter_feasible": int4_gb < 18,
	    }


	def build_model_preflight(out_dir: str | Path, size: str = "4b", architecture: str = "purefield") -> dict:
	    """Build the RTX 3090 preflight report for one model size and save it.

	    The report combines the dense and PureField parameter estimates, their
	    memory estimates, and fixed plan/recommendation text. It is written as
	    JSON to ``<out_dir>/tinymind_<size>_preflight.json``; ``out_dir`` is
	    created if it does not exist.

	    Args:
	        out_dir: Directory that receives the JSON report.
	        size: Model size key; must be ``"4b"`` or ``"12b"``.
	        architecture: Must be ``"purefield"``.

	    Returns:
	        The report dict, including ``"report_path"`` with the written
	        file's path as a string.

	    Raises:
	        ValueError: If ``architecture`` or ``size`` is not supported.
	    """
	    if architecture != "purefield":
	        raise ValueError("only purefield preflight is supported here")
	    if size not in {"4b", "12b"}:
	        raise ValueError("preflight size must be one of: 4b, 12b")

	    cfg = purefield_config(size)
	    dense_params = dense_param_estimate(cfg)
	    purefield_params = purefield_param_estimate(cfg)
	    # Computed once and reused for both the report section and the flag.
	    purefield_vram = vram_estimate(purefield_params)
	    adapter_training_feasible = purefield_vram["rtx_3090_24gb_int4_or_adapter_feasible"]

	    report = {
	        "schema_version": "tinymind-model-preflight-v2",
	        "created_at": datetime.now(timezone.utc).isoformat(),
	        "architecture": architecture,
	        "size": size,
	        "gpu_target": "RTX 3090 24GB",
	        # Shallow copy so callers editing the returned report cannot
	        # mutate the config object's attributes through a shared dict.
	        "config": dict(cfg.__dict__),
	        "dense_class_params": dense_params,
	        "purefield_estimated_params": purefield_params,
	        "dense_class_vram": vram_estimate(dense_params),
	        "purefield_vram": purefield_vram,
	        "compression_plan": {
	            "weight_format": "int4_4x8_pairwise_sparse",
	            "optimizer_path": "adapter_or_lora_bitsharp_tuning",
	            "exact_memory": "ReGenesis ledger stores long context exactly; model state stays bounded.",
	            "data_policy": "UltraPure/lineage-audited data only; block unsupported world-best claims.",
	        },
	        "rtx_3090_execution": {
	            # Hard-coded: full dense Adam training is always reported as
	            # infeasible here, independent of the computed estimates.
	            "full_dense_adam_training_feasible": False,
	            "adapter_or_int4_experiment_feasible": adapter_training_feasible,
	            "recommended_batching": "microbatch=1, gradient_checkpointing=true, CPU/NVMe offload for 12B-class dense weights",
	        },
	        "recommendation": (
	            "Use PureField/ReGenesis with gradient checkpointing, adapter/BitSharp tuning, "
	            "small microbatches, CPU/offload if needed, and INT4 sparse export. "
	            f"Do not attempt full dense Adam training of {size.upper()} on a single 3090."
	        ),
	        "world_best_claim_allowed": False,
	    }

	    out = Path(out_dir)
	    out.mkdir(parents=True, exist_ok=True)
	    path = out / f"tinymind_{size}_preflight.json"
	    # Added before serialising so the saved JSON also records its own path.
	    report["report_path"] = str(path)
	    path.write_text(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
	    return report


	def build_4b_preflight(out_dir: str | Path, architecture: str = "purefield") -> dict:
	    """Build and save the preflight report for the ``"4b"`` size.

	    Args:
	        out_dir: Directory that receives the JSON report.
	        architecture: Forwarded unchanged to ``build_model_preflight``.

	    Returns:
	        The report dict returned by ``build_model_preflight``.
	    """
	    return build_model_preflight(out_dir, size="4b", architecture=architecture)


	def build_12b_preflight(out_dir: str | Path, architecture: str = "purefield") -> dict:
	    """Build and save the preflight report for the ``"12b"`` size.

	    Args:
	        out_dir: Directory that receives the JSON report.
	        architecture: Forwarded unchanged to ``build_model_preflight``.

	    Returns:
	        The report dict returned by ``build_model_preflight``.
	    """
	    return build_model_preflight(out_dir, size="12b", architecture=architecture)

Xet Storage Details

Size: 4.23 kB
Xet hash: a70391e55f5f4bf607c3e47781968595b30b6527ae9b6f0fb2f0e4cffef64e37

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.