bbkdevops's picture
download
raw
4.23 kB
"""Model sizing and RTX 3090 preflight helpers."""
from __future__ import annotations
from datetime import datetime, timezone
import json
from pathlib import Path
from model.config import OmegaConfig, purefield_config
def dense_param_estimate(cfg: OmegaConfig) -> int:
    """Return a rough parameter count for a dense model sized by ``cfg``.

    The estimate is ``vocab_size * dim`` for the embedding table plus, for
    each of ``n_layers`` layers, ``4 * dim**2 + 2 * dim**2 * ffn_mult +
    4 * dim``.  The result is truncated to ``int`` so a fractional
    ``ffn_mult`` still yields a whole number.
    """
    width = cfg.dim
    embedding_params = cfg.vocab_size * width

    # Presumably the four dim x dim attention projections -- TODO confirm.
    square_terms = 4 * width * width
    # Presumably the two feed-forward matrices (dim x dim*ffn_mult) -- TODO confirm.
    ffn_terms = 2 * width * width * cfg.ffn_mult
    # Four dim-sized vectors per layer (likely norm scales/biases) -- TODO confirm.
    vector_terms = width * 4

    layer_params = square_terms + ffn_terms + vector_terms
    return int(embedding_params + layer_params * cfg.n_layers)
def purefield_param_estimate(cfg: OmegaConfig) -> int:
    """Return a rough parameter count for the PureField variant of ``cfg``.

    The total is the sum of:

    * the embedding table (``vocab_size * dim``);
    * one set of shared matrices sized from ``dim``, ``memory_ranks`` and
      ``timescale_count``;
    * per-layer adapter factors, each scaled by ``low_rank``;
    * an optional ReGen KV term, counted only when ``regen_kv_enabled``;
    * per-layer norm vectors plus an output head, where the head is counted
      only when ``tie_word_embeddings`` is false.
    """
    dim = cfg.dim
    mem_ranks = cfg.memory_ranks
    timescales = cfg.timescale_count
    adapter_rank = cfg.low_rank

    embedding_params = cfg.vocab_size * dim

    # Shared (non-per-layer) matrices, added in a fixed left-to-right order.
    dim_mem_pair = (dim * mem_ranks) * 2
    dim_square = dim * dim
    dim_scale_pair = dim * timescales * 2
    mem_scale = mem_ranks * timescales
    triple_dim = (dim * 3) * dim
    shared_params = dim_mem_pair + dim_square + dim_scale_pair + mem_scale + triple_dim

    # Each shared matrix of shape (a, b) contributes (a + b) per unit of rank,
    # i.e. the size of a rank-1 factor pair for that matrix.
    factor_sizes = (
        (dim + mem_ranks) * 2
        + (dim + dim)
        + (dim + timescales) * 2
        + (mem_ranks + timescales)
        + (dim * 3 + dim)
    )
    adapter_params_per_layer = adapter_rank * factor_sizes

    # regen_kv_rank is only consulted when the feature is switched on; a
    # configured rank below 1 is clamped up to 1.
    if cfg.regen_kv_enabled:
        regen_params = dim * max(cfg.regen_kv_rank, 1) * 4
    else:
        regen_params = 0

    norm_params = cfg.n_layers * dim * 2
    if cfg.tie_word_embeddings:
        head_multiplier = 0
    else:
        head_multiplier = 1
    norms_and_head = norm_params + dim * cfg.vocab_size * head_multiplier

    return int(
        embedding_params
        + shared_params
        + adapter_params_per_layer * cfg.n_layers
        + regen_params
        + norms_and_head
    )
def vram_estimate(params: int) -> dict:
    """Translate a parameter count into memory figures and feasibility flags.

    Sizes are reported in units of ``1024**3`` bytes.  Weight footprints use
    4, 2 and 0.5 bytes per parameter for fp32, bf16 and int4 respectively;
    the Adam figure uses 16 bytes per parameter (presumably weights,
    gradients and two optimizer moments in fp32 -- TODO confirm).

    The two ``rtx_3090_24gb_*`` flags are strict ``<`` comparisons against
    22 and 18, i.e. thresholds kept below the card's 24 GB.
    """
    bytes_per_gb = 1024**3

    def size_gb(bytes_per_param):
        # Multiply first, then divide, so results match a direct
        # ``params * k / 1024**3`` computation exactly.
        return params * bytes_per_param / bytes_per_gb

    estimate = {"params": params}
    estimate["fp32_weights_gb"] = size_gb(4)
    estimate["bf16_weights_gb"] = size_gb(2)
    estimate["int4_raw_weights_gb"] = size_gb(0.5)
    estimate["adam_training_min_gb"] = size_gb(16)
    estimate["rtx_3090_24gb_dense_full_train_feasible"] = size_gb(16) < 22
    estimate["rtx_3090_24gb_int4_or_adapter_feasible"] = size_gb(0.5) < 18
    return estimate
def build_model_preflight(out_dir: str | Path, size: str = "4b", architecture: str = "purefield") -> dict:
    """Build the RTX 3090 preflight report for ``size`` and write it as JSON.

    Args:
        out_dir: Directory that receives the report; created (with parents)
            if it does not exist.
        size: Model size key, either ``"4b"`` or ``"12b"`` (case-sensitive).
        architecture: Must be ``"purefield"``.

    Returns:
        The report dict.  It includes ``"report_path"``, the path of the JSON
        file written as ``tinymind_{size}_preflight.json`` inside ``out_dir``.

    Raises:
        ValueError: If ``architecture`` or ``size`` is not supported.
    """
    if architecture != "purefield":
        raise ValueError("only purefield preflight is supported here")
    if size not in {"4b", "12b"}:
        raise ValueError("preflight size must be one of: 4b, 12b")

    cfg = purefield_config(size)
    dense_params = dense_param_estimate(cfg)
    purefield_params = purefield_param_estimate(cfg)
    # Computed once and reused for both the report section and the
    # adapter-feasibility flag below.
    purefield_vram = vram_estimate(purefield_params)

    report = {
        "schema_version": "tinymind-model-preflight-v2",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "architecture": architecture,
        "size": size,
        "gpu_target": "RTX 3090 24GB",
        # Shallow copy: storing cfg.__dict__ itself would alias the live
        # config, so a caller editing report["config"] would silently
        # mutate the config object.
        "config": dict(cfg.__dict__),
        "dense_class_params": dense_params,
        "purefield_estimated_params": purefield_params,
        "dense_class_vram": vram_estimate(dense_params),
        "purefield_vram": purefield_vram,
        "compression_plan": {
            "weight_format": "int4_4x8_pairwise_sparse",
            "optimizer_path": "adapter_or_lora_bitsharp_tuning",
            "exact_memory": "ReGenesis ledger stores long context exactly; model state stays bounded.",
            "data_policy": "UltraPure/lineage-audited data only; block unsupported world-best claims.",
        },
        "rtx_3090_execution": {
            # NOTE(review): hard-coded rather than derived from
            # dense_class_vram; kept as-is to preserve the report contents.
            "full_dense_adam_training_feasible": False,
            "adapter_or_int4_experiment_feasible": purefield_vram["rtx_3090_24gb_int4_or_adapter_feasible"],
            "recommended_batching": "microbatch=1, gradient_checkpointing=true, CPU/NVMe offload for 12B-class dense weights",
        },
        "recommendation": (
            "Use PureField/ReGenesis with gradient checkpointing, adapter/BitSharp tuning, "
            "small microbatches, CPU/offload if needed, and INT4 sparse export. "
            f"Do not attempt full dense Adam training of {size.upper()} on a single 3090."
        ),
        "world_best_claim_allowed": False,
    }

    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    path = out / f"tinymind_{size}_preflight.json"
    # Record the path before serializing so the file on disk is
    # self-describing and matches the returned dict.
    report["report_path"] = str(path)
    path.write_text(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
    return report
def build_4b_preflight(out_dir: str | Path, architecture: str = "purefield") -> dict:
    """Build and write the preflight report for the ``"4b"`` size.

    Thin wrapper that forwards ``out_dir`` and ``architecture`` to
    ``build_model_preflight`` with the size fixed to ``"4b"`` and returns
    its report dict unchanged.
    """
    report = build_model_preflight(out_dir, "4b", architecture)
    return report
def build_12b_preflight(out_dir: str | Path, architecture: str = "purefield") -> dict:
    """Build and write the preflight report for the ``"12b"`` size.

    Thin wrapper that forwards ``out_dir`` and ``architecture`` to
    ``build_model_preflight`` with the size fixed to ``"12b"`` and returns
    its report dict unchanged.
    """
    report = build_model_preflight(out_dir, "12b", architecture)
    return report

Xet Storage Details

Size:
4.23 kB
·
Xet hash:
a70391e55f5f4bf607c3e47781968595b30b6527ae9b6f0fb2f0e4cffef64e37

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.