Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /evaluation /model_sizing.py
| """Model sizing and RTX 3090 preflight helpers.""" | |
| from __future__ import annotations | |
| from datetime import datetime, timezone | |
| import json | |
| from pathlib import Path | |
| from model.config import OmegaConfig, purefield_config | |
def dense_param_estimate(cfg: OmegaConfig) -> int:
    """Estimate the parameter count of a dense model described by ``cfg``.

    The estimate is ``vocab_size * dim`` for the embedding plus, for each of
    ``n_layers`` layers, ``4 * dim**2 + 2 * dim**2 * ffn_mult + 4 * dim``.

    Args:
        cfg: Configuration providing ``vocab_size``, ``dim``, ``ffn_mult``
            and ``n_layers``.

    Returns:
        The estimated parameter count, converted with ``int()``.
    """
    width = cfg.dim
    square = width * width
    embedding = cfg.vocab_size * width
    # Per-layer contributions, kept in the same order as the formula above.
    layer_terms = (
        4 * square,
        2 * square * cfg.ffn_mult,
        width * 4,
    )
    return int(embedding + sum(layer_terms) * cfg.n_layers)
def purefield_param_estimate(cfg: OmegaConfig) -> int:
    """Estimate the parameter count of a PureField model described by ``cfg``.

    The total is the sum of five contributions:

    * an embedding term ``vocab_size * dim``;
    * a shared term built from ``dim``, ``memory_ranks`` and
      ``timescale_count`` that is counted once;
    * an adapter term proportional to ``low_rank``, counted once per layer;
    * a regen term ``dim * max(regen_kv_rank, 1) * 4``, included only when
      ``regen_kv_enabled`` is truthy;
    * a norms/head term ``n_layers * dim * 2``, plus ``dim * vocab_size``
      when ``tie_word_embeddings`` is falsy.

    Args:
        cfg: Configuration providing the attributes named above.

    Returns:
        The estimated parameter count, converted with ``int()``.
    """
    dim = cfg.dim
    mem = cfg.memory_ranks
    scales = cfg.timescale_count
    low_rank = cfg.low_rank

    embedding = cfg.vocab_size * dim

    # Terms that appear once, independent of the layer count.
    shared_terms = (
        (dim * mem) * 2,
        dim * dim,
        dim * scales * 2,
        mem * scales,
        (dim * 3) * dim,
    )
    shared = sum(shared_terms)

    # Each shared (a x b) shape contributes low_rank * (a + b) per layer.
    adapter_terms = (
        (dim + mem) * 2,
        dim + dim,
        (dim + scales) * 2,
        mem + scales,
        dim * 3 + dim,
    )
    adapters = low_rank * sum(adapter_terms) * cfg.n_layers

    if cfg.regen_kv_enabled:
        # A rank below 1 is treated as 1.
        regen = dim * max(cfg.regen_kv_rank, 1) * 4
    else:
        regen = 0

    # The vocab-sized head term drops out when embeddings are tied.
    untied = 0 if cfg.tie_word_embeddings else 1
    norms_and_head = cfg.n_layers * dim * 2 + dim * cfg.vocab_size * untied

    return int(embedding + shared + adapters + regen + norms_and_head)
def vram_estimate(params: int) -> dict:
    """Translate a parameter count into memory figures and feasibility flags.

    Sizes are computed with a divisor of ``1024**3`` (so the ``*_gb`` values
    are binary gigabytes) using 4, 2, 0.5 and 16 bytes per parameter for the
    fp32, bf16, int4 and Adam-training figures respectively.

    Args:
        params: Number of model parameters.

    Returns:
        A dict with the parameter count, the four size figures, and two
        booleans: whether the Adam-training figure is below 22 and whether
        the int4 figure is below 18.
    """
    gib = 1024**3
    int4_gb = params * 0.5 / gib
    adam_gb = params * 16 / gib
    return {
        "params": params,
        "fp32_weights_gb": params * 4 / gib,
        "bf16_weights_gb": params * 2 / gib,
        "int4_raw_weights_gb": int4_gb,
        "adam_training_min_gb": adam_gb,
        # NOTE(review): the 22 / 18 limits presumably leave headroom on a
        # 24 GB card -- confirm the intended margins.
        "rtx_3090_24gb_dense_full_train_feasible": adam_gb < 22,
        "rtx_3090_24gb_int4_or_adapter_feasible": int4_gb < 18,
    }
def build_model_preflight(out_dir: str | Path, size: str = "4b", architecture: str = "purefield") -> dict:
    """Build a PureField preflight report and write it to ``out_dir`` as JSON.

    The report combines the configuration returned by ``purefield_config``,
    the dense and PureField parameter estimates with their memory figures,
    and fixed plan/recommendation text. It is written to
    ``<out_dir>/tinymind_<size>_preflight.json`` (UTF-8, indented, keys
    sorted); ``out_dir`` is created if it does not exist.

    Args:
        out_dir: Directory that receives the JSON report.
        size: Model size label; must be ``"4b"`` or ``"12b"``.
        architecture: Must be ``"purefield"``.

    Returns:
        The report dict, including a ``"report_path"`` entry with the path
        of the file that was written.

    Raises:
        ValueError: If ``architecture`` is not ``"purefield"`` or ``size`` is
            not one of the supported labels.
    """
    if architecture != "purefield":
        raise ValueError("only purefield preflight is supported here")
    if size not in {"4b", "12b"}:
        raise ValueError("preflight size must be one of: 4b, 12b")

    cfg = purefield_config(size)
    dense_params = dense_param_estimate(cfg)
    purefield_params = purefield_param_estimate(cfg)
    # Computed once and reused for both the report section and the
    # adapter/int4 feasibility flag.
    purefield_vram = vram_estimate(purefield_params)

    report = {
        "schema_version": "tinymind-model-preflight-v2",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "architecture": architecture,
        "size": size,
        "gpu_target": "RTX 3090 24GB",
        # Shallow copy: the report must not alias the config object's live
        # attribute dict, otherwise mutating the report mutates the config.
        "config": dict(vars(cfg)),
        "dense_class_params": dense_params,
        "purefield_estimated_params": purefield_params,
        "dense_class_vram": vram_estimate(dense_params),
        "purefield_vram": purefield_vram,
        "compression_plan": {
            "weight_format": "int4_4x8_pairwise_sparse",
            "optimizer_path": "adapter_or_lora_bitsharp_tuning",
            "exact_memory": "ReGenesis ledger stores long context exactly; model state stays bounded.",
            "data_policy": "UltraPure/lineage-audited data only; block unsupported world-best claims.",
        },
        "rtx_3090_execution": {
            "full_dense_adam_training_feasible": False,
            "adapter_or_int4_experiment_feasible": purefield_vram["rtx_3090_24gb_int4_or_adapter_feasible"],
            "recommended_batching": "microbatch=1, gradient_checkpointing=true, CPU/NVMe offload for 12B-class dense weights",
        },
        "recommendation": (
            "Use PureField/ReGenesis with gradient checkpointing, adapter/BitSharp tuning, "
            "small microbatches, CPU/offload if needed, and INT4 sparse export. "
            f"Do not attempt full dense Adam training of {size.upper()} on a single 3090."
        ),
        "world_best_claim_allowed": False,
    }

    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    path = out / f"tinymind_{size}_preflight.json"
    # Record the path before serializing so the file on disk contains it too.
    report["report_path"] = str(path)
    path.write_text(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
    return report
def build_4b_preflight(out_dir: str | Path, architecture: str = "purefield") -> dict:
    """Build and write the preflight report for the ``"4b"`` size.

    Convenience wrapper that forwards ``out_dir`` and ``architecture`` to
    ``build_model_preflight`` with the size fixed to ``"4b"`` and returns
    the resulting report dict.
    """
    report = build_model_preflight(out_dir, "4b", architecture)
    return report
def build_12b_preflight(out_dir: str | Path, architecture: str = "purefield") -> dict:
    """Build and write the preflight report for the ``"12b"`` size.

    Convenience wrapper that forwards ``out_dir`` and ``architecture`` to
    ``build_model_preflight`` with the size fixed to ``"12b"`` and returns
    the resulting report dict.
    """
    report = build_model_preflight(out_dir, "12b", architecture)
    return report
Xet Storage Details
- Size:
- 4.23 kB
- Xet hash:
- a70391e55f5f4bf607c3e47781968595b30b6527ae9b6f0fb2f0e4cffef64e37
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.