Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-8b-remote-handoff /bundle /evaluation /native_8b_target.py
| from __future__ import annotations | |
| from dataclasses import asdict, dataclass | |
| from datetime import datetime, timezone | |
| import json | |
| import math | |
| from pathlib import Path | |
| from typing import Any | |
@dataclass
class Native8BProfile:
    """One named model/training configuration in the native 8B scale-up plan.

    The ``@dataclass`` decorator is required: profiles in this module are
    constructed with keyword arguments and serialized with
    ``dataclasses.asdict``; a plain class with bare annotations supports
    neither.
    """

    name: str  # profile identifier; also used as the run sub-directory name
    layers: int  # passed as --layers; multiplies the per-layer parameter estimate
    dim: int  # passed as --dim; the width used throughout the parameter estimate
    lanes: int  # passed as --lanes; also bounds the estimator's memory slot count
    seq_len: int  # passed as --seq-len; contributes seq_len * dim to the estimate
    vocab_size: int  # passed as --vocab-size
    virtual_dim: int  # passed as --virtual-dim; not used by the parameter estimate
    basis_rank: int  # passed as --basis-rank
    facets: int  # passed as --facets
    train_batch_size: int  # passed as --train-batch-size
    learning_rate: float  # passed as --learning-rate
    max_steps: int  # passed as --max-steps
    tier: str  # label such as "local", "local_ceiling" or "remote_required"
    purpose: str  # human-readable description of what the profile is for
def _estimate_axiom_regenesis_params(profile: Native8BProfile) -> dict[str, Any]:
    """Estimate parameter count and weight footprint for *profile*.

    This is pure arithmetic over the profile's hyper-parameters; no model is
    instantiated. The term names (axiom, local_exact, memory, router, regen,
    ffn, norms, self_assess) are the estimator's own labels -- the layers
    they correspond to live outside this function.

    Args:
        profile: Supplies ``dim``, ``layers``, ``vocab_size``, ``lanes``,
            ``seq_len``, ``basis_rank`` and ``facets``.

    Returns:
        A dict with the total and per-layer parameter estimates, the derived
        memory/regen constants, the weight size in GiB at 0.5 bytes (int4)
        and 2 bytes (bf16) per parameter, and coarse feasibility flags.
    """

    def dense(n_in: int, n_out: int) -> int:
        """Count the weights plus bias of an ``n_in -> n_out`` affine map."""
        return n_in * n_out + n_out

    width = profile.dim
    vocab = profile.vocab_size
    lane_count = profile.lanes
    hidden = 2 * width
    gib = 1024**3

    # Fixed estimator constants and clamped sizes.
    regen_rank = 4
    regen_top_k = 4
    memory_slots = max(4, min(16, lane_count))
    memory_rank = max(8, min(64, width // 4))

    # Terms counted once for the whole model.
    top_level = vocab * width + profile.seq_len * width + width * vocab + width * 4
    self_assess = dense(width, hidden) + dense(hidden, width) + dense(width, 4)

    # Terms counted once per layer.
    layer_terms = {
        "axiom": 2 * width * profile.basis_rank + profile.facets * profile.basis_rank,
        "local_exact": 4 * width * width,
        "memory": (
            2 * dense(width, memory_slots * memory_rank)
            + dense(width, memory_slots)
            + memory_rank * width
        ),
        "router": dense(width, lane_count) + lane_count * width,
        "regen": (
            vocab * width
            + 2 * width
            + 2 * (width * regen_rank * width)
            + 2 * width * width
            + dense(width, width)
        ),
        "ffn": dense(width, hidden) + dense(hidden, width),
        "norms": 3 * width,
    }
    per_layer = sum(layer_terms.values())

    total = top_level + profile.layers * per_layer + self_assess
    int4_gib = total * 0.5 / gib
    bf16_gib = total * 2 / gib

    weight_gib = {"int4": int4_gib, "bf16": bf16_gib}
    feasibility = {
        "rtx_3090_full_train": False,
        # Int4 inference is flagged possible when the weights stay under 18 GiB.
        "rtx_3090_inference_int4_possible_after_runtime_export": int4_gib < 18.0,
        "remote_gpu_training_required": total > 1_000_000_000,
    }
    return {
        "estimated_parameters": int(total),
        "estimated_parameters_b": total / 1_000_000_000,
        "per_layer_parameters": int(per_layer),
        "memory_slots": memory_slots,
        "memory_rank": memory_rank,
        "regen_top_k": regen_top_k,
        "regen_rank": regen_rank,
        "estimated_weight_gib": weight_gib,
        "training_feasibility": feasibility,
    }
def _profiles() -> list[Native8BProfile]:
    """Return the three capacity profiles, smallest to largest.

    The order is: the proven local bridge, the local 3090 ceiling, and the
    remote-only 8B-class target.
    """
    specs = (
        {
            "name": "local_bridge_3090_proven",
            "layers": 24,
            "dim": 192,
            "lanes": 12,
            "seq_len": 192,
            "vocab_size": 512,
            "virtual_dim": 32_768,
            "basis_rank": 48,
            "facets": 12,
            "train_batch_size": 4,
            "learning_rate": 5e-5,
            "max_steps": 1_800,
            "tier": "local",
            "purpose": "Continue the proven 25.96M native checkpoint until raw language stops breaking.",
        },
        {
            "name": "local_ceiling_3090",
            "layers": 36,
            "dim": 512,
            "lanes": 32,
            "seq_len": 384,
            "vocab_size": 512,
            "virtual_dim": 262_144,
            "basis_rank": 96,
            "facets": 24,
            "train_batch_size": 1,
            "learning_rate": 3e-5,
            "max_steps": 800,
            "tier": "local_ceiling",
            "purpose": "Find the largest local native capacity that still trains on a 24GB 3090 without fake claims.",
        },
        {
            "name": "axiom_regenesis_8b_target",
            "layers": 48,
            "dim": 2816,
            "lanes": 64,
            "seq_len": 1024,
            "vocab_size": 4096,
            "virtual_dim": 1_048_576,
            "basis_rank": 256,
            "facets": 64,
            "train_batch_size": 1,
            "learning_rate": 1.2e-5,
            "max_steps": 50_000,
            "tier": "remote_required",
            "purpose": "Actual 8B-class TinyMind-native target. Train on Colab/HF multi-GPU before any quality claim.",
        },
    )
    return [Native8BProfile(**spec) for spec in specs]
def _train_command(profile: Native8BProfile, dataset: str, out_dir: str) -> str:
    """Render the single-line training CLI invocation for *profile*.

    Args:
        profile: Source of the model and optimizer flag values.
        dataset: Value for ``--dataset``.
        out_dir: Parent directory; the run is placed in ``out_dir/<profile.name>``.

    Returns:
        The command as one space-separated string. Values are inserted as-is,
        without any shell quoting.
    """
    options = [
        ("--dataset", dataset),
        ("--out-dir", f"{out_dir}/{profile.name}"),
        ("--max-steps", profile.max_steps),
        ("--eval-records", 256),
        ("--limit-records", 10000),
        ("--dim", profile.dim),
        ("--layers", profile.layers),
        ("--lanes", profile.lanes),
        ("--seq-len", profile.seq_len),
        ("--vocab-size", profile.vocab_size),
        ("--tokenizer-mode", "char_v1"),
        ("--virtual-dim", profile.virtual_dim),
        ("--basis-rank", profile.basis_rank),
        ("--facets", profile.facets),
        ("--learning-rate", profile.learning_rate),
        ("--train-batch-size", profile.train_batch_size),
        ("--device", "cuda"),
    ]
    rendered = " ".join(f"{flag} {value}" for flag, value in options)
    return f"python -m train.cli native-axiom-regenesis-train {rendered}"
def build_native_8b_target_report(
    out_dir: str | Path,
    *,
    dataset: str = "reports/omni_round_curriculum_xl_latest/omni_round_curriculum.jsonl",
) -> dict[str, Any]:
    """Write the native-8B target artifacts into *out_dir* and return the report.

    Three UTF-8 files are written (the directory is created if missing):

    * ``axiom_regenesis_8b_target_config.json`` -- the enriched 8B target profile
    * ``native_8b_training_commands.ps1`` -- one training command per profile
    * ``native_8b_target_report.json`` -- the full report returned by this call

    Args:
        out_dir: Directory that receives the artifacts.
        dataset: Dataset path embedded in every training command and in the
            report.

    Returns:
        The report dict, including a ``json_path`` entry that points at the
        written report file.

    Raises:
        StopIteration: If no profile is named ``axiom_regenesis_8b_target``.
    """
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)

    # Each profile becomes a plain dict extended with its estimate and command.
    enriched = []
    for profile in _profiles():
        payload = asdict(profile)
        payload["estimate"] = _estimate_axiom_regenesis_params(profile)
        payload["train_command"] = _train_command(profile, dataset, "reports/native_8b_target_runs")
        enriched.append(payload)

    target = next(item for item in enriched if item["name"] == "axiom_regenesis_8b_target")

    target_config_path = out / "axiom_regenesis_8b_target_config.json"
    target_config_path.write_text(
        json.dumps(target, ensure_ascii=False, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )

    commands_path = out / "native_8b_training_commands.ps1"
    command_lines = [
        "# TinyMind native 8B target commands. Run local profiles first; remote_required needs Colab/HF GPU.",
        *(item["train_command"] for item in enriched),
        "",  # a trailing empty element makes the file end with a newline
    ]
    commands_path.write_text("\n".join(command_lines), encoding="utf-8")

    # Derive the "<layers>x<dim>" labels from the profiles themselves. The
    # previous hard-coded text advertised "48x3456" for the 8B target while the
    # profile (and therefore the estimate and the claim gate) uses dim 2816.
    shapes = {item["name"]: f"{item['layers']}x{item['dim']}" for item in enriched}
    proven_shape = shapes["local_bridge_3090_proven"]
    ceiling_shape = shapes["local_ceiling_3090"]
    target_shape = shapes["axiom_regenesis_8b_target"]
    capacity_expansion = (
        f"{proven_shape} proven -> {ceiling_shape} local ceiling -> {target_shape} 8B remote target"
    )

    target_params = target["estimate"]["estimated_parameters_b"]
    report = {
        "schema": "tinymind.native_8b_target.v1",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "dataset": dataset,
        "strategy": {
            "not_data_only": True,
            "capacity_expansion": capacity_expansion,
            "decoder_objective": "repeat unlikelihood + entropy floor + constrained deterministic generation",
            "memory": "ReGenesis/Evidence Ledger path keeps historical KV from scaling with context",
            "teacher_use": "Codex-like behavior can be distilled from operational traces; no Codex weights are copied.",
        },
        "profiles": enriched,
        "artifacts": {
            "target_config_path": str(target_config_path),
            "commands_path": str(commands_path),
        },
        "claim_gate": {
            "native_8b_target_config_created": True,
            # "8B class" means the estimate lands between 7.5 and 8.8 billion.
            "estimated_8b_class": bool(7.5 <= target_params <= 8.8),
            "actual_8b_checkpoint_exists": False,
            "rtx3090_full_train_claim_allowed": False,
            "quality_above_larger_models_claim_allowed": False,
            "leaderboard_safe_claim_allowed": False,
            "world_best_claim_allowed": False,
            "reason": "This creates the real 8B target path and commands. Quality claims require completed remote training plus external/raw benchmark evidence.",
        },
    }

    path = out / "native_8b_target_report.json"
    report["json_path"] = str(path)
    path.write_text(
        json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    return report
Xet Storage Details
- Size: 7.67 kB
- Xet hash: 021933b07423e5a873c99bbb3c0ad30f988d10079574ef4d65772360dd3f3dfb
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.