{ "created_at": "2026-06-04T17:08:36.925216+00:00", "description": "Final checkpoints for genome-only 6x8 scaling grid.", "selection_policy": "indices 0-2 from nocompile sweep; indices 3-47 from compile100m sweep; old extra nocompile p001m_t100m excluded.", "checkpoint_count": 48, "records": [ { "index": 0, "run_name": "scaling_grid_torch_genome_20260603_nocompile_genome_p001m_t010m_muon_seed0", "size_label": "001m", "token_label": "010m", "target_tokens": 10000000, "model_layers": 6, "model_dim": 96, "attention_heads": 4, "head_dim": 24, "estimated_params": 936000, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_nocompile.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_nocompile_genome_p001m_t010m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_nocompile_genome_p001m_t010m_muon_seed0/model_final.safetensors", "bytes": 3751152, "sha256": "e6f1bdc41bbf371648399416cee5b3af51970b94e310b8db385701611c5516a4" }, { "index": 1, "run_name": "scaling_grid_torch_genome_20260603_nocompile_genome_p001m_t020m_muon_seed0", "size_label": "001m", "token_label": "020m", "target_tokens": 20000000, "model_layers": 6, "model_dim": 96, "attention_heads": 4, "head_dim": 24, "estimated_params": 936000, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_nocompile.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_nocompile_genome_p001m_t020m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_nocompile_genome_p001m_t020m_muon_seed0/model_final.safetensors", "bytes": 3751152, "sha256": "318eb65e3bcf93de2ceac8609b4006e82b8c44fb1e8a84847facb86ec6a21cb7" }, { "index": 2, "run_name": "scaling_grid_torch_genome_20260603_nocompile_genome_p001m_t050m_muon_seed0", "size_label": "001m", "token_label": "050m", "target_tokens": 50000000, "model_layers": 6, "model_dim": 96, "attention_heads": 4, "head_dim": 24, "estimated_params": 936000, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_nocompile.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_nocompile_genome_p001m_t050m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_nocompile_genome_p001m_t050m_muon_seed0/model_final.safetensors", "bytes": 3751152, "sha256": "777df93e7d47ef8e52b8780c8f4a7f70e815cad6b131e7ea89d28159a97dc8e6" }, { "index": 3, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t100m_muon_seed0", "size_label": "001m", "token_label": "100m", "target_tokens": 100000000, "model_layers": 6, "model_dim": 96, "attention_heads": 4, "head_dim": 24, "estimated_params": 936000, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t100m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t100m_muon_seed0/model_final.safetensors", "bytes": 3751840, "sha256": "4ba89b6058cc77a8e91a523ddb9d7566fbe3793f5c6114a6d06ea9991edf6fab" }, { "index": 4, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t200m_muon_seed0", "size_label": "001m", "token_label": "200m", "target_tokens": 200000000, "model_layers": 6, "model_dim": 96, "attention_heads": 4, "head_dim": 24, "estimated_params": 936000, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t200m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t200m_muon_seed0/model_final.safetensors", "bytes": 3751840, "sha256": "f1032712395ef248e7eb93b6d55bc42a34f4be5bae9d81bc0d6a66a4609e1002" }, { "index": 5, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t500m_muon_seed0", "size_label": "001m", "token_label": "500m", "target_tokens": 500000000, "model_layers": 6, "model_dim": 96, "attention_heads": 4, "head_dim": 24, "estimated_params": 936000, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t500m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t500m_muon_seed0/model_final.safetensors", "bytes": 3751840, "sha256": "4dbc199694e213bef18ac5630dcc8b3203c86fdfaee07e74cb6d586706f60004" }, { "index": 6, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t001b_muon_seed0", "size_label": "001m", "token_label": "001b", "target_tokens": 1000000000, "model_layers": 6, "model_dim": 96, "attention_heads": 4, "head_dim": 24, "estimated_params": 936000, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t001b_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t001b_muon_seed0/model_final.safetensors", "bytes": 3751840, "sha256": "7ebd586dc3613d2aa7f899137ddae9943b487164cf964b068ef1ce73c76940bc" }, { "index": 7, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t002b_muon_seed0", "size_label": "001m", "token_label": "002b", "target_tokens": 2000000000, "model_layers": 6, "model_dim": 96, "attention_heads": 4, "head_dim": 24, "estimated_params": 936000, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t002b_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p001m_t002b_muon_seed0/model_final.safetensors", "bytes": 3751840, "sha256": "8c2c3a9ca6464c940e70c7a2578519c9e47b73cda0b79a9ebcdc1a92df2623e3" }, { "index": 8, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t010m_muon_seed0", "size_label": "002m", "token_label": "010m", "target_tokens": 10000000, "model_layers": 6, "model_dim": 144, "attention_heads": 4, "head_dim": 36, "estimated_params": 2067552, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t010m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t010m_muon_seed0/model_final.safetensors", "bytes": 8277464, "sha256": "74b3128c478f3a1fbbc3c1ec9fdf663e453446b609fbb5b36d2912bdab4cff8c" }, { "index": 9, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t020m_muon_seed0", "size_label": "002m", "token_label": "020m", "target_tokens": 20000000, "model_layers": 6, "model_dim": 144, "attention_heads": 4, "head_dim": 36, "estimated_params": 2067552, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t020m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t020m_muon_seed0/model_final.safetensors", "bytes": 8277464, "sha256": "a28bd97fa8d3bf40bc5eb6e742d980afd9cec48cab5083b4128715646095eee6" }, { "index": 10, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t050m_muon_seed0", "size_label": "002m", "token_label": "050m", "target_tokens": 50000000, "model_layers": 6, "model_dim": 144, "attention_heads": 4, "head_dim": 36, "estimated_params": 2067552, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t050m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t050m_muon_seed0/model_final.safetensors", "bytes": 8277464, "sha256": "4c786366c5a4ca04345a0fb7d0a5412fdee2784dcbb466add12b023440a8fb6e" }, { "index": 11, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t100m_muon_seed0", "size_label": "002m", "token_label": "100m", "target_tokens": 100000000, "model_layers": 6, "model_dim": 144, "attention_heads": 4, "head_dim": 36, "estimated_params": 2067552, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t100m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t100m_muon_seed0/model_final.safetensors", "bytes": 8278152, "sha256": "17b1e854ccdc9f450e2ddb266155d72fb42d0a9d45cd0ddd41cd0828a4707f28" }, { "index": 12, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t200m_muon_seed0", "size_label": "002m", "token_label": "200m", "target_tokens": 200000000, "model_layers": 6, "model_dim": 144, "attention_heads": 4, "head_dim": 36, "estimated_params": 2067552, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t200m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t200m_muon_seed0/model_final.safetensors", "bytes": 8278152, "sha256": "d8238564586d43dde20fbed5911ed9675bd3c8a91931ceda5c623f8d4430ecda" }, { "index": 13, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t500m_muon_seed0", "size_label": "002m", "token_label": "500m", "target_tokens": 500000000, "model_layers": 6, "model_dim": 144, "attention_heads": 4, "head_dim": 36, "estimated_params": 2067552, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t500m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t500m_muon_seed0/model_final.safetensors", "bytes": 8278152, "sha256": "e2ea654fb47d63bbfaa9f3bb2e884a4705733db89cce5e54acafd49c6f08d6bd" }, { "index": 14, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t001b_muon_seed0", "size_label": "002m", "token_label": "001b", "target_tokens": 1000000000, "model_layers": 6, "model_dim": 144, "attention_heads": 4, "head_dim": 36, "estimated_params": 2067552, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t001b_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t001b_muon_seed0/model_final.safetensors", "bytes": 8278152, "sha256": "6710316b4e245789fa7ac2aef43261e812d048f0011acd4ded3be24814328781" }, { "index": 15, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t002b_muon_seed0", "size_label": "002m", "token_label": "002b", "target_tokens": 2000000000, "model_layers": 6, "model_dim": 144, "attention_heads": 4, "head_dim": 36, "estimated_params": 2067552, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t002b_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p002m_t002b_muon_seed0/model_final.safetensors", "bytes": 8278152, "sha256": "e88ddc23af4e836d4fa208e9e6c743f6ca312bc535beb9d7fdc17ca272a189bb" }, { "index": 16, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t010m_muon_seed0", "size_label": "005m", "token_label": "010m", "target_tokens": 10000000, "model_layers": 6, "model_dim": 224, "attention_heads": 4, "head_dim": 56, "estimated_params": 4936512, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t010m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t010m_muon_seed0/model_final.safetensors", "bytes": 19753376, "sha256": "0165d8c5d319e5362f1956718967b42f52a10285823b809480decba192d988e9" }, { "index": 17, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t020m_muon_seed0", "size_label": "005m", "token_label": "020m", "target_tokens": 20000000, "model_layers": 6, "model_dim": 224, "attention_heads": 4, "head_dim": 56, "estimated_params": 4936512, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t020m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t020m_muon_seed0/model_final.safetensors", "bytes": 19753376, "sha256": "a4accb6e40f2f43f3a99bbcf20274339b72e168e3e84dcc5f98731fa8576bc05" }, { "index": 18, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t050m_muon_seed0", "size_label": "005m", "token_label": "050m", "target_tokens": 50000000, "model_layers": 6, "model_dim": 224, "attention_heads": 4, "head_dim": 56, "estimated_params": 4936512, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t050m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t050m_muon_seed0/model_final.safetensors", "bytes": 19753376, "sha256": "4ff37682895eaf385394550c271f44738f5a20b911b774f938d6fecc3129dc5f" }, { "index": 19, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t100m_muon_seed0", "size_label": "005m", "token_label": "100m", "target_tokens": 100000000, "model_layers": 6, "model_dim": 224, "attention_heads": 4, "head_dim": 56, "estimated_params": 4936512, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t100m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t100m_muon_seed0/model_final.safetensors", "bytes": 19754064, "sha256": "1758c5f6bb7c4c42cbc5a71f7a02321e6c681294ca86ab01a1ed978acd5c5314" }, { "index": 20, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t200m_muon_seed0", "size_label": "005m", "token_label": "200m", "target_tokens": 200000000, "model_layers": 6, "model_dim": 224, "attention_heads": 4, "head_dim": 56, "estimated_params": 4936512, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t200m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t200m_muon_seed0/model_final.safetensors", "bytes": 19754064, "sha256": "e3691d8b0fcd3f23ed67ac060c5c0578ea125248f3565c56abfd4c3ed11dd4fb" }, { "index": 21, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t500m_muon_seed0", "size_label": "005m", "token_label": "500m", "target_tokens": 500000000, "model_layers": 6, "model_dim": 224, "attention_heads": 4, "head_dim": 56, "estimated_params": 4936512, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t500m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t500m_muon_seed0/model_final.safetensors", "bytes": 19754064, "sha256": "12e7ad96ed63b3264978441f07ec940f1de1476c9de09c4b056c6e85c63733bb" }, { "index": 22, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t001b_muon_seed0", "size_label": "005m", "token_label": "001b", "target_tokens": 1000000000, "model_layers": 6, "model_dim": 224, "attention_heads": 4, "head_dim": 56, "estimated_params": 4936512, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t001b_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t001b_muon_seed0/model_final.safetensors", "bytes": 19754064, "sha256": "8a57625020492be1de2b40cdde95c047991324d3f7c3c0a57175c814f9882332" }, { "index": 23, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t002b_muon_seed0", "size_label": "005m", "token_label": "002b", "target_tokens": 2000000000, "model_layers": 6, "model_dim": 224, "attention_heads": 4, "head_dim": 56, "estimated_params": 4936512, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t002b_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p005m_t002b_muon_seed0/model_final.safetensors", "bytes": 19754064, "sha256": "3af3401fc1eb2e3536c2809c65a747643705a212f61ae795d4377c779e584001" }, { "index": 24, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t010m_muon_seed0", "size_label": "010m", "token_label": "010m", "target_tokens": 10000000, "model_layers": 6, "model_dim": 320, "attention_heads": 8, "head_dim": 40, "estimated_params": 10000800, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t010m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t010m_muon_seed0/model_final.safetensors", "bytes": 40010584, "sha256": "16854f8126f4e6b1ce2fc084f61ad8ba9497566b262b850d497af4c9447779b0" }, { "index": 25, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t020m_muon_seed0", "size_label": "010m", "token_label": "020m", "target_tokens": 20000000, "model_layers": 6, "model_dim": 320, "attention_heads": 8, "head_dim": 40, "estimated_params": 10000800, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t020m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t020m_muon_seed0/model_final.safetensors", "bytes": 40010584, "sha256": "038f8f4312076a10572218719da86f74b2b530f2b196f85f783aa5771c9ed4d3" }, { "index": 26, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t050m_muon_seed0", "size_label": "010m", "token_label": "050m", "target_tokens": 50000000, "model_layers": 6, "model_dim": 320, "attention_heads": 8, "head_dim": 40, "estimated_params": 10000800, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t050m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t050m_muon_seed0/model_final.safetensors", "bytes": 40010584, "sha256": "4e2d9752bbd6a90836feac5ee981826f70b759637329d9b86c2f5ecaeb3f96b8" }, { "index": 27, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t100m_muon_seed0", "size_label": "010m", "token_label": "100m", "target_tokens": 100000000, "model_layers": 6, "model_dim": 320, "attention_heads": 8, "head_dim": 40, "estimated_params": 10000800, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t100m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t100m_muon_seed0/model_final.safetensors", "bytes": 40011272, "sha256": "eb14647e31ead71720fd9945463d158c1f4ddc74fc72846b965594a69183d373" }, { "index": 28, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t200m_muon_seed0", "size_label": "010m", "token_label": "200m", "target_tokens": 200000000, "model_layers": 6, "model_dim": 320, "attention_heads": 8, "head_dim": 40, "estimated_params": 10000800, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t200m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t200m_muon_seed0/model_final.safetensors", "bytes": 40011272, "sha256": "5894282f53a7b6d4aa5c9b3e4a95b7c679e1ee72796d9a0ce934b4837d9544a9" }, { "index": 29, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t500m_muon_seed0", "size_label": "010m", "token_label": "500m", "target_tokens": 500000000, "model_layers": 6, "model_dim": 320, "attention_heads": 8, "head_dim": 40, "estimated_params": 10000800, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t500m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t500m_muon_seed0/model_final.safetensors", "bytes": 40011272, "sha256": "71f26b77db4dc18634a9381a5f3de5457fc52d655449171d919ec74283d5d3ac" }, { "index": 30, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t001b_muon_seed0", "size_label": "010m", "token_label": "001b", "target_tokens": 1000000000, "model_layers": 6, "model_dim": 320, "attention_heads": 8, "head_dim": 40, "estimated_params": 10000800, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t001b_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t001b_muon_seed0/model_final.safetensors", "bytes": 40011272, "sha256": "ef20efca1e79bcd006100ecd9a1ee0e8f5263ed9a05bcac2e24867a75c014ba0" }, { "index": 31, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t002b_muon_seed0", "size_label": "010m", "token_label": "002b", "target_tokens": 2000000000, "model_layers": 6, "model_dim": 320, "attention_heads": 8, "head_dim": 40, "estimated_params": 10000800, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t002b_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p010m_t002b_muon_seed0/model_final.safetensors", "bytes": 40011272, "sha256": "bd2a4a56edef7dbf3194c9c20cf1500fde5607bc795de58489683e5385babd60" }, { "index": 32, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t010m_muon_seed0", "size_label": "020m", "token_label": "010m", "target_tokens": 10000000, "model_layers": 6, "model_dim": 448, "attention_heads": 8, "head_dim": 56, "estimated_params": 19506144, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t010m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t010m_muon_seed0/model_final.safetensors", "bytes": 78031976, "sha256": "a6e3e8ab52884d7be6c74a9ce2498cb750d97155f5d60e7123f8d0f41cffde05" }, { "index": 33, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t020m_muon_seed0", "size_label": "020m", "token_label": "020m", "target_tokens": 20000000, "model_layers": 6, "model_dim": 448, "attention_heads": 8, "head_dim": 56, "estimated_params": 19506144, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t020m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t020m_muon_seed0/model_final.safetensors", "bytes": 78031976, "sha256": "e164d42e2d0f69038cbfd700395360a7f899bbafa29c626742781913198c9124" }, { "index": 34, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t050m_muon_seed0", "size_label": "020m", "token_label": "050m", "target_tokens": 50000000, "model_layers": 6, "model_dim": 448, "attention_heads": 8, "head_dim": 56, "estimated_params": 19506144, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t050m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t050m_muon_seed0/model_final.safetensors", "bytes": 78031976, "sha256": "1491c87e209e261b744801acc28b8e4153b4b18627026b866e67e80ca36ac751" }, { "index": 35, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t100m_muon_seed0", "size_label": "020m", "token_label": "100m", "target_tokens": 100000000, "model_layers": 6, "model_dim": 448, "attention_heads": 8, "head_dim": 56, "estimated_params": 19506144, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t100m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t100m_muon_seed0/model_final.safetensors", "bytes": 78032672, "sha256": "4796cf04484a72b674de8cbb207a9c37620a1a40c2542a06f3df1e90b4d51292" }, { "index": 36, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t200m_muon_seed0", "size_label": "020m", "token_label": "200m", "target_tokens": 200000000, "model_layers": 6, "model_dim": 448, "attention_heads": 8, "head_dim": 56, "estimated_params": 19506144, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t200m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t200m_muon_seed0/model_final.safetensors", "bytes": 78032672, "sha256": "996cdf4521a9c815b21b108ff8fb5620a06a824bb8718041d74ea5dc8e61ad7e" }, { "index": 37, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t500m_muon_seed0", "size_label": "020m", "token_label": "500m", "target_tokens": 500000000, "model_layers": 6, "model_dim": 448, "attention_heads": 8, "head_dim": 56, "estimated_params": 19506144, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t500m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t500m_muon_seed0/model_final.safetensors", "bytes": 78032672, "sha256": "a15607560fb9b8ec018134ed01a3eee85968e27c032b3f02eb79d1713cb82c40" }, { "index": 38, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t001b_muon_seed0", "size_label": "020m", "token_label": "001b", "target_tokens": 1000000000, "model_layers": 6, "model_dim": 448, "attention_heads": 8, "head_dim": 56, "estimated_params": 19506144, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t001b_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t001b_muon_seed0/model_final.safetensors", "bytes": 78032672, "sha256": "c5a381388dbc6ea8bbd3f3d80a575ff870f085521a4ddbf04cef1b51bc6debb1" }, { "index": 39, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t002b_muon_seed0", "size_label": "020m", "token_label": "002b", "target_tokens": 2000000000, "model_layers": 6, "model_dim": 448, "attention_heads": 8, "head_dim": 56, "estimated_params": 19506144, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t002b_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p020m_t002b_muon_seed0/model_final.safetensors", "bytes": 78032672, "sha256": "d4f23c6658cdb7ab155c831540a8b495248232d86cae11b6427e444f4f34637b" }, { "index": 40, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t010m_muon_seed0", "size_label": "050m", "token_label": "010m", "target_tokens": 10000000, "model_layers": 12, "model_dim": 512, "attention_heads": 8, "head_dim": 64, "estimated_params": 50611200, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t010m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t010m_muon_seed0/model_final.safetensors", "bytes": 202459512, "sha256": "58e53840c7dc24d54d9e15469f97d055c67fba46dcf3b39ebc42d9f3b60d8b6c" }, { "index": 41, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t020m_muon_seed0", "size_label": "050m", "token_label": "020m", "target_tokens": 20000000, "model_layers": 12, "model_dim": 512, "attention_heads": 8, "head_dim": 64, "estimated_params": 50611200, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t020m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t020m_muon_seed0/model_final.safetensors", "bytes": 202459512, "sha256": "63c891a00cc4f8adfe3ceb57cbab822469384678b0741476ec34453e8117d8d8" }, { "index": 42, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t050m_muon_seed0", "size_label": "050m", "token_label": "050m", "target_tokens": 50000000, "model_layers": 12, "model_dim": 512, "attention_heads": 8, "head_dim": 64, "estimated_params": 50611200, "torch_compile": "0", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t050m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t050m_muon_seed0/model_final.safetensors", "bytes": 202459512, "sha256": "bfe1dc98c0aff02936dc736176319b73c25cde1ddb7db3eac81adf1d56e988dd" }, { "index": 43, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t100m_muon_seed0", "size_label": "050m", "token_label": "100m", "target_tokens": 100000000, "model_layers": 12, "model_dim": 512, "attention_heads": 8, "head_dim": 64, "estimated_params": 50611200, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t100m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t100m_muon_seed0/model_final.safetensors", "bytes": 202460864, "sha256": "d9aed03f1ddbd42e499953e36a62c63e3087216ebced20a317691177c21dd0cf" }, { "index": 44, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t200m_muon_seed0", "size_label": "050m", "token_label": "200m", "target_tokens": 200000000, "model_layers": 12, "model_dim": 512, "attention_heads": 8, "head_dim": 64, "estimated_params": 50611200, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t200m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t200m_muon_seed0/model_final.safetensors", "bytes": 202460864, "sha256": "2e74ecbd706c3301fcc62401cf73396b5d7bd047d2b13229587e7be97034f2d3" }, { "index": 45, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t500m_muon_seed0", "size_label": "050m", "token_label": "500m", "target_tokens": 500000000, "model_layers": 12, "model_dim": 512, "attention_heads": 8, "head_dim": 64, "estimated_params": 50611200, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t500m_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t500m_muon_seed0/model_final.safetensors", "bytes": 202460864, "sha256": "2564ad42442660785178f3f3416c72b9a734f1a41b6a8d4d50f54ff1e342b773" }, { "index": 46, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t001b_muon_seed0", "size_label": "050m", "token_label": "001b", "target_tokens": 1000000000, "model_layers": 12, "model_dim": 512, "attention_heads": 8, "head_dim": 64, "estimated_params": 50611200, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t001b_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t001b_muon_seed0/model_final.safetensors", "bytes": 202460864, "sha256": "e5a95d5b1435e20c0057afe258cedc5bd3f39845b8b3d1c256506ae3d17c2898" }, { "index": 47, "run_name": "scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t002b_muon_seed0", "size_label": "050m", "token_label": "002b", "target_tokens": 2000000000, "model_layers": 12, "model_dim": 512, "attention_heads": 8, "head_dim": 64, "estimated_params": 50611200, "torch_compile": "1", "source_manifest": "sweep_manifests/scaling_grid_torch_genome_20260603_compile100m.json", "source_checkpoint": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t002b_muon_seed0/model_final.safetensors", "repo_path": "checkpoints/scaling_grid_torch_genome_20260603_compile100m_genome_p050m_t002b_muon_seed0/model_final.safetensors", "bytes": 202460864, "sha256": "aa07087e9e9f8b155600d7c56a95107c82be7c38c640060bc5f240d1e156e217" } ] }