| { | |
| "checkpoint_id": "ckpt_20260425_135103_18000_22deff1b_9470fbb7", | |
| "created_at": "2026-04-25T13:51:04.065681", | |
| "iteration": 18000, | |
| "epoch": 0, | |
| "train_loss": 0.0, | |
| "val_loss": 0.00032569289276580094, | |
| "learning_rate": 0.00011444514640904435, | |
| "model_config": { | |
| "n_layer": 4, | |
| "n_head": 4, | |
| "n_embd": 256, | |
| "vocab_size": 50257, | |
| "block_size": 1024, | |
| "dropout": 0.1, | |
| "bias": true, | |
| "initial_connections": 0.1, | |
| "connection_growth_rate": 0.05, | |
| "max_connections": 1.0 | |
| }, | |
| "training_config": { | |
| "learning_rate": 0.0002, | |
| "batch_size": 2, | |
| "max_iters": 500, | |
| "warmup_iters": 5000, | |
| "lr_decay_iters": 50000, | |
| "min_lr": 1e-05, | |
| "weight_decay": 0.1, | |
| "grad_clip": 1.0, | |
| "enable_curriculum_learning": true, | |
| "enable_introspection": true | |
| }, | |
| "data_config": { | |
| "data_dir": "data/nanecho", | |
| "batch_size": 2, | |
| "block_size": 1024 | |
| }, | |
| "metrics": { | |
| "val_loss": 0.00032569289276580094, | |
| "connection_ratio": 1.0, | |
| "tokens_processed": 36864000, | |
| "training_speed_iters_per_sec": 0.08497924105645045 | |
| }, | |
| "tags": [ | |
| "phase_adaptive_mastery", | |
| "high_quality", | |
| "nanecho", | |
| "curriculum", | |
| "introspection" | |
| ], | |
| "parent_checkpoint": null, | |
| "notes": "Training checkpoint at iteration 18000 (resumed from iteration 17500) | Phase: adaptive_mastery", | |
| "file_size_mb": 253.3669786453247, | |
| "quality_score": 2764800.7811699593 | |
| } |