{
  "config": {
    "model": {
      "dtype": "f32",
      "experts_per_tok": 1,
      "hidden_size": 128,
      "max_seq_len": 64,
      "name": "nano-start",
      "num_experts": 2,
      "num_heads": 4,
      "num_layers": 4,
      "vocab_size": 100315
    },
    "trainer": {
      "batch_size": 4,
      "effective_batch_size": 8,
      "gradient_accumulation": 2,
      "learning_rate": 0.002,
      "max_steps": 0,
      "num_epochs": 20,
      "seq_len": 64,
      "total_steps": 260
    }
  },
  "dataset_size": 6379,
  "device": "Cuda(CudaDevice(DeviceId(1)))",
  "error": null,
  "run_dir": "./runs/20251205_144741",
  "status": "completed"
}