{
  "base_model": "/content/Qwen3-0.6B",
  "loop_window_size": 64,
  "num_layers": 28,
  "num_heads": 16,
  "head_dim": 128,
  "final_val_loss": 3.6202090362707775,
  "final_val_ppl": 37.34537124633789,
  "training_epochs": 3,
  "training_time_minutes": 38.990576179822284
}