{
  "layer_idx": 24,
  "model_dim": 4096,
  "probe_dim": 64,
  "num_layers": 8,
  "num_heads": 4,
  "ffn_multiplier": 4,
  "dropout": 0.0,
  "train_mse": 4.520931562042236,
  "eval_mse": 4.617096263317011
}
{
  "layer_idx": 24,
  "model_dim": 4096,
  "probe_dim": 64,
  "num_layers": 8,
  "num_heads": 4,
  "ffn_multiplier": 4,
  "dropout": 0.0,
  "train_mse": 4.520931562042236,
  "eval_mse": 4.617096263317011
}