{
  "layer_idx": 32,
  "model_dim": 4096,
  "probe_dim": 64,
  "num_layers": 1,
  "num_heads": 4,
  "ffn_multiplier": 4,
  "dropout": 0.0,
  "train_mse": 0.45685000762939454,
  "eval_mse": 0.4507866852244911
}
{
  "layer_idx": 32,
  "model_dim": 4096,
  "probe_dim": 64,
  "num_layers": 1,
  "num_heads": 4,
  "ffn_multiplier": 4,
  "dropout": 0.0,
  "train_mse": 0.45685000762939454,
  "eval_mse": 0.4507866852244911
}