{
  "layer_idx": 20,
  "model_dim": 4096,
  "probe_dim": 64,
  "num_layers": 12,
  "num_heads": 4,
  "ffn_multiplier": 4,
  "dropout": 0.0,
  "train_mse": 3.406930178833008,
  "eval_mse": 3.4354089314601683
}
{
  "layer_idx": 20,
  "model_dim": 4096,
  "probe_dim": 64,
  "num_layers": 12,
  "num_heads": 4,
  "ffn_multiplier": 4,
  "dropout": 0.0,
  "train_mse": 3.406930178833008,
  "eval_mse": 3.4354089314601683
}