{
  "n_nodes": 64,
  "d_model": 128,
  "n_heads": 4,
  "n_layers": 4,
  "d_ff": 512,
  "dropout": 0.1,
  "train_iters": 8,
  "rope_base": 10.0,
  "damping": 0.85
}
{
  "n_nodes": 64,
  "d_model": 128,
  "n_heads": 4,
  "n_layers": 4,
  "d_ff": 512,
  "dropout": 0.1,
  "train_iters": 8,
  "rope_base": 10.0,
  "damping": 0.85
}