{
    "d_model": 256,
    "n_heads": 8,
    "d_head": 32,
    "n_layers": 6,
    "vocab_size": 20000,
    "mode": "tanh-clipped",
    "tau": 1.5
}