{
  "activation": "gelu",
  "arch": "encoder_decoder",
  "dropout": 0.1,
  "ff_size": 1024,
  "latent_dim": [1, 256],
  "mlp_dist": false,
  "nfeats": 263,
  "normalize_before": false,
  "num_heads": 4,
  "num_layers": 9,
  "pe_type": "mld",
  "position_embedding": "learned"
}