{ "d_model": 408, "nhead": 8, "num_layers": 4, "dim_feedforward": 2048, "out_dim": 512, "dropout": 0.2, "activation": "gelu", "lr0": 0.00005, "warmup_steps": 1000, "projection_hidden_dim": 1024, "max_length": 512, "batch_size": 128 }