{ "d_model": 512, "nhead": 8, "num_encoder_layers": 3, "num_decoder_layers": 3, "dim_feedforward": 2048, "dropout": 0.1, "max_seq_length": 128, "batch_size": 16, "epochs": 50, "learning_rate": 0.0001, "warmup_steps": 4000, "max_grad_norm": 1.0, "early_stopping_patience": 5, "checkpoint_interval": 5 }