{
  "d_model": 512,
  "nhead": 8,
  "num_encoder_layers": 3,
  "num_decoder_layers": 3,
  "dim_feedforward": 2048,
  "dropout": 0.1,
  "max_seq_length": 128,
  "batch_size": 16,
  "epochs": 50,
  "learning_rate": 0.0001,
  "warmup_steps": 4000,
  "max_grad_norm": 1.0,
  "early_stopping_patience": 5,
  "checkpoint_interval": 5
}