| { | |
| "vocab_size": 256, | |
| "d_model": 384, | |
| "n_layers": 6, | |
| "dropout": 0.1, | |
| "max_len": 512, | |
| "lr": 0.0003, | |
| "warmup": 500, | |
| "batch": 64, | |
| "block": 512, | |
| "steps": 2000000, | |
| "world_size": 2, | |
| "step": 500 | |
| } |
| { | |
| "vocab_size": 256, | |
| "d_model": 384, | |
| "n_layers": 6, | |
| "dropout": 0.1, | |
| "max_len": 512, | |
| "lr": 0.0003, | |
| "warmup": 500, | |
| "batch": 64, | |
| "block": 512, | |
| "steps": 2000000, | |
| "world_size": 2, | |
| "step": 500 | |
| } |