| { | |
| "cfg": { | |
| "batch": 64, | |
| "context_length": 1024, | |
| "cycle": 200, | |
| "ddp_local_rank": 0, | |
| "drop_rate": 0.1, | |
| "emb_dim": 768, | |
| "lr": 0.0004, | |
| "n_heads": 12, | |
| "n_layers": 12, | |
| "num_epoch": 1, | |
| "tok_per_batch": 524288, | |
| "total_tok": 9898595200, | |
| "val_ratio": 0.1, | |
| "vocab_size": 50304, | |
| "warmup_ratio": 0.00125, | |
| "weight_decay": 0.1, | |
| "world_size": 1 | |
| }, | |
| "tied": true | |
| } |