{ "cfg": { "batch": 64, "context_length": 1024, "cycle": 200, "ddp_local_rank": 0, "drop_rate": 0.1, "emb_dim": 768, "lr": 0.0004, "n_heads": 12, "n_layers": 12, "num_epoch": 1, "tok_per_batch": 524288, "total_tok": 9898595200, "val_ratio": 0.1, "vocab_size": 50304, "warmup_ratio": 0.00125, "weight_decay": 0.1, "world_size": 1 }, "tied": true }