{
"architecture": {
"n_embd": 512,
"n_layer": 12,
"embed_dim": 512,
"block_size": 256,
"vocab_size": 499
},
"training": {
"batch_size": 2,
"max_iters": 1000000,
"learning_rate": 6e-4,
"min_lr": 6e-5,
"warmup_iters": 20000,
"weight_decay": 0.1,
"grad_clip": 1
},
"evaluation": {
"eval_interval": 25000,
"eval_iters": 100,
"save_interval": 25000,
"patience": 15
},
"lr_schedule": {
"lr_cycle_length": 25000,
"lr_cycle_warmup": 10000,
"lr_decay_rate": 0.95
},
"scanner": {
"scanner_clamp": 70.0
},
"paths": {
"checkpoint_path": "checkpoints/colm_checkpoint_big.pt",
"best_checkpoint_path": "checkpoints/colm_best_big.pt",
"tokenizer_path": "colm_tokenizer.json",
"dataset_path": "datasets/DCDM_big_dataset.txt"
}
}