{
  "cfg": {
    "batch": 64,
    "context_length": 1024,
    "cycle": 200,
    "ddp_local_rank": 0,
    "drop_rate": 0.1,
    "emb_dim": 768,
    "lr": 0.0004,
    "n_heads": 12,
    "n_layers": 12,
    "num_epoch": 1,
    "tok_per_batch": 524288,
    "total_tok": 9898595200,
    "val_ratio": 0.1,
    "vocab_size": 50304,
    "warmup_ratio": 0.00125,
    "weight_decay": 0.1,
    "world_size": 1
  },
  "tied": true
}