gpt2 / config.json
allenporter's picture
Upload folder using huggingface_hub
33c0ee7 verified
raw
history blame
1.19 kB
{
"model_type": "gpt2",
"architectures": [
"GPT2LMHeadModel"
],
"n_ctx": 1024,
"block_size": 1024,
"vocab_size": 50304,
"n_layer": 12,
"n_head": 12,
"n_embd": 768,
"val_loss_accum": 3.0538008362054825,
"train_config": {
"seed": 1337,
"step": 19072,
"total_batch_size": 524288,
"micro_batch_size": 32,
"sequence_length": 1024,
"max_lr": 0.0006,
"min_lr_ratio": 0.1,
"warmup_steps": 715,
"max_steps": 19073,
"eval_steps": 250,
"checkpoint_steps": 5000,
"checkpoint_dir": "checkpoints",
"log_file": "train_2025-04-06_01-53-23.log"
},
"dataset_config": {
"dataset_dir": "dataset_cache",
"dataset_name": "finewebedu",
"micro_batch_size": 32,
"sequence_length": 1024
},
"task_specific_params": {
"eval_config": {
"validation_steps": 20,
"hellaswag_samples": 250
},
"sample_config": {
"num_return_sequences": 5,
"max_length": 30,
"text": "Hello, I'm a language model,",
"seed": 42
}
}
}