{
  "model_type": "gpt2",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "n_ctx": 1024,
  "block_size": 1024,
  "vocab_size": 50304,
  "n_layer": 12,
  "n_head": 12,
  "n_embd": 768,
  "val_loss_accum": 3.0538008362054825,
  "train_config": {
    "seed": 1337,
    "step": 19072,
    "total_batch_size": 524288,
    "micro_batch_size": 32,
    "sequence_length": 1024,
    "max_lr": 0.0006,
    "min_lr_ratio": 0.1,
    "warmup_steps": 715,
    "max_steps": 19073,
    "eval_steps": 250,
    "checkpoint_steps": 5000,
    "checkpoint_dir": "checkpoints",
    "log_file": "train_2025-04-06_01-53-23.log"
  },
  "dataset_config": {
    "dataset_dir": "dataset_cache",
    "dataset_name": "finewebedu",
    "micro_batch_size": 32,
    "sequence_length": 1024
  },
  "task_specific_params": {
    "eval_config": {
      "validation_steps": 20,
      "hellaswag_samples": 250
    },
    "sample_config": {
      "num_return_sequences": 5,
      "max_length": 30,
      "text": "Hello, I'm a language model,",
      "seed": 42
    }
  }
}