```python
# I/O
model_config = 'gpt2'            # model configuration name
log_file = 'logs/gpt2.log'       # training log file
out_dir = 'out/gpt2'             # checkpoint / output directory
eval_interval = 500              # evaluate on the val split every 500 iters
log_interval = 1                 # log training loss every iteration
eval_iters = 500                 # batches used to estimate val loss
eval_only = False                # if True, run one evaluation and exit
always_save_checkpoint = False   # only checkpoint when val loss improves
init_from = 'scratch'            # train from randomly initialized weights

# weights & biases logging
wandb_log = False
wandb_project = 'owt'
wandb_run_name = 'gpt2'

# data
dataset = 'openwebtext'
gradient_accumulation_steps = 64 # micro-batches accumulated per optimizer step
batch_size = 1                   # micro-batch size (sequences per forward pass)
block_size = 1024                # context length in tokens

# model
n_layer = 36                     # transformer layers
n_head = 16                      # attention heads per layer
n_embd = 1024                    # embedding width
vocab_size = 50304               # GPT-2's 50257 tokens, padded up to a multiple of 64
dropout = 0.0                    # no dropout for from-scratch pretraining
bias = False                     # no bias terms in LayerNorm / Linear layers

# AdamW optimizer
learning_rate = 3e-4             # peak learning rate
max_iters = 50000                # total optimizer steps
weight_decay = 1e-1
beta1 = 0.9
beta2 = 0.95
grad_clip = 1.0                  # clip gradients at this global norm

# learning-rate schedule
decay_lr = True                  # decay the learning rate over training
warmup_iters = 1000              # linear warmup steps
lr_decay_iters = 50000           # decay horizon (usually == max_iters)
min_lr = 3e-5                    # floor, ~ learning_rate / 10

# system
backend = 'nccl'                 # DDP backend
device = 'cuda'
dtype = 'float16'                # fp16 needs a grad scaler; 'bfloat16' avoids one
compile = True                   # use torch.compile on the model
```
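
With these settings, each optimizer step processes `gradient_accumulation_steps * batch_size * block_size` = 64 × 1 × 1024 = 65,536 tokens per process (multiply by the number of GPUs under DDP), so the full 50,000-iteration run sees roughly 3.3B tokens. A quick back-of-the-envelope check:

```python
# Throughput implied by this config (single process; scale by world size for DDP).
gradient_accumulation_steps = 64
batch_size = 1
block_size = 1024
max_iters = 50000

tokens_per_iter = gradient_accumulation_steps * batch_size * block_size
print(f"tokens per optimizer step: {tokens_per_iter:,}")              # 65,536
print(f"tokens over full run:      {tokens_per_iter * max_iters:,}")  # 3,276,800,000
```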
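
The schedule settings imply a linear warmup to the 3e-4 peak over the first 1,000 iterations, then a decay down to `min_lr` = 3e-5 by iteration 50,000. The config alone doesn't pin down the decay shape; below is a minimal sketch assuming cosine decay, the form used by nanoGPT-style training loops:

```python
import math

learning_rate = 3e-4
min_lr = 3e-5
warmup_iters = 1000
lr_decay_iters = 50000

def get_lr(it: int) -> float:
    """Learning rate at iteration `it`: linear warmup, then cosine decay."""
    # Linear warmup from 0 up to the peak learning rate.
    if it < warmup_iters:
        return learning_rate * it / warmup_iters
    # Past the decay horizon, hold at the floor.
    if it > lr_decay_iters:
        return min_lr
    # Cosine decay from learning_rate down to min_lr in between.
    decay_ratio = (it - warmup_iters) / (lr_decay_iters - warmup_iters)
    coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))  # goes 1 -> 0
    return min_lr + coeff * (learning_rate - min_lr)
```

Sanity-checking the endpoints: `get_lr(1000)` returns the 3e-4 peak and `get_lr(50000)` returns the 3e-5 floor, matching the config.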