# Default training configuration (nanoGPT-style GPT-2 124M on OpenWebText).
# These module-level names are the public interface: an external configurator
# overrides them by name, so do not rename any of them (including `compile`,
# which intentionally shadows the builtin to match the consumer's API).

# I/O
out_dir = 'out'
eval_interval = 2000
log_interval = 1
eval_iters = 200
eval_only = False  # if True: run one eval pass, then exit
always_save_checkpoint = True  # if True: save a checkpoint after each eval
init_from = 'scratch'  # 'scratch' | 'resume' | a GPT-2 variant name

# wandb logging
wandb_log = False  # disabled by default
wandb_project = 'owt'
wandb_run_name = 'gpt2'

# data
dataset = 'openwebtext'
gradient_accumulation_steps = 5 * 8  # used to simulate larger batch sizes
batch_size = 12  # micro-batch size per forward/backward pass
block_size = 1024  # context length in tokens

# model (GPT-2 124M)
n_layer = 12
n_head = 12
n_embd = 768
dropout = 0.0  # 0 for pretraining; try 0.1+ when finetuning
bias = False  # bias in LayerNorm and Linear layers

# AdamW optimizer
learning_rate = 6e-4  # max learning rate
max_iters = 600000  # total number of training iterations
weight_decay = 1e-1
beta1 = 0.9
beta2 = 0.95
grad_clip = 1.0  # clip gradients at this value; 0.0 disables

# learning-rate decay schedule (cosine with warmup)
decay_lr = True
warmup_iters = 2000
lr_decay_iters = 600000  # usually set ~= max_iters
min_lr = 6e-5  # usually learning_rate / 10

# DDP / system
backend = 'nccl'  # distributed backend: 'nccl', 'gloo', etc.
device = 'cuda'
dtype = 'bfloat16'  # 'float32' | 'bfloat16' | 'float16'
compile = True  # use torch.compile for speed (requires PyTorch 2.0)