# olmo-1b-finecode-5ksteps / final / hyperparameters.yaml
#
# olmo-1b-finecode-5ksteps to hf at time 2025-11-09 04:17:36
#
# 3cb3b28 verified 3 months ago
#
# 1.22 kB

	model_name: OLMo-1B-hf
	out_dir: /home/aiops/zhuty/litgpt_out/pretrain/olmo-1b-finecode-5ksteps
	precision: bf16-mixed
	initial_checkpoint_dir: checkpoints/allenai/OLMo-1B-hf
	resume: auto
	data:
	class_path: litgpt.data.TextFiles
	init_args:
	train_data_path: /home/aiops/zhuty/cont_data/finecode/train
	val_data_path: /home/aiops/zhuty/cont_data/finecode/test
	seed: 42
	num_workers: 8
	add_eos: true
	train:
	save_interval: 1000
	save_optimizer_state: true
	max_optimizer_state: 1
	log_interval: 1
	global_batch_size: 1024
	micro_batch_size: 8
	lr_warmup_fraction: 0.01
	max_steps: 5000
	max_seq_length: 1024
	max_norm: 1.0
	min_lr: 5.0e-06
	eval:
	interval: 1000
	max_iters: 100
	initial_validation: true
	final_validation: true
	evaluate_example: first
	num_generation_examples: 1
	calculate_exact_match: false
	log:
	project: mathcont
	optimizer:
	class_path: torch.optim.AdamW
	init_args:
	lr: 5.0e-05
	weight_decay: 0.1
	betas:
	- 0.9
	- 0.95
	devices: auto
	num_nodes: 1
	tokenizer_dir: checkpoints/allenai/OLMo-1B-hf
	logger_name: wandb
	seed: 42
	compiler: torch
	executors:
	- sdpa
	- torchcompile
	- torch
	strategy: fsdp
	diffusion: false
	mask_token_id: 811
	sampling_eps: 0.001