# Extraction residue from the Hugging Face commit page (not part of the config):
#   tyzhu's picture
#   proweb-checkpoints to hf at time 2026-01-01 10:05:32
#   ce5bad4 verified
# litgpt continued-pretraining config: Llama 3.2 1B on the "proweb" corpus
# with a diffusion-style objective, 50k steps at bf16-mixed precision.
# NOTE(review): nesting indentation was lost in the original paste; structure
# reconstructed to match the litgpt pretrain config schema — verify against
# the fork's expected fields before use.

# Base model and checkpoint locations.
model_name: Llama-3.2-1B
out_dir: /home/aiops/zhuty/litgpt_out/pretrain/llama3-1b-proweb-50ksteps-diffusion
precision: bf16-mixed
initial_checkpoint_dir: checkpoints/meta-llama/Llama-3.2-1B
resume: auto  # resume from the latest checkpoint in out_dir if one exists

# Training data: plain text files, tokenized with the tokenizer below.
data:
  class_path: litgpt.data.TextFiles
  init_args:
    train_data_path: /home/aiops/zhuty/cont_data/proweb/train
    val_data_path: /home/aiops/zhuty/cont_data/proweb/test
    seed: 42
    num_workers: 0
    add_eos: true

# Optimization schedule.
train:
  save_interval: 2500        # checkpoint every 2500 steps
  save_optimizer_state: true
  max_optimizer_state: 1     # NOTE(review): not a stock litgpt TrainArgs field — confirm the fork supports it
  log_interval: 1
  global_batch_size: 1024    # effective batch; gradient accumulation derived from micro_batch_size * devices
  micro_batch_size: 8
  lr_warmup_fraction: 0.01   # warm up over the first 1% of max_steps
  max_steps: 50000
  max_seq_length: 1024
  max_norm: 1.0              # gradient-clipping norm
  min_lr: 5.0e-06            # LR floor after decay

# Periodic evaluation during training.
eval:
  interval: 1000
  max_iters: 100
  initial_validation: true
  final_validation: true
  evaluate_example: first    # NOTE(review): fork-specific eval options below — confirm against the fork's EvalArgs
  num_generation_examples: 1
  calculate_exact_match: false

# Experiment tracking.
log:
  project: mathcont

optimizer:
  class_path: torch.optim.AdamW
  init_args:
    lr: 5.0e-05
    weight_decay: 0.1
    betas:
      - 0.9
      - 0.95

# Hardware / runtime.
devices: auto
num_nodes: 1
tokenizer_dir: checkpoints/meta-llama/Llama-3.2-1B
logger_name: wandb
seed: 42
compiler: torch
executors:
  - sdpa
  - torchcompile
  - torch
strategy: fsdp

# Diffusion-objective options (fork-specific extensions).
diffusion: true
mask_token_id: 811        # token id substituted as the mask during diffusion noising — verify it matches the tokenizer
sampling_eps: 0.001
intradoc: false
block_diffusion: false
block_size: 8             # block width used when block_diffusion is enabled
timestep_sampler: uniform