QED-75M_artifacts / configs /sft_5090.json
levossadtchi's picture
Add files using upload-large-folder tool
9847679 verified
raw
history blame contribute delete
602 Bytes
{
"seed": 42,
"dataset_path": "data/sft/processed",
"output_dir": "outputs/sft",
"checkpoint_dir": "checkpoints/sft",
"init_from": "checkpoints/pretrain_stage2/last.pt",
"resume_from": null,
"seq_len": 2048,
"micro_batch_size": 8,
"grad_accum_steps": 16,
"max_steps": 5000,
"warmup_steps": 200,
"learning_rate": 0.0005,
"min_lr": 5e-05,
"weight_decay": 0.01,
"beta1": 0.9,
"beta2": 0.95,
"grad_clip": 1.0,
"precision": "bf16",
"num_workers": 0,
"log_interval": 10,
"eval_interval": 100,
"eval_batches": 50,
"save_interval": 200,
"compile_model": false
}