tinyllama-megamath-1m-debug / training_config.json
Onlydrinkwater's picture
Final model at step 3
9908609 verified
{
  "model_name": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
  "tokenizer_name": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
  "dataset_config": "config/data_megamath.json",
  "seq_len": 2048,
  "total_tokens": 1000000,
  "per_device_batch_size": 4,
  "gradient_accumulation_steps": 4,
  "learning_rate": 8e-05,
  "min_learning_rate": 8e-06,
  "weight_decay": 0.1,
  "grad_clip": 1.0,
  "warmup_steps": 0,
  "lambda_hi": 0.3,
  "p_inject": 0.1,
  "max_injections_per_seq": 16,
  "p_inject_warmup_steps": 10,
  "lambda_hi_warmup_steps": 10,
  "normalize_hidden": true,
  "alpha_scale": 1.0,
  "output_dir": "outputs/tinyllama_megamath_1m_debug",
  "log_every_steps": 5,
  "wandb_project": "megamath-debug",
  "wandb_run_name": "tinyllama-1m-debug",
  "hf_repo_id": "Onlydrinkwater/tinyllama-megamath-1m-debug",
  "hf_upload": true,
  "hf_private": false,
  "seed": 42,
  "bf16": true,
  "dataloader_num_workers": 2
}