blt-reasoner-pilot1 / code /configs /exp7b_longer_sft.json
LauraGG's picture
Refresh code/ with latest BLT-Reasoner sources (post-campaign)
bc7101b verified
{
"_doc": "Longer SFT continuation from BLT 7B GRPO ckpt (52.5% AR, H1 thresholds passed). val was still descending at end of prior SFT and GRPO further consolidated. Hypothesis: another 2000 K=16 steps of SFT (same recipe — full-y InfoNCE + MLP projector) lifts absolute accuracy by 2-4 pp. K_curriculum is just [[0,16]] (no curriculum — we resume at K=16). Reset optimizer state on resume (existing limitation).",
"base_model": "Qwen/Qwen2.5-Math-7B-Instruct",
"use_lora": true,
"lora_r": 16,
"lora_alpha": 32,
"lora_dropout": 0.05,
"lora_target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
"dtype": "bfloat16",
"attn_impl": "eager",
"gradient_checkpointing": false,
"K_latents": 16,
"K_curriculum": [[0, 16]],
"block_y_to_x": true,
"block_z_to_x": false,
"proj_init_scale": 0.02,
"proj_mlp": true,
"proj_hidden_mult": 4,
"lambda_lm": 1.0,
"lambda_id": 1.0,
"lambda_kl": 0.0001,
"tau_infonce": 0.2,
"infonce_full_answer": true,
"infonce_target_max_len": 128,
"lr_lora": 1e-4,
"lr_proj": 5e-5,
"lr_head": 1e-4,
"weight_decay": 0.01,
"max_grad_norm": 1.0,
"warmup_steps": 50,
"batch_size": 4,
"grad_accum": 4,
"max_steps": 2000,
"max_prompt_len": 192,
"max_answer_len": 192,
"log_every": 10,
"eval_every": 0,
"eval_size": 200,
"save_every": 500,
"seed": 7,
"output_dir": "/home/ubuntu/work/blt_longer_sft",
"data_train_size": null,
"data_eval_size": 200
}