blt-reasoner-pilot1 / code /configs /control_no_infonce.json

Refresh code/ with latest BLT-Reasoner sources (post-campaign)

bc7101b verified 12 days ago

1.35 kB

	{
	"_doc": "Control A: identical to the pilot config except lambda_id=0 (no InfoNCE identifiability loss). Tests whether InfoNCE is the load-bearing piece that makes z informative. Hypothesis: without InfoNCE, z collapses to being decorative (as in Abstract-CoT). Runs 3000 steps at K=4, ~1.6h on a GH200. Compare the resulting Delta_random/Delta_zero to the pilot's step-2000 K=4 result (Delta=3pp at the same compute scale) and to its step-6000 K=8 result (Delta=11pp peak).",

	"base_model": "Qwen/Qwen2.5-1.5B-Instruct",
	"use_lora": true,
	"lora_r": 16,
	"lora_alpha": 32,
	"lora_dropout": 0.05,
	"lora_target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
	"dtype": "bfloat16",
	"attn_impl": "eager",

	"K_latents": 4,
	"K_curriculum": [[0, 4]],
	"block_y_to_x": true,
	"proj_init_scale": 0.02,

	"lambda_lm": 1.0,
	"lambda_id": 0.0,
	"lambda_kl": 0.0001,
	"tau_infonce": 0.2,
	"infonce_target": "final_number",

	"lr_lora": 3e-4,
	"lr_proj": 1e-4,
	"lr_head": 3e-4,
	"weight_decay": 0.01,
	"max_grad_norm": 1.0,
	"warmup_steps": 100,

	"batch_size": 16,
	"grad_accum": 2,
	"max_steps": 3000,
	"max_prompt_len": 192,
	"max_answer_len": 192,

	"log_every": 25,
	"eval_every": 500,
	"eval_size": 200,
	"save_every": 3000,
	"seed": 42,

	"output_dir": "/home/ubuntu/work/blt_control_no_infonce",
	"data_train_size": null,
	"data_eval_size": 200
	}