{
  "purpose": "200m_branch_only_pure_ssm_4b_pretrain_plus_sft_chatbot_attempt",
  "candidate": "pure_ssm_196m_branch_rms_only",
  "pretrain_token_positions": 4000000000,
  "pretrain_steps": 976563,
  "batch_size": 8,
  "seq_len": 512,
  "sft_steps": 50000,
  "pretrain_lr": 0.0008,
  "sft_lr": 0.00005,
  "save_every_steps": 100000,
  "block_residual_rms_cap": null,
  "notes": [
    "Uses streaming JSONL training through TaoTrain CLI.",
    "Counts token positions as batch_size * seq_len * max_steps.",
    "Keeps SSM branch RMS normalization enabled and block residual RMS normalization disabled.",
    "Runs corrected response-only SFT after pretraining."
  ]
}