TaoNet-mini-T2 / artifacts /run_plan.json
StarMist0012's picture
Add files using upload-large-folder tool
e2bfccc verified
{
"purpose": "200m_branch_only_pure_ssm_4b_pretrain_plus_sft_chatbot_attempt",
"candidate": "pure_ssm_196m_branch_rms_only",
"pretrain_token_positions": 4000000000,
"pretrain_steps": 976563,
"batch_size": 8,
"seq_len": 512,
"sft_steps": 50000,
"pretrain_lr": 0.0008,
"sft_lr": 0.00005,
"save_every_steps": 100000,
"block_residual_rms_cap": null,
"notes": [
"Uses streaming JSONL training through TaoTrain CLI.",
"Counts token positions as batch_size * seq_len * max_steps.",
"Keeps SSM branch RMS normalization enabled and block residual RMS normalization disabled.",
"Runs corrected response-only SFT after pretraining."
]
}