{
  "purpose": "200m_branch_only_pure_ssm_4b_pretrain_plus_sft_chatbot_attempt",
  "candidate": "pure_ssm_196m_branch_rms_only",
  "pretrain_token_positions": 4000000000,
  "pretrain_steps": 976563,
  "batch_size": 8,
  "seq_len": 512,
  "sft_steps": 50000,
  "pretrain_lr": 0.0008,
  "sft_lr": 0.00005,
  "save_every_steps": 100000,
  "block_residual_rms_cap": null,
  "notes": [
    "Uses streaming JSONL training through TaoTrain CLI.",
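    "Counts token positions as batch_size * seq_len * max_steps (pretrain_steps here): 8 * 512 * 976563 = 4,000,002,048, i.e. the 4B target rounded up to a whole step.",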
    "Keeps SSM branch RMS normalization enabled and block residual RMS normalization disabled.",
    "Runs corrected response-only SFT after pretraining."
  ]
}