File size: 209 Bytes
8ccc261
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
{
  "backbone_arch": "Qwen/Qwen2.5-Math-7B-Instruct",
  "hidden_size": 3584,
  "objective": "bce",
  "use_rank": false,
  "margin": "m_t = head(h_t) = -logit(H_t); reward r_t = m_t - m_{t-1}",
  "step": 1000
}