File size: 209 Bytes
{
  "backbone_arch": "Qwen/Qwen2.5-Math-7B-Instruct",
  "hidden_size": 3584,
  "objective": "bce",
  "use_rank": false,
  "margin": "m_t = head(h_t) = -logit(H_t); reward r_t = m_t - m_{t-1}",
  "step": 1000
}