{
  "backbone_arch": "Qwen/Qwen2.5-Math-7B-Instruct",
  "hidden_size": 3584,
  "objective": "bce",
  "use_rank": false,
  "margin": "m_t = head(h_t) = -logit(H_t); reward r_t = m_t - m_{t-1}",
  "step": 1000
}
{
  "backbone_arch": "Qwen/Qwen2.5-Math-7B-Instruct",
  "hidden_size": 3584,
  "objective": "bce",
  "use_rank": false,
  "margin": "m_t = head(h_t) = -logit(H_t); reward r_t = m_t - m_{t-1}",
  "step": 1000
}