{
"config": {
"group_size": 8,
"lr": 5e-06,
"epochs": 2,
"max_steps_per_traj": 8,
"temperature": 1.0,
"temp_start": 1.2,
"temp_end": 0.3,
"clip_grad": 1.0,
"question_batch": 4,
"rft_accept_exact_only": true,
"rft_diversity_boost": 3.0,
"rft_diversity_threshold": 0.5,
"rft_force_diverse_rollouts": true
},
"rewards": {
"correct": 7.0,
"partial": 1.5,
"wrong": -1.5,
"plan_first": 0.5,
"verify_finish": 1.0,
"no_verify": -0.5,
"sig_correct": 0.5,
"sig_ignored": -1.0,
"step_cost": -0.05,
"max_step": -2.0,
"repeat3": -0.3,
"plan_late": -0.5,
"diversity": 1.5,
"cross_verify": 1.0,
"early_exit": 1.0,
"single_model": -1.5,
"long_wrong": -1.0,
"error_recover": 1.0,
"intermediate_answer": 0.3,
"clear_verification": 0.2,
"stuck_same_worker": -0.3,
"low_confidence_switch": 0.4
},
"epochs": [
{
"epoch": 1,
"avg_reward": 6.3244140625,
"avg_loss": 6.204556642713502,
"accuracy": 71.54947916666666,
"api": {
"@qwen-coder": {
"tag": "@qwen-coder",
"model": "mistralai/Mistral-Small-24B-Instruct-2501",
"provider": "together",
"calls": 3188,
"in_tok": 2723761,
"out_tok": 912639,
"cost_usd": 2.9091
},
"@qwen-general": {
"tag": "@qwen-general",
"model": "Qwen/Qwen2.5-7B-Instruct-Turbo",
"provider": "together",
"calls": 3215,
"in_tok": 2910742,
"out_tok": 450239,
"cost_usd": 1.0083
},
"@kimi": {
"tag": "@kimi",
"model": "google/gemma-3n-E4B-it",
"provider": "together",
"calls": 3194,
"in_tok": 2831854,
"out_tok": 767376,
"cost_usd": 1.4397
},
"_total": {
"calls": 9597,
"cost_usd": 5.3571
}
}
}
],
"api": {
"@qwen-coder": {
"tag": "@qwen-coder",
"model": "mistralai/Mistral-Small-24B-Instruct-2501",
"provider": "together",
"calls": 3188,
"in_tok": 2723761,
"out_tok": 912639,
"cost_usd": 2.9091
},
"@qwen-general": {
"tag": "@qwen-general",
"model": "Qwen/Qwen2.5-7B-Instruct-Turbo",
"provider": "together",
"calls": 3215,
"in_tok": 2910742,
"out_tok": 450239,
"cost_usd": 1.0083
},
"@kimi": {
"tag": "@kimi",
"model": "google/gemma-3n-E4B-it",
"provider": "together",
"calls": 3194,
"in_tok": 2831854,
"out_tok": 767376,
"cost_usd": 1.4397
},
"_total": {
"calls": 9597,
"cost_usd": 5.3571
}
},
"peak_vram_gb": 83.2,
"gpu": "NVIDIA A100-SXM4-80GB",
"version": "v5",
"stopped_early": true,
"stop_reason": "REWARD PLATEAU: No improvement for 40 batches. Best R\u0304=9.041 at batch 8."
}