{
  "config": {
    "group_size": 8,
    "lr": 5e-06,
    "epochs": 2,
    "max_steps_per_traj": 8,
    "temperature": 1.0,
    "temp_start": 1.2,
    "temp_end": 0.3,
    "clip_grad": 1.0,
    "question_batch": 4,
    "rft_accept_exact_only": true,
    "rft_diversity_boost": 3.0,
    "rft_diversity_threshold": 0.5,
    "rft_force_diverse_rollouts": true
  },
  "rewards": {
    "correct": 7.0,
    "partial": 1.5,
    "wrong": -1.5,
    "plan_first": 0.5,
    "verify_finish": 1.0,
    "no_verify": -0.5,
    "sig_correct": 0.5,
    "sig_ignored": -1.0,
    "step_cost": -0.05,
    "max_step": -2.0,
    "repeat3": -0.3,
    "plan_late": -0.5,
    "diversity": 1.5,
    "cross_verify": 1.0,
    "early_exit": 1.0,
    "single_model": -1.5,
    "long_wrong": -1.0,
    "error_recover": 1.0,
    "intermediate_answer": 0.3,
    "clear_verification": 0.2,
    "stuck_same_worker": -0.3,
    "low_confidence_switch": 0.4
  },
  "epochs": [
    {
      "epoch": 1,
      "avg_reward": 6.3244140625,
      "avg_loss": 6.204556642713502,
      "accuracy": 71.54947916666666,
      "api": {
        "@qwen-coder": {
          "tag": "@qwen-coder",
          "model": "mistralai/Mistral-Small-24B-Instruct-2501",
          "provider": "together",
          "calls": 3188,
          "in_tok": 2723761,
          "out_tok": 912639,
          "cost_usd": 2.9091
        },
        "@qwen-general": {
          "tag": "@qwen-general",
          "model": "Qwen/Qwen2.5-7B-Instruct-Turbo",
          "provider": "together",
          "calls": 3215,
          "in_tok": 2910742,
          "out_tok": 450239,
          "cost_usd": 1.0083
        },
        "@kimi": {
          "tag": "@kimi",
          "model": "google/gemma-3n-E4B-it",
          "provider": "together",
          "calls": 3194,
          "in_tok": 2831854,
          "out_tok": 767376,
          "cost_usd": 1.4397
        },
        "_total": {
          "calls": 9597,
          "cost_usd": 5.3571
        }
      }
    }
  ],
  "api": {
    "@qwen-coder": {
      "tag": "@qwen-coder",
      "model": "mistralai/Mistral-Small-24B-Instruct-2501",
      "provider": "together",
      "calls": 3188,
      "in_tok": 2723761,
      "out_tok": 912639,
      "cost_usd": 2.9091
    },
    "@qwen-general": {
      "tag": "@qwen-general",
      "model": "Qwen/Qwen2.5-7B-Instruct-Turbo",
      "provider": "together",
      "calls": 3215,
      "in_tok": 2910742,
      "out_tok": 450239,
      "cost_usd": 1.0083
    },
    "@kimi": {
      "tag": "@kimi",
      "model": "google/gemma-3n-E4B-it",
      "provider": "together",
      "calls": 3194,
      "in_tok": 2831854,
      "out_tok": 767376,
      "cost_usd": 1.4397
    },
    "_total": {
      "calls": 9597,
      "cost_usd": 5.3571
    }
  },
  "peak_vram_gb": 83.2,
  "gpu": "NVIDIA A100-SXM4-80GB",
  "version": "v5",
  "stopped_early": true,
  "stop_reason": "REWARD PLATEAU: No improvement for 40 batches. Best R\u0304=9.041 at batch 8."
}