{
  "config": {
    "group_size": 8,
    "lr": 5e-06,
    "epochs": 2,
    "max_steps_per_traj": 8,
    "temperature": 1.0,
    "temp_start": 1.2,
    "temp_end": 0.3,
    "clip_grad": 1.0,
    "question_batch": 4,
    "rft_accept_exact_only": true,
    "rft_diversity_boost": 3.0,
    "rft_diversity_threshold": 0.5,
    "rft_force_diverse_rollouts": true
  },
  "rewards": {
    "correct": 7.0,
    "partial": 1.5,
    "wrong": -1.5,
    "plan_first": 0.5,
    "verify_finish": 1.0,
    "no_verify": -0.5,
    "sig_correct": 0.5,
    "sig_ignored": -1.0,
    "step_cost": -0.05,
    "max_step": -2.0,
    "repeat3": -0.3,
    "plan_late": -0.5,
    "diversity": 1.5,
    "cross_verify": 1.0,
    "early_exit": 1.0,
    "single_model": -1.5,
    "long_wrong": -1.0,
    "error_recover": 1.0,
    "intermediate_answer": 0.3,
    "clear_verification": 0.2,
    "stuck_same_worker": -0.3,
    "low_confidence_switch": 0.4
  },
  "epochs": [
    {
      "epoch": 1,
      "avg_reward": 6.3244140625,
      "avg_loss": 6.204556642713502,
      "accuracy": 71.54947916666666,
      "api": {
        "@qwen-coder": {
          "tag": "@qwen-coder",
          "model": "mistralai/Mistral-Small-24B-Instruct-2501",
          "provider": "together",
          "calls": 3188,
          "in_tok": 2723761,
          "out_tok": 912639,
          "cost_usd": 2.9091
        },
        "@qwen-general": {
          "tag": "@qwen-general",
          "model": "Qwen/Qwen2.5-7B-Instruct-Turbo",
          "provider": "together",
          "calls": 3215,
          "in_tok": 2910742,
          "out_tok": 450239,
          "cost_usd": 1.0083
        },
        "@kimi": {
          "tag": "@kimi",
          "model": "google/gemma-3n-E4B-it",
          "provider": "together",
          "calls": 3194,
          "in_tok": 2831854,
          "out_tok": 767376,
          "cost_usd": 1.4397
        },
        "_total": {
          "calls": 9597,
          "cost_usd": 5.3571
        }
      }
    }
  ],
  "api": {
    "@qwen-coder": {
      "tag": "@qwen-coder",
      "model": "mistralai/Mistral-Small-24B-Instruct-2501",
      "provider": "together",
      "calls": 3188,
      "in_tok": 2723761,
      "out_tok": 912639,
      "cost_usd": 2.9091
    },
    "@qwen-general": {
      "tag": "@qwen-general",
      "model": "Qwen/Qwen2.5-7B-Instruct-Turbo",
      "provider": "together",
      "calls": 3215,
      "in_tok": 2910742,
      "out_tok": 450239,
      "cost_usd": 1.0083
    },
    "@kimi": {
      "tag": "@kimi",
      "model": "google/gemma-3n-E4B-it",
      "provider": "together",
      "calls": 3194,
      "in_tok": 2831854,
      "out_tok": 767376,
      "cost_usd": 1.4397
    },
    "_total": {
      "calls": 9597,
      "cost_usd": 5.3571
    }
  },
  "peak_vram_gb": 83.2,
  "gpu": "NVIDIA A100-SXM4-80GB",
  "version": "v5",
  "stopped_early": true,
  "stop_reason": "REWARD PLATEAU: No improvement for 40 batches. Best R\u0304=9.041 at batch 8."
}