{ "uniform": { "agent": "uniform", "n_episodes": 50, "mean_reward": 7.620243686, "std_reward": 1.497929030271716, "mean_accuracy": 0.78, "std_accuracy": 0.13564659966250536, "mean_budget_utilization": 1.0, "episodes": [ { "total_reward": 8.93117295, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.0047081500000001 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.0020814 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2800, "reward": 1.00155605 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.0032822000000001 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.00200635 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.00801035 }, { "step": 7, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1200, "reward": -0.10001795000000001 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0026818 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 400, "reward": 1.0032822000000001 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.0035824 } ] }, { "total_reward": 5.638689650000001, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": -0.10001425 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.0061341000000001 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2800, "reward": 1.00575885 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.10001715 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2000, "reward": 1.0073349 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.00365745 }, { "step": 7, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1200, "reward": -0.10001825 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0071848 }, { "step": 9, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": -0.10001660000000001 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0086858 } ] }, { "total_reward": 6.72990625, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0025317 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3200, "reward": -0.10001520000000001 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2800, "reward": -0.10001840000000001 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.0056838 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.0028318999999999 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.0035824 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1200, "reward": 1.00440795 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.00695965 }, { "step": 9, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 400, "reward": -0.10001520000000001 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00395765 } ] }, { "total_reward": 8.94896065, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.0041828 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.00816045 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.00515845 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.00650935 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.0037325 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.0035824 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1200, "reward": 1.0083856 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.00440795 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": 1.00485825 }, { "step": 10, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10001710000000001 } ] }, { "total_reward": 3.4224851499999995, "accuracy": 0.4, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0077852 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10001735 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2800, "reward": -0.10001795000000001 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": -0.10001605000000001 }, { "step": 5, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2000, "reward": -0.1000168 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.0082355 }, { "step": 7, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": -0.10001645 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0025317 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 400, "reward": 1.0040327 }, { "step": 10, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.10001535 } ] }, { "total_reward": 7.841082350000001, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.00635925 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0031321 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2800, "reward": -0.10001825 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.1000169 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.00395765 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.0028318999999999 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1200, "reward": 1.0086107500000001 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0035824 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": 1.00425785 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0083856 } ] }, { "total_reward": 6.7296836, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3600, "reward": -0.10001520000000001 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0043329 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2800, "reward": -0.1000145 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.10001660000000001 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.0062091499999999 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.0023816 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1200, "reward": 1.0022315 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.0052335000000001 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": 1.0032822000000001 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00605905 } ] }, { "total_reward": 8.936575950000002, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.00290695 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.00530855 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2800, "reward": 1.0016311 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.00395765 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2000, "reward": 1.00605905 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.00605905 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1200, "reward": 1.00260675 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.00485825 }, { "step": 9, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": -0.10001855000000001 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.00320715 } ] }, { "total_reward": 10.0601402, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0016311 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0068846 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.00245665 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.00725985 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.0077852 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.00440795 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1200, "reward": 1.008986 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.0037325 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 400, "reward": 1.00801035 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.008986 } ] }, { "total_reward": 10.0479821, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.00725985 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.00320715 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.0064343 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0046331 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2000, "reward": 1.00695965 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.0020814 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": 1.0025317 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.00725985 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": 1.0032822000000001 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.0043329 } ] }, { "total_reward": 7.836208800000001, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.00365745 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.00485825 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.0075600500000002 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.0041828 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2000, "reward": 1.00380755 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": -0.10001495 }, { "step": 7, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": -0.10001550000000001 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.00200635 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": 1.00485825 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.00530855 } ] }, { "total_reward": 6.7447692, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0080854000000001 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0083856 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.0049333 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.1000145 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.0088359 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.004483 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1200, "reward": 1.00320715 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0068846 }, { "step": 9, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": -0.10001605000000001 }, { "step": 10, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.10001520000000001 } ] }, { "total_reward": 6.730883350000001, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0023816 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.00680955 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2800, "reward": -0.10001555000000001 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.0041828 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2000, "reward": 1.00200635 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.0047831999999999 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": 1.0061341000000001 }, { "step": 8, "tokens_allocated": 400, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 800, "reward": -0.10001660000000001 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": 1.0046331 }, { "step": 10, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10001520000000001 } ] }, { "total_reward": 7.8407082500000005, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.0050834 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.00635925 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2800, "reward": -0.10001675 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.0085357 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.0038826 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.0018562500000001 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1200, "reward": 1.00320715 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.00635925 }, { "step": 9, "tokens_allocated": 400, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 400, "reward": -0.10001725 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.00545865 } ] }, { "total_reward": 8.94701215, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3600, "reward": 1.00545865 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.00500835 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.0077852 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.00680955 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2000, "reward": 1.00440795 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.00440795 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1200, "reward": 1.0032822000000001 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.00695965 }, { "step": 9, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": -0.1000143 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.00290695 } ] }, { "total_reward": 6.73463205, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.00365745 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3200, "reward": -0.10001795000000001 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 2800, "reward": -0.10001660000000001 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.0082355 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.0035824 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.00710975 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": 1.0067345 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.00245665 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": 1.00290695 }, { "step": 10, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": -0.10001660000000001 } ] }, { "total_reward": 8.946108400000002, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3600, "reward": -0.10001745000000001 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.00260675 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.008986 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.0025317 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.0086107500000001 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.00530855 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1200, "reward": 1.00170615 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0086858 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 400, "reward": 1.00440795 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0032822000000001 } ] }, { "total_reward": 8.934851, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0028318999999999 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.00320715 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.00440795 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.00425785 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2000, "reward": 1.002982 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.00350735 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": 1.0050834 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0035824 }, { "step": 9, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 400, "reward": -0.10001735 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.00500835 } ] }, { "total_reward": 6.740263100000001, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": -0.1000162 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": -0.10001660000000001 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.0077852 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.0062842 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.00605905 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.00545865 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1200, "reward": 1.00350735 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.00365745 }, { "step": 9, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": -0.10001605000000001 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0075600500000002 } ] }, { "total_reward": 6.732084499999999, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.007485 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3200, "reward": -0.10001555000000001 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.00590895 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.0028318999999999 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.0028318999999999 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1600, "reward": -0.10001455000000001 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": 1.00876085 }, { "step": 8, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": -0.1000169 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 400, "reward": 1.0017812 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0025317 } ] }, { "total_reward": 8.9449069, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.0025317 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.0062091499999999 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2800, "reward": 1.00440795 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.10001815 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.0033572499999999 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.00350735 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1200, "reward": 1.0080854000000001 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.00801035 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 400, "reward": 1.0064343 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0023816 } ] }, { "total_reward": 8.952115500000001, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3600, "reward": 1.0056838 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10001435 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.0086858 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.00801035 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2000, "reward": 1.0026818 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.0037325 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1200, "reward": 1.00650935 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0047831999999999 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 400, "reward": 1.0037325 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.00831055 } ] }, { "total_reward": 7.849942, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3600, "reward": -0.1000149 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.00740995 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2800, "reward": 1.0079353 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.0020814 }, { "step": 5, "tokens_allocated": 400, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": -0.10001650000000001 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.008986 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1200, "reward": 1.0086107500000001 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0034323 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": 1.0034323 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0080854000000001 } ] }, { "total_reward": 7.84551195, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.0079353 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0086858 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.0047081500000001 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.00425785 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.0037325 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": -0.1000182 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1200, "reward": 1.00650935 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0047081500000001 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 400, "reward": 1.00500835 }, { "step": 10, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.1000153 } ] }, { "total_reward": 8.9472363, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": -0.1000153 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0033572499999999 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2800, "reward": 1.0056838 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0056838 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.00245665 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.0085357 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1200, "reward": 1.0086858 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0028318999999999 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": 1.0025317 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.007485 } ] }, { "total_reward": 7.839136450000001, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.00650935 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.00440795 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.00155605 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.0079353 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.00440795 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.00260675 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1200, "reward": 1.00695965 }, { "step": 8, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": -0.10001550000000001 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": 1.0047831999999999 }, { "step": 10, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10001425 } ] }, { "total_reward": 10.042878700000001, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3600, "reward": 1.0033572499999999 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.00380755 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2800, "reward": 1.0018562500000001 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0086858 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2000, "reward": 1.0016311 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.00155605 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": 1.0031321 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0077852 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 400, "reward": 1.0085357 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0025317 } ] }, { "total_reward": 5.6258552, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3600, "reward": 1.00350735 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10001745000000001 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2800, "reward": -0.10001475 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.00380755 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.0025317 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.00155605 }, { "step": 7, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1200, "reward": -0.10001760000000001 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0065844 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": 1.0079353 }, { "step": 10, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.10001735 } ] }, { "total_reward": 7.843411150000001, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3600, "reward": 1.0086858 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10001435 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2800, "reward": 1.00305705 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.00680955 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.00485825 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": -0.10001855000000001 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1200, "reward": 1.00710975 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0049333 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": 1.00350735 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.004483 } ] }, { "total_reward": 7.838907149999999, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.0043329 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.00695965 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.00695965 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.10001855000000001 }, { "step": 5, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2000, "reward": -0.10001535 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.0050834 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": 1.0026818 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0056838 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": 1.00485825 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0023816 } ] }, { "total_reward": 7.836883499999999, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0037325 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0016311 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.00440795 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.1000169 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2000, "reward": 1.0065844 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": -0.1000143 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1200, "reward": 1.0085357 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.00350735 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": 1.0049333 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0035824 } ] }, { "total_reward": 6.7344062, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": -0.10001660000000001 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.00320715 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2800, "reward": -0.10001890000000001 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2400, "reward": -0.10001635 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.0025317 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.00876085 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1200, "reward": 1.008986 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0047831999999999 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": 1.0016311 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.00455805 } ] }, { "total_reward": 6.733280700000001, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": -0.10001605000000001 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0082355 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.0017812 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.00305705 }, { "step": 5, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2000, "reward": -0.10001755000000001 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": -0.10001800000000001 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1200, "reward": 1.00320715 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.00635925 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 400, "reward": 1.00575885 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0049333 } ] }, { "total_reward": 8.956168, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.0075600500000002 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.0033572499999999 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.0086107500000001 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.00440795 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.00710975 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.0073349 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1200, "reward": 1.0077852 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.00710975 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": 1.00290695 }, { "step": 10, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.10001455000000001 } ] }, { "total_reward": 8.95031035, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.00771015 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.00635925 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.0080854000000001 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.0038826 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.0035824 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1600, "reward": -0.1000183 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1200, "reward": 1.00320715 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0038826 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 400, "reward": 1.00816045 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.00545865 } ] }, { "total_reward": 5.637345049999999, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": -0.10001415000000001 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.0053836 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.00650935 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.10001535 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.0053836 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1600, "reward": -0.1000149 }, { "step": 7, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1200, "reward": -0.10001555000000001 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0056838 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 400, "reward": 1.0070347 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.00740995 } ] }, { "total_reward": 8.951287350000001, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.0073349 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.0034323 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2800, "reward": -0.10001695000000001 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.0043329 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.00245665 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.0070347 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1200, "reward": 1.00575885 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.00831055 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": 1.0037325 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.00891095 } ] }, { "total_reward": 4.53682, "accuracy": 0.5, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0070347 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3200, "reward": -0.1000143 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2800, "reward": 1.0086858 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2400, "reward": -0.10001775 }, { "step": 5, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2000, "reward": -0.10001455000000001 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": -0.10001855000000001 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": 1.00635925 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0080854000000001 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": 1.0067345 }, { "step": 10, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": -0.1000145 } ] }, { "total_reward": 6.734710550000001, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.0041828 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3200, "reward": -0.10001520000000001 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.00365745 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.00200635 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.0075600500000002 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.00575885 }, { "step": 7, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": -0.1000143 }, { "step": 8, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": -0.1000182 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 400, "reward": 1.0038826 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00771015 } ] }, { "total_reward": 7.837031000000001, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.00740995 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3200, "reward": -0.10001825 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2800, "reward": -0.10001555000000001 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.0035824 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.0020814 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.00200635 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1200, "reward": 1.0075600500000002 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.00305705 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": 1.0062842 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0050834 } ] }, { "total_reward": 7.8445363000000015, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0070347 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.00816045 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.00740995 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": -0.1000183 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.00260675 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.0043329 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1200, "reward": 1.00395765 }, { "step": 8, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 800, "reward": -0.10001520000000001 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 400, "reward": 1.0023816 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0086858 } ] }, { "total_reward": 7.8411603, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0016311 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.0066594500000001 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.0052335000000001 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.00500835 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.00605905 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.004483 }, { "step": 7, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1200, "reward": -0.10001470000000001 }, { "step": 8, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 800, "reward": -0.10001755000000001 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": 1.00725985 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.00485825 } ] }, { "total_reward": 6.742589049999999, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.0043329 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.00155605 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2800, "reward": -0.10001520000000001 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.00831055 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.00771015 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.00500835 }, { "step": 7, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1200, "reward": -0.10001605000000001 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0071848 }, { "step": 9, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": -0.1000182 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.0085357 } ] }, { "total_reward": 6.73163405, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.0056838 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0023816 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2800, "reward": 1.004483 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.0050834 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.0023816 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1600, "reward": -0.10001635 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1200, "reward": 1.00771015 }, { "step": 8, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": -0.10001435 }, { "step": 9, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": -0.10001645 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00395765 } ] }, { "total_reward": 8.9411572, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.0043329 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0046331 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.0016311 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0037325 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2000, "reward": 1.00876085 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1600, "reward": -0.10001535 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": 1.00320715 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.00740995 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 400, "reward": 1.0046331 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0028318999999999 } ] }, { "total_reward": 6.73613935, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0070347 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3200, "reward": -0.10001470000000001 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2800, "reward": 1.00305705 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2400, "reward": -0.10001520000000001 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.00455805 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": -0.10001495 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": 1.0065844 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0047831999999999 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 400, "reward": 1.00365745 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.00650935 } ] }, { "total_reward": 3.4263921500000003, "accuracy": 0.4, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3600, "reward": 1.00725985 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3200, "reward": -0.10001470000000001 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2800, "reward": -0.1000143 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.0053836 }, { "step": 5, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2000, "reward": -0.10001405000000001 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.0061341000000001 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1200, "reward": 1.00771015 }, { "step": 8, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": -0.10001835 }, { "step": 9, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": -0.1000187 }, { "step": 10, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10001545 } ] }, { "total_reward": 6.735009150000001, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.00515845 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0062842 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.0035824 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2400, "reward": -0.1000168 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2000, "reward": 1.00771015 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1600, "reward": -0.10001675 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1200, "reward": 1.00410775 }, { "step": 8, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": -0.10001575 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": 1.00425785 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00395765 } ] }, { "total_reward": 8.951962250000001, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.0047831999999999 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.00380755 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2800, "reward": 1.00680955 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.0025317 }, { "step": 5, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": -0.10001750000000001 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.0080854000000001 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1200, "reward": 1.0075600500000002 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0077852 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 400, "reward": 1.0028318999999999 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0077852 } ] }, { "total_reward": 8.94926085, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.00545865 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.0062091499999999 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2800, "reward": 1.00725985 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.0064343 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2000, "reward": 1.0052335000000001 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.0047081500000001 }, { "step": 7, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": -0.10001710000000001 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0033572499999999 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": 1.0033572499999999 }, { "step": 10, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.00725985 } ] } ] }, "greedy_max": { "agent": "greedy_max", "n_episodes": 50, "mean_reward": 4.163493538, "std_reward": 0.8606553919009542, "mean_accuracy": 0.8399999999999999, "std_accuracy": 0.15491933384829668, "mean_budget_utilization": 1.0, "episodes": [ { "total_reward": 3.9370282500000005, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003655 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.0055137 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.01759675 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0058889500000001 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0080654 } ] }, { "total_reward": 2.83028465, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003635000000001 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.00964145 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.0142195 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0064893499999998 }, { "step": 5, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.1000293 } ] }, { "total_reward": 5.0546615500000005, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.00363745 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0122682 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.01429455 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.00919115 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0152702 } ] }, { "total_reward": 3.9344796, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003350000000001 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.00633925 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.01294365 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.01114245 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00408775 } ] }, { "total_reward": 3.9645737, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.01624585 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.013469 }, { "step": 3, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1600, "reward": -0.10003445000000001 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0176718 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.0172215 } ] }, { "total_reward": 1.7125787, "accuracy": 0.4, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.1000352 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.00513845 }, { "step": 3, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1600, "reward": -0.10002935 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": -0.10003525 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00754005 } ] }, { "total_reward": 5.04520525, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.0118179 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.01579555 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.00408775 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.00829055 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.0052135 } ] }, { "total_reward": 3.9325303500000004, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.01339395 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0032622 }, { "step": 3, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": -0.10003145000000001 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0083656 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.00754005 } ] }, { "total_reward": 5.05931465, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.0165460499999999 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.01129255 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.01279355 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0150450500000001 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00363745 } ] }, { "total_reward": 2.8317869000000004, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0150450500000001 }, { "step": 2, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.10003530000000001 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.0098666 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.00693965 }, { "step": 5, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10002910000000001 } ] }, { "total_reward": 3.9469379, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0112175 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.008966 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.0155704 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": -0.10003350000000001 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.0112175 } ] }, { "total_reward": 5.049032799999999, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.0154203 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.0166211 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.0097165 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.00408775 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00318715 } ] }, { "total_reward": 2.8395168, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.0152702 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.0073899499999999 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.0169213 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": -0.10003350000000001 }, { "step": 5, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10003115 } ] }, { "total_reward": 5.0682456, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0139193 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.01444465 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.0133189 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.01174285 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0148199 } ] }, { "total_reward": 3.92637765, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.008966 }, { "step": 2, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.10003005000000001 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.00363745 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.00453805 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0092662000000001 } ] }, { "total_reward": 2.8245754499999998, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.01354405 }, { "step": 2, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2400, "reward": -0.10003445000000001 }, { "step": 3, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": -0.1000366 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0047632 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00633925 } ] }, { "total_reward": 3.9420648999999996, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3200, "reward": -0.10002825 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.01609575 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.00829055 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0145197 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00318715 } ] }, { "total_reward": 5.06576895, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.0152702 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0160206999999999 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.0082155 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0157205 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.01054205 } ] }, { "total_reward": 5.0346232, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0077652 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.00303705 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.0064143 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0082155 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.00919115 } ] }, { "total_reward": 5.0750001000000005, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0127185 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0169213 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.0107671999999999 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.01729655 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.01729655 } ] }, { "total_reward": 2.8305914, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003025 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0100167 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.0091161 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": -0.10002885 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0115177 } ] }, { "total_reward": 3.9369521499999998, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.01189295 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.0056638 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.013469 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 800, "reward": -0.1000376 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0059639999999999 } ] }, { "total_reward": 3.9525647499999996, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.01114245 }, { "step": 2, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.10003540000000001 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.0067145 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.01744665 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.01729655 } ] }, { "total_reward": 5.04955815, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0040127 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.0058139 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.0165460499999999 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.01324385 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00994165 } ] }, { "total_reward": 5.058639199999999, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0115177 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.01204305 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.0139193 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.01189295 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0092662000000001 } ] }, { "total_reward": 5.038826, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.00528855 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.0092662000000001 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.01204305 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0040127 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0082155 } ] }, { "total_reward": 3.94558905, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.00303705 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0112175 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.0178969500000001 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.013469 }, { "step": 5, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10003145000000001 } ] }, { "total_reward": 3.93027465, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.00663945 }, { "step": 2, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.10003565 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.00348735 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.00708975 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.01309375 } ] }, { "total_reward": 5.0530855, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.013469 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.0056638 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.01054205 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0073899499999999 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.0160206999999999 } ] }, { "total_reward": 5.0443796999999995, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.00363745 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0049133 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.0125684 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.00814045 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0151201 } ] }, { "total_reward": 3.9214942, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.00408775 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.00468815 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.00558875 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": -0.10003525 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0071648 } ] }, { "total_reward": 2.8296829, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.01369415 }, { "step": 2, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.10003530000000001 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.0056638 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.01039195 }, { "step": 5, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": -0.1000317 } ] }, { "total_reward": 3.9426592, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0176718 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.0047632 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.00558875 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0146698 }, { "step": 5, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.10003435000000001 } ] }, { "total_reward": 5.046406050000001, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.013469 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.00318715 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.00318715 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0092662000000001 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.01729655 } ] }, { "total_reward": 5.05180965, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.00453805 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.01444465 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.0112175 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0173716 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.00423785 } ] }, { "total_reward": 3.94130965, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003300000000001 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.00889095 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.0103169 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0151201 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.0070147 } ] }, { "total_reward": 5.04760685, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.01744665 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.008966 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.0070147 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0049133 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0092662000000001 } ] }, { "total_reward": 5.0503837, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.00663945 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.01489495 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.0073899499999999 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.0041628 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.01729655 } ] }, { "total_reward": 2.8441699000000003, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003350000000001 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0122682 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.01639595 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": -0.10003115 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0155704 } ] }, { "total_reward": 3.9362849499999997, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.00558875 }, { "step": 2, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2400, "reward": -0.10002935 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.0169213 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.00573885 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.0080654 } ] }, { "total_reward": 5.0385257999999995, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.00663945 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.00633925 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.00558875 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0131688 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.00678955 } ] }, { "total_reward": 5.06862085, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.0165460499999999 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.01399435 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.00979155 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.01474485 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.01354405 } ] }, { "total_reward": 3.9379313999999996, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0049133 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.0124183 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.01129255 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 800, "reward": -0.10003400000000001 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0093412499999999 } ] }, { "total_reward": 3.9515129, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003655 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.01684625 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.0163209 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.01414445 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00423785 } ] }, { "total_reward": 3.9419063999999997, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0094163 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.0146698 }, { "step": 3, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1600, "reward": -0.10003665 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.0038626 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.01399435 } ] }, { "total_reward": 3.9389795000000003, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.00663945 }, { "step": 2, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2400, "reward": -0.1000366 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.0098666 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.0122682 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0102418499999999 } ] }, { "total_reward": 2.8365136, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.0050634 }, { "step": 2, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2400, "reward": -0.1000366 }, { "step": 3, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": -0.10002925 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.01744665 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0140694 } ] }, { "total_reward": 5.0585641500000005, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.01294365 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.0136191 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.00318715 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.0161708 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.0126434500000001 } ] }, { "total_reward": 3.9344063499999997, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0064893499999998 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0170713999999998 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.0052135 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": -0.1000317 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.0056638 } ] }, { "total_reward": 3.9408614, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0169213 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0100167 }, { "step": 3, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": -0.10003095000000001 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0098666 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.00408775 } ] } ] }, "oracle": { "agent": "oracle", "n_episodes": 50, "mean_reward": 6.932624968, "std_reward": 1.50156485979675, "mean_accuracy": 0.7282222222222221, "std_accuracy": 0.13007519192972794, "mean_budget_utilization": 0.9830249999999999, "episodes": [ { "total_reward": 7.896245749999999, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003655 }, { "step": 2, "tokens_allocated": 177, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3023, "reward": 1.00704585 }, { "step": 3, "tokens_allocated": 283, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2740, "reward": 1.00884175 }, { "step": 4, "tokens_allocated": 782, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1958, "reward": 1.0319322 }, { "step": 5, "tokens_allocated": 163, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1795, "reward": 1.00674635 }, { "step": 6, "tokens_allocated": 448, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1347, "reward": 1.01161035 }, { "step": 7, "tokens_allocated": 420, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 927, "reward": -0.10001795000000001 }, { "step": 8, "tokens_allocated": 618, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 309, "reward": 1.0190318 }, { "step": 9, "tokens_allocated": 193, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 116, "reward": 1.0092215 }, { "step": 10, "tokens_allocated": 116, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.00187045 } ] }, { "total_reward": 5.67526105, "accuracy": 0.6, "total_tokens_used": 3843, "budget_utilization": 0.96075, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003635000000001 }, { "step": 2, "tokens_allocated": 177, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3023, "reward": 1.0068207 }, { "step": 3, "tokens_allocated": 755, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2268, "reward": 1.03238385 }, { "step": 4, "tokens_allocated": 648, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1620, "reward": -0.10001715 }, { "step": 5, "tokens_allocated": 540, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1080, "reward": 1.0178349 }, { "step": 6, "tokens_allocated": 108, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 972, "reward": 1.0011203499999999 }, { "step": 7, "tokens_allocated": 303, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 669, "reward": 1.0110172 }, { "step": 8, "tokens_allocated": 167, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 502, "reward": -0.1000047 }, { "step": 9, "tokens_allocated": 188, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 314, "reward": -0.10000400000000001 }, { "step": 10, "tokens_allocated": 157, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 157, "reward": 1.00614625 } ] }, { "total_reward": 7.87083005, "accuracy": 0.8, "total_tokens_used": 3830, "budget_utilization": 0.9575, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3800, "reward": 1.00246665 }, { "step": 2, "tokens_allocated": 527, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3273, "reward": -0.10001520000000001 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2473, "reward": 1.01429455 }, { "step": 4, "tokens_allocated": 176, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2297, "reward": 1.0064455 }, { "step": 5, "tokens_allocated": 191, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2106, "reward": 1.00794575 }, { "step": 6, "tokens_allocated": 526, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1580, "reward": 1.0130324 }, { "step": 7, "tokens_allocated": 493, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1087, "reward": 1.01138295 }, { "step": 8, "tokens_allocated": 181, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 906, "reward": 1.00779615 }, { "step": 9, "tokens_allocated": 566, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 340, "reward": -0.10001520000000001 }, { "step": 10, "tokens_allocated": 170, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 170, "reward": 1.0074965 } ] }, { "total_reward": 8.9928685, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003350000000001 }, { "step": 2, "tokens_allocated": 177, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3023, "reward": 1.00719595 }, { "step": 3, "tokens_allocated": 188, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2835, "reward": 1.0071954 }, { "step": 4, "tokens_allocated": 303, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2532, "reward": 1.0120679 }, { "step": 5, "tokens_allocated": 211, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2321, "reward": 1.0032166 }, { "step": 6, "tokens_allocated": 348, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1973, "reward": 1.0133415000000001 }, { "step": 7, "tokens_allocated": 369, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1604, "reward": 1.015667 }, { "step": 8, "tokens_allocated": 668, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 936, "reward": 1.02450795 }, { "step": 9, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 136, "reward": 1.0059639999999999 }, { "step": 10, "tokens_allocated": 136, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0037457 } ] }, { "total_reward": 5.6425433499999995, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3800, "reward": 1.00216645 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3000, "reward": 1.013469 }, { "step": 3, "tokens_allocated": 468, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2532, "reward": -0.10001795000000001 }, { "step": 4, "tokens_allocated": 271, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2261, "reward": 1.00764155 }, { "step": 5, "tokens_allocated": 471, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1790, "reward": -0.1000168 }, { "step": 6, "tokens_allocated": 268, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1522, "reward": 1.00816705 }, { "step": 7, "tokens_allocated": 761, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 761, "reward": -0.10001645 }, { "step": 8, "tokens_allocated": 190, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 571, "reward": 1.00742045 }, { "step": 9, "tokens_allocated": 142, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 429, "reward": 1.0037454 }, { "step": 10, "tokens_allocated": 429, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.10001535 } ] }, { "total_reward": 6.881022700000001, "accuracy": 0.7777777777777778, "total_tokens_used": 3986, "budget_utilization": 0.9965, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.1000352 }, { "step": 2, "tokens_allocated": 711, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2489, "reward": 1.0264571 }, { "step": 3, "tokens_allocated": 388, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2101, "reward": 1.0173922 }, { "step": 4, "tokens_allocated": 600, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1501, "reward": -0.1000169 }, { "step": 5, "tokens_allocated": 125, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1376, "reward": 1.0041215 }, { "step": 6, "tokens_allocated": 137, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1239, "reward": 1.00449615 }, { "step": 7, "tokens_allocated": 154, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1085, "reward": 1.0059962999999998 }, { "step": 8, "tokens_allocated": 271, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 814, "reward": 1.00689105 }, { "step": 9, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 14, "reward": 1.0157205 } ] }, { "total_reward": 5.7549208499999995, "accuracy": 0.6666666666666666, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 500, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3500, "reward": -0.10001520000000001 }, { "step": 2, "tokens_allocated": 291, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 3209, "reward": -0.100008 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2409, "reward": 1.00408775 }, { "step": 4, "tokens_allocated": 688, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1721, "reward": -0.10001660000000001 }, { "step": 5, "tokens_allocated": 215, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1506, "reward": 1.002616 }, { "step": 6, "tokens_allocated": 150, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1356, "reward": 1.0042703499999999 }, { "step": 7, "tokens_allocated": 169, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1187, "reward": 1.00644585 }, { "step": 8, "tokens_allocated": 494, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 693, "reward": 1.0122835000000001 }, { "step": 9, "tokens_allocated": 693, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0252572 } ] }, { "total_reward": 7.91441255, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 300, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": -0.1000091 }, { "step": 2, "tokens_allocated": 513, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3187, "reward": 1.01378355 }, { "step": 3, "tokens_allocated": 796, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2391, "reward": 1.0313311 }, { "step": 4, "tokens_allocated": 170, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2221, "reward": 1.00682105 }, { "step": 5, "tokens_allocated": 740, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1481, "reward": 1.03155905 }, { "step": 6, "tokens_allocated": 148, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1333, "reward": 1.00412035 }, { "step": 7, "tokens_allocated": 416, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 917, "reward": 1.00380675 }, { "step": 8, "tokens_allocated": 611, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 306, "reward": 1.02068325 }, { "step": 9, "tokens_allocated": 114, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 192, "reward": 1.00232085 }, { "step": 10, "tokens_allocated": 192, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.1000043 } ] }, { "total_reward": 9.02716315, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3800, "reward": 1.0035173499999999 }, { "step": 2, "tokens_allocated": 316, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3484, "reward": 1.01236745 }, { "step": 3, "tokens_allocated": 544, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2940, "reward": 1.01325665 }, { "step": 4, "tokens_allocated": 315, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2625, "reward": 1.010116 }, { "step": 5, "tokens_allocated": 218, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 2407, "reward": -0.1000076 }, { "step": 6, "tokens_allocated": 361, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2046, "reward": 1.0151420500000001 }, { "step": 7, "tokens_allocated": 639, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1407, "reward": 1.0269110000000001 }, { "step": 8, "tokens_allocated": 586, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 821, "reward": 1.0176825 }, { "step": 9, "tokens_allocated": 513, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 308, "reward": 1.01648535 }, { "step": 10, "tokens_allocated": 308, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.0116924 } ] }, { "total_reward": 8.013730599999999, "accuracy": 0.8888888888888888, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 300, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": 1.008991 }, { "step": 2, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2900, "reward": -0.10003530000000001 }, { "step": 3, "tokens_allocated": 181, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2719, "reward": 1.0065203 }, { "step": 4, "tokens_allocated": 485, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2234, "reward": 1.0110081 }, { "step": 5, "tokens_allocated": 744, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1490, "reward": 1.03275965 }, { "step": 6, "tokens_allocated": 149, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1341, "reward": 1.00584645 }, { "step": 7, "tokens_allocated": 670, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 671, "reward": 1.0227817 }, { "step": 8, "tokens_allocated": 279, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 392, "reward": 1.00764115 }, { "step": 9, "tokens_allocated": 392, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.01821755 } ] }, { "total_reward": 6.7610722999999995, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 300, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": 1.0113926 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2900, "reward": 1.008966 }, { "step": 3, "tokens_allocated": 453, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2447, "reward": 1.01153505 }, { "step": 4, "tokens_allocated": 262, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2185, "reward": 1.00681645 }, { "step": 5, "tokens_allocated": 455, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1730, "reward": 1.00793255 }, { "step": 6, "tokens_allocated": 692, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1038, "reward": -0.10001495 }, { "step": 7, "tokens_allocated": 519, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 519, "reward": -0.10001550000000001 }, { "step": 8, "tokens_allocated": 346, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 173, "reward": 1.01251605 }, { "step": 9, "tokens_allocated": 64, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 109, "reward": -0.1000018 }, { "step": 10, "tokens_allocated": 109, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.00194585 } ] }, { "total_reward": 7.855514200000001, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3800, "reward": 1.00201635 }, { "step": 2, "tokens_allocated": 316, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3484, "reward": 1.01101655 }, { "step": 3, "tokens_allocated": 217, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3267, "reward": 1.0036666 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2467, "reward": 1.00408775 }, { "step": 5, "tokens_allocated": 205, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2262, "reward": 1.0036672 }, { "step": 6, "tokens_allocated": 226, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2036, "reward": 1.0061428000000001 }, { "step": 7, "tokens_allocated": 636, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1400, "reward": 1.02090715 }, { "step": 8, "tokens_allocated": 233, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1167, "reward": 1.00404105 }, { "step": 9, "tokens_allocated": 437, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 730, "reward": -0.10001605000000001 }, { "step": 10, "tokens_allocated": 730, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.10001520000000001 } ] }, { "total_reward": 6.87944935, "accuracy": 0.7777777777777778, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3800, "reward": 1.0028419 }, { "step": 2, "tokens_allocated": 211, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3589, "reward": 1.0036669 }, { "step": 3, "tokens_allocated": 560, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3029, "reward": -0.10001555000000001 }, { "step": 4, "tokens_allocated": 324, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2705, "reward": 1.01146645 }, { "step": 5, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1905, "reward": -0.10003115 }, { "step": 6, "tokens_allocated": 476, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1429, "reward": 1.0104832 }, { "step": 7, "tokens_allocated": 714, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 715, "reward": 1.0296841 }, { "step": 8, "tokens_allocated": 119, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 596, "reward": 1.0020204 }, { "step": 9, "tokens_allocated": 596, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0193331 } ] }, { "total_reward": 7.9015055, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 300, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": 1.00966645 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2900, "reward": 1.01444465 }, { "step": 3, "tokens_allocated": 453, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2447, "reward": -0.10001675 }, { "step": 4, "tokens_allocated": 262, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2185, "reward": 1.0083925 }, { "step": 5, "tokens_allocated": 182, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2003, "reward": 1.0080963 }, { "step": 6, "tokens_allocated": 300, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1703, "reward": 1.00846565 }, { "step": 7, "tokens_allocated": 532, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1171, "reward": 1.01310715 }, { "step": 8, "tokens_allocated": 780, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 391, "reward": 1.03485925 }, { "step": 9, "tokens_allocated": 97, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 294, "reward": 1.00449815 }, { "step": 10, "tokens_allocated": 294, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.10000785000000001 } ] }, { "total_reward": 6.915772100000001, "accuracy": 0.7777777777777778, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 500, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3500, "reward": 1.01295865 }, { "step": 2, "tokens_allocated": 777, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2723, "reward": 1.03328335 }, { "step": 3, "tokens_allocated": 170, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2553, "reward": 1.0071963 }, { "step": 4, "tokens_allocated": 273, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2280, "reward": 1.00809175 }, { "step": 5, "tokens_allocated": 760, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1520, "reward": 1.03140795 }, { "step": 6, "tokens_allocated": 380, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1140, "reward": -0.1000076 }, { "step": 7, "tokens_allocated": 356, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 784, "reward": -0.10000935000000001 }, { "step": 8, "tokens_allocated": 522, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 262, "reward": 1.01610965 }, { "step": 9, "tokens_allocated": 262, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0067414 } ] }, { "total_reward": 6.76834685, "accuracy": 0.7, "total_tokens_used": 3889, "budget_utilization": 0.97225, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 300, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": -0.10000945 }, { "step": 2, "tokens_allocated": 513, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3187, "reward": -0.10001795000000001 }, { "step": 3, "tokens_allocated": 298, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2889, "reward": 1.01041705 }, { "step": 4, "tokens_allocated": 309, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2580, "reward": 1.0112420500000001 }, { "step": 5, "tokens_allocated": 215, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2365, "reward": 1.00254095 }, { "step": 6, "tokens_allocated": 354, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2011, "reward": 1.01236555 }, { "step": 7, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1211, "reward": -0.1000326 }, { "step": 8, "tokens_allocated": 504, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 707, "reward": 1.01025665 }, { "step": 9, "tokens_allocated": 265, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 442, "reward": 1.00869255 }, { "step": 10, "tokens_allocated": 331, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 111, "reward": 1.01289205 } ] }, { "total_reward": 8.9896424, "accuracy": 0.9, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 500, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3500, "reward": -0.10001745000000001 }, { "step": 2, "tokens_allocated": 486, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3014, "reward": 1.00905675 }, { "step": 3, "tokens_allocated": 188, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2826, "reward": 1.008096 }, { "step": 4, "tokens_allocated": 302, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2524, "reward": 1.00891585 }, { "step": 5, "tokens_allocated": 210, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2314, "reward": 1.0044925 }, { "step": 6, "tokens_allocated": 231, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2083, "reward": 1.0063677 }, { "step": 7, "tokens_allocated": 260, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1823, "reward": 1.00696665 }, { "step": 8, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1023, "reward": 1.0058139 }, { "step": 9, "tokens_allocated": 639, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 384, "reward": 1.02233295 }, { "step": 10, "tokens_allocated": 384, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.01761755 } ] }, { "total_reward": 7.904428449999999, "accuracy": 0.8, "total_tokens_used": 3954, "budget_utilization": 0.9885, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3800, "reward": 1.00441795 }, { "step": 2, "tokens_allocated": 527, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3273, "reward": 1.01273215 }, { "step": 3, "tokens_allocated": 204, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3069, "reward": 1.0037422999999999 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2269, "reward": 1.0157205 }, { "step": 5, "tokens_allocated": 756, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1513, "reward": 1.029682 }, { "step": 6, "tokens_allocated": 226, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1287, "reward": -0.10000740000000001 }, { "step": 7, "tokens_allocated": 643, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 644, "reward": 1.0233084 }, { "step": 8, "tokens_allocated": 161, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 483, "reward": 1.00509535 }, { "step": 9, "tokens_allocated": 301, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 182, "reward": 1.00974145 }, { "step": 10, "tokens_allocated": 136, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 46, "reward": -0.10000425 } ] }, { "total_reward": 6.7541784, "accuracy": 0.7, "total_tokens_used": 3629, "budget_utilization": 0.90725, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 300, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": -0.1000081 }, { "step": 2, "tokens_allocated": 205, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3495, "reward": 1.0047179 }, { "step": 3, "tokens_allocated": 218, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3277, "reward": 1.0053927 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2477, "reward": 1.0082155 }, { "step": 5, "tokens_allocated": 309, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2168, "reward": 1.0105666 }, { "step": 6, "tokens_allocated": 542, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1626, "reward": 1.01610865 }, { "step": 7, "tokens_allocated": 203, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1423, "reward": 1.00404255 }, { "step": 8, "tokens_allocated": 237, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1186, "reward": 1.0051666 }, { "step": 9, "tokens_allocated": 444, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 742, "reward": -0.10001605000000001 }, { "step": 10, "tokens_allocated": 371, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 371, "reward": -0.10000795000000001 } ] }, { "total_reward": 5.6360168999999996, "accuracy": 0.6, "total_tokens_used": 3946, "budget_utilization": 0.9865, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0127185 }, { "step": 2, "tokens_allocated": 444, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2756, "reward": -0.10001555000000001 }, { "step": 3, "tokens_allocated": 172, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2584, "reward": 1.00697105 }, { "step": 4, "tokens_allocated": 184, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.0071956 }, { "step": 5, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2200, "reward": 1.0013409 }, { "step": 6, "tokens_allocated": 550, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1650, "reward": -0.10001455000000001 }, { "step": 7, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 850, "reward": -0.1000309 }, { "step": 8, "tokens_allocated": 566, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 284, "reward": -0.1000169 }, { "step": 9, "tokens_allocated": 177, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 107, "reward": 1.00674565 }, { "step": 10, "tokens_allocated": 53, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 54, "reward": 1.0011231 } ] }, { "total_reward": 6.756207800000001, "accuracy": 0.7, "total_tokens_used": 3737, "budget_utilization": 0.93425, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003025 }, { "step": 2, "tokens_allocated": 444, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2756, "reward": 1.00950915 }, { "step": 3, "tokens_allocated": 258, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2498, "reward": 1.00741705 }, { "step": 4, "tokens_allocated": 713, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1785, "reward": -0.10001815 }, { "step": 5, "tokens_allocated": 148, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1637, "reward": 1.0057714500000001 }, { "step": 6, "tokens_allocated": 163, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1474, "reward": 1.00569565 }, { "step": 7, "tokens_allocated": 276, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1198, "reward": -0.10000895 }, { "step": 8, "tokens_allocated": 499, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 699, "reward": 1.01543535 }, { "step": 9, "tokens_allocated": 174, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 525, "reward": 1.0059953 }, { "step": 10, "tokens_allocated": 262, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 263, "reward": 1.0064412 } ] }, { "total_reward": 7.8895138, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 500, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3500, "reward": 1.0131838 }, { "step": 2, "tokens_allocated": 777, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2723, "reward": -0.10001435 }, { "step": 3, "tokens_allocated": 425, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2298, "reward": 1.0105608 }, { "step": 4, "tokens_allocated": 410, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1888, "reward": 1.00876035 }, { "step": 5, "tokens_allocated": 629, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1259, "reward": 1.0198568000000001 }, { "step": 6, "tokens_allocated": 314, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 945, "reward": 1.01266775 }, { "step": 7, "tokens_allocated": 177, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 768, "reward": -0.1000043 }, { "step": 8, "tokens_allocated": 192, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 576, "reward": 1.0091465 }, { "step": 9, "tokens_allocated": 144, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 432, "reward": 1.0046459 }, { "step": 10, "tokens_allocated": 432, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.01071055 } ] }, { "total_reward": 9.0091763, "accuracy": 0.9, "total_tokens_used": 3633, "budget_utilization": 0.90825, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 500, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3500, "reward": -0.1000149 }, { "step": 2, "tokens_allocated": 777, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2723, "reward": 1.03568495 }, { "step": 3, "tokens_allocated": 255, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2468, "reward": 1.00749225 }, { "step": 4, "tokens_allocated": 176, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2292, "reward": 1.00667065 }, { "step": 5, "tokens_allocated": 191, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2101, "reward": 1.0074204 }, { "step": 6, "tokens_allocated": 315, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1786, "reward": 1.0099659 }, { "step": 7, "tokens_allocated": 223, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1563, "reward": 1.00269065 }, { "step": 8, "tokens_allocated": 390, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1173, "reward": 1.01731705 }, { "step": 9, "tokens_allocated": 439, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 734, "reward": 1.0063573 }, { "step": 10, "tokens_allocated": 367, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 367, "reward": 1.01559205 } ] }, { "total_reward": 6.79033245, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0040127 }, { "step": 2, "tokens_allocated": 711, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2489, "reward": 1.0320108000000001 }, { "step": 3, "tokens_allocated": 388, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2101, "reward": 1.0179926 }, { "step": 4, "tokens_allocated": 225, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1876, "reward": 1.00584265 }, { "step": 5, "tokens_allocated": 156, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1720, "reward": 1.0055459 }, { "step": 6, "tokens_allocated": 688, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1032, "reward": -0.1000182 }, { "step": 7, "tokens_allocated": 193, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 839, "reward": 1.0083209 }, { "step": 8, "tokens_allocated": 559, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 280, "reward": 1.01663315 }, { "step": 9, "tokens_allocated": 175, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 105, "reward": -0.10000445000000001 }, { "step": 10, "tokens_allocated": 105, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10000360000000001 } ] }, { "total_reward": 6.78769955, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0115177 }, { "step": 2, "tokens_allocated": 266, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2934, "reward": 1.0058406 }, { "step": 3, "tokens_allocated": 733, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2201, "reward": 1.0306587999999999 }, { "step": 4, "tokens_allocated": 393, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1808, "reward": 1.0185176999999999 }, { "step": 5, "tokens_allocated": 150, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1658, "reward": 1.00547115 }, { "step": 6, "tokens_allocated": 414, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1244, "reward": 1.0095857 }, { "step": 7, "tokens_allocated": 155, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1089, "reward": 1.00614635 }, { "step": 8, "tokens_allocated": 181, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 908, "reward": -0.10000395000000001 }, { "step": 9, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 108, "reward": -0.10003025 }, { "step": 10, "tokens_allocated": 108, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10000425 } ] }, { "total_reward": 7.883722, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 300, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": 1.01176785 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2900, "reward": 1.0092662000000001 }, { "step": 3, "tokens_allocated": 453, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2447, "reward": 1.00553105 }, { "step": 4, "tokens_allocated": 699, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1748, "reward": 1.0303603 }, { "step": 5, "tokens_allocated": 145, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1603, "reward": 1.00554645 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1203, "reward": 1.00260675 }, { "step": 7, "tokens_allocated": 375, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 828, "reward": 1.0161920500000001 }, { "step": 8, "tokens_allocated": 552, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 276, "reward": -0.10001550000000001 }, { "step": 9, "tokens_allocated": 103, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 173, "reward": 1.0024715 }, { "step": 10, "tokens_allocated": 173, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10000465 } ] }, { "total_reward": 9.009530049999999, "accuracy": 0.9, "total_tokens_used": 3900, "budget_utilization": 0.975, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 500, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3500, "reward": 1.01085725 }, { "step": 2, "tokens_allocated": 486, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3014, "reward": 1.01025755 }, { "step": 3, "tokens_allocated": 282, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2732, "reward": 1.00711565 }, { "step": 4, "tokens_allocated": 487, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2245, "reward": 1.0152108 }, { "step": 5, "tokens_allocated": 748, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1497, "reward": 1.0277311 }, { "step": 6, "tokens_allocated": 224, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1273, "reward": -0.10000925000000001 }, { "step": 7, "tokens_allocated": 636, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 637, "reward": 1.0208321 }, { "step": 8, "tokens_allocated": 106, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 531, "reward": 1.0020961000000002 }, { "step": 9, "tokens_allocated": 331, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 200, "reward": 1.0137176 }, { "step": 10, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 100, "reward": 1.00172115 } ] }, { "total_reward": 5.77586915, "accuracy": 0.6666666666666666, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 500, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3500, "reward": 1.01100735 }, { "step": 2, "tokens_allocated": 777, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2723, "reward": -0.10001745000000001 }, { "step": 3, "tokens_allocated": 680, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2043, "reward": -0.10001475 }, { "step": 4, "tokens_allocated": 364, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1679, "reward": 1.0160425 }, { "step": 5, "tokens_allocated": 139, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1540, "reward": 1.00464615 }, { "step": 6, "tokens_allocated": 231, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1309, "reward": 1.00554215 }, { "step": 7, "tokens_allocated": 409, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 900, "reward": -0.10001760000000001 }, { "step": 8, "tokens_allocated": 150, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 750, "reward": 1.0044955 }, { "step": 9, "tokens_allocated": 750, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0341852999999999 } ] }, { "total_reward": 6.768739500000001, "accuracy": 0.7, "total_tokens_used": 3891, "budget_utilization": 0.97275, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 500, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3500, "reward": 1.0161858000000001 }, { "step": 2, "tokens_allocated": 777, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2723, "reward": -0.10001435 }, { "step": 3, "tokens_allocated": 255, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2468, "reward": 1.0060663 }, { "step": 4, "tokens_allocated": 176, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2292, "reward": 1.0078714500000001 }, { "step": 5, "tokens_allocated": 286, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2006, "reward": 1.01071785 }, { "step": 6, "tokens_allocated": 300, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1706, "reward": 1.0116928 }, { "step": 7, "tokens_allocated": 213, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1493, "reward": 1.00539295 }, { "step": 8, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 693, "reward": 1.01084225 }, { "step": 9, "tokens_allocated": 259, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 434, "reward": -0.10000740000000001 }, { "step": 10, "tokens_allocated": 325, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 109, "reward": -0.10000815 } ] }, { "total_reward": 5.766700849999999, "accuracy": 0.6666666666666666, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 300, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": 1.00966645 }, { "step": 2, "tokens_allocated": 513, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3187, "reward": 1.01543465 }, { "step": 3, "tokens_allocated": 199, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2988, "reward": 1.00914615 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2188, "reward": 1.00814045 }, { "step": 5, "tokens_allocated": 729, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1459, "reward": -0.10001535 }, { "step": 6, "tokens_allocated": 583, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 876, "reward": 1.0188084 }, { "step": 7, "tokens_allocated": 438, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 438, "reward": 1.0055318 }, { "step": 8, "tokens_allocated": 292, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 146, "reward": -0.10000705 }, { "step": 9, "tokens_allocated": 146, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10000465 } ] }, { "total_reward": 6.877045400000001, "accuracy": 0.7777777777777778, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3800, "reward": 1.0023916 }, { "step": 2, "tokens_allocated": 316, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3484, "reward": 1.01041615 }, { "step": 3, "tokens_allocated": 544, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2940, "reward": 1.01520795 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2140, "reward": -0.10003525 }, { "step": 5, "tokens_allocated": 713, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1427, "reward": 1.0300594 }, { "step": 6, "tokens_allocated": 570, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 857, "reward": -0.1000143 }, { "step": 7, "tokens_allocated": 160, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 697, "reward": 1.00532055 }, { "step": 8, "tokens_allocated": 290, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 407, "reward": 1.008241 }, { "step": 9, "tokens_allocated": 407, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0054583 } ] }, { "total_reward": 5.796034349999999, "accuracy": 0.6666666666666666, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.01369415 }, { "step": 2, "tokens_allocated": 711, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2489, "reward": 1.02653215 }, { "step": 3, "tokens_allocated": 388, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2101, "reward": 1.018593 }, { "step": 4, "tokens_allocated": 375, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1726, "reward": -0.10000715 }, { "step": 5, "tokens_allocated": 215, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1511, "reward": -0.10000890000000001 }, { "step": 6, "tokens_allocated": 604, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 907, "reward": 1.02406085 }, { "step": 7, "tokens_allocated": 283, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 624, "reward": -0.10000735000000001 }, { "step": 8, "tokens_allocated": 156, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 468, "reward": 1.0064465 }, { "step": 9, "tokens_allocated": 468, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0067310999999999 } ] }, { "total_reward": 7.86692335, "accuracy": 0.8, "total_tokens_used": 3787, "budget_utilization": 0.94675, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 300, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": 1.00981655 }, { "step": 2, "tokens_allocated": 308, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3392, "reward": 1.01116705 }, { "step": 3, "tokens_allocated": 212, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3180, "reward": 1.00231595 }, { "step": 4, "tokens_allocated": 227, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2953, "reward": 1.0041164 }, { "step": 5, "tokens_allocated": 615, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2338, "reward": -0.10001755000000001 }, { "step": 6, "tokens_allocated": 350, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1988, "reward": 1.0128911 }, { "step": 7, "tokens_allocated": 621, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1367, "reward": 1.01978215 }, { "step": 8, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 567, "reward": -0.1000352 }, { "step": 9, "tokens_allocated": 141, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 426, "reward": 1.0035203 }, { "step": 10, "tokens_allocated": 213, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 213, "reward": 1.0033665999999999 } ] }, { "total_reward": 7.8743482999999985, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 300, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": 1.00966645 }, { "step": 2, "tokens_allocated": 513, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3187, "reward": 1.0118322499999999 }, { "step": 3, "tokens_allocated": 199, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2988, "reward": 1.0093713 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2188, "reward": 1.0092662000000001 }, { "step": 5, "tokens_allocated": 182, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2006, "reward": 1.00772105 }, { "step": 6, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1206, "reward": -0.1000293 }, { "step": 7, "tokens_allocated": 150, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1056, "reward": 1.0056962999999999 }, { "step": 8, "tokens_allocated": 176, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 880, "reward": 1.00727105 }, { "step": 9, "tokens_allocated": 330, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 550, "reward": 1.0135675499999999 }, { "step": 10, "tokens_allocated": 550, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.10001455000000001 } ] }, { "total_reward": 8.9898032, "accuracy": 0.9, "total_tokens_used": 3821, "budget_utilization": 0.95525, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 300, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": 1.0098916 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2900, "reward": 1.01444465 }, { "step": 3, "tokens_allocated": 181, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2719, "reward": 1.0065203 }, { "step": 4, "tokens_allocated": 194, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2525, "reward": 1.0080957000000001 }, { "step": 5, "tokens_allocated": 315, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2210, "reward": 1.01019105 }, { "step": 6, "tokens_allocated": 552, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1658, "reward": -0.1000183 }, { "step": 7, "tokens_allocated": 518, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1140, "reward": 1.01205715 }, { "step": 8, "tokens_allocated": 190, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 950, "reward": 1.0086963 }, { "step": 9, "tokens_allocated": 237, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 713, "reward": 1.0044161 }, { "step": 10, "tokens_allocated": 534, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 179, "reward": 1.01550865 } ] }, { "total_reward": 5.638869999999999, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003300000000001 }, { "step": 2, "tokens_allocated": 444, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2756, "reward": 1.0086836 }, { "step": 3, "tokens_allocated": 172, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2584, "reward": 1.0066708500000001 }, { "step": 4, "tokens_allocated": 738, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1846, "reward": -0.10001535 }, { "step": 5, "tokens_allocated": 230, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1616, "reward": 1.003741 }, { "step": 6, "tokens_allocated": 404, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1212, "reward": -0.1000149 }, { "step": 7, "tokens_allocated": 378, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 834, "reward": -0.10000875000000001 }, { "step": 8, "tokens_allocated": 208, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 626, "reward": 1.0017908 }, { "step": 9, "tokens_allocated": 156, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 470, "reward": 1.0053958 }, { "step": 10, "tokens_allocated": 470, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.01265995 } ] }, { "total_reward": 7.9015086000000005, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.01744665 }, { "step": 2, "tokens_allocated": 177, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3023, "reward": 1.0066706 }, { "step": 3, "tokens_allocated": 472, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2551, "reward": -0.10001695000000001 }, { "step": 4, "tokens_allocated": 273, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2278, "reward": 1.009968 }, { "step": 5, "tokens_allocated": 189, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2089, "reward": 1.00839615 }, { "step": 6, "tokens_allocated": 313, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1776, "reward": 1.0110167 }, { "step": 7, "tokens_allocated": 333, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1443, "reward": -0.10000745000000001 }, { "step": 8, "tokens_allocated": 360, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1083, "reward": 1.01416645 }, { "step": 9, "tokens_allocated": 406, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 677, "reward": 1.0041825 }, { "step": 10, "tokens_allocated": 677, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.02968595 } ] }, { "total_reward": 3.5552238000000003, "accuracy": 0.4444444444444444, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 200, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 3800, "reward": -0.10000945 }, { "step": 2, "tokens_allocated": 527, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3273, "reward": -0.1000143 }, { "step": 3, "tokens_allocated": 306, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2967, "reward": 1.0116174500000001 }, { "step": 4, "tokens_allocated": 529, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2438, "reward": -0.10001775 }, { "step": 5, "tokens_allocated": 507, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1931, "reward": -0.10001455000000001 }, { "step": 6, "tokens_allocated": 289, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1642, "reward": 1.0108678 }, { "step": 7, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 842, "reward": -0.1000352 }, { "step": 8, "tokens_allocated": 140, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 702, "reward": 1.0034453 }, { "step": 9, "tokens_allocated": 702, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.0293845000000001 } ] }, { "total_reward": 4.5351288, "accuracy": 0.5, "total_tokens_used": 3929, "budget_utilization": 0.98225, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003350000000001 }, { "step": 2, "tokens_allocated": 444, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2756, "reward": -0.10001520000000001 }, { "step": 3, "tokens_allocated": 430, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2326, "reward": 1.00590745 }, { "step": 4, "tokens_allocated": 664, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1662, "reward": 1.02180635 }, { "step": 5, "tokens_allocated": 138, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1524, "reward": 1.0041959 }, { "step": 6, "tokens_allocated": 381, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1143, "reward": -0.1000071 }, { "step": 7, "tokens_allocated": 571, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 572, "reward": -0.1000143 }, { "step": 8, "tokens_allocated": 381, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 191, "reward": -0.100007 }, { "step": 9, "tokens_allocated": 50, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 141, "reward": 1.00082305 }, { "step": 10, "tokens_allocated": 70, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 71, "reward": 1.00247315 } ] }, { "total_reward": 5.65206235, "accuracy": 0.6, "total_tokens_used": 3968, "budget_utilization": 0.992, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3800, "reward": 1.0037425 }, { "step": 2, "tokens_allocated": 527, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3273, "reward": -0.10001825 }, { "step": 3, "tokens_allocated": 511, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2762, "reward": -0.10001555000000001 }, { "step": 4, "tokens_allocated": 197, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2565, "reward": 1.00869595 }, { "step": 5, "tokens_allocated": 320, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2245, "reward": 1.01296765 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1445, "reward": 1.0056638 }, { "step": 7, "tokens_allocated": 270, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1175, "reward": 1.00741645 }, { "step": 8, "tokens_allocated": 293, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 882, "reward": -0.10000745000000001 }, { "step": 9, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 82, "reward": 1.0136191 }, { "step": 10, "tokens_allocated": 50, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 32, "reward": -0.10000185 } ] }, { "total_reward": 7.8643071, "accuracy": 0.8, "total_tokens_used": 3629, "budget_utilization": 0.90725, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 200, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 3800, "reward": -0.10000945 }, { "step": 2, "tokens_allocated": 211, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3589, "reward": 1.0024661 }, { "step": 3, "tokens_allocated": 224, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3365, "reward": 1.0055425 }, { "step": 4, "tokens_allocated": 240, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3125, "reward": 1.0040407 }, { "step": 5, "tokens_allocated": 390, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2735, "reward": 1.01611625 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1935, "reward": 1.0094163 }, { "step": 7, "tokens_allocated": 241, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1694, "reward": 1.0063672 }, { "step": 8, "tokens_allocated": 705, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 989, "reward": -0.10001520000000001 }, { "step": 9, "tokens_allocated": 247, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 742, "reward": 1.0063669 }, { "step": 10, "tokens_allocated": 371, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 371, "reward": 1.0140158 } ] }, { "total_reward": 7.8762429, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3800, "reward": 1.0035173499999999 }, { "step": 2, "tokens_allocated": 211, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3589, "reward": 1.0047176 }, { "step": 3, "tokens_allocated": 224, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3365, "reward": 1.0040415 }, { "step": 4, "tokens_allocated": 360, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3005, "reward": 1.01446665 }, { "step": 5, "tokens_allocated": 250, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2755, "reward": 1.00561625 }, { "step": 6, "tokens_allocated": 275, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2480, "reward": 1.007116 }, { "step": 7, "tokens_allocated": 775, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1705, "reward": -0.10001470000000001 }, { "step": 8, "tokens_allocated": 710, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 995, "reward": -0.10001755000000001 }, { "step": 9, "tokens_allocated": 373, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 622, "reward": 1.01529155 }, { "step": 10, "tokens_allocated": 622, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.02150825 } ] }, { "total_reward": 4.66363005, "accuracy": 0.5555555555555556, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 300, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": 1.011993 }, { "step": 2, "tokens_allocated": 308, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3392, "reward": 1.01131715 }, { "step": 3, "tokens_allocated": 530, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2862, "reward": -0.10001520000000001 }, { "step": 4, "tokens_allocated": 511, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2351, "reward": 1.01663555 }, { "step": 5, "tokens_allocated": 195, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2156, "reward": 1.00824575 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1356, "reward": 1.01549535 }, { "step": 7, "tokens_allocated": 423, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 933, "reward": -0.10001605000000001 }, { "step": 8, "tokens_allocated": 233, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 700, "reward": -0.10000730000000001 }, { "step": 9, "tokens_allocated": 700, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.1000182 } ] }, { "total_reward": 7.87246095, "accuracy": 0.8, "total_tokens_used": 3971, "budget_utilization": 0.99275, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003655 }, { "step": 2, "tokens_allocated": 711, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2489, "reward": 1.0257066000000001 }, { "step": 3, "tokens_allocated": 233, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2256, "reward": 1.0041161 }, { "step": 4, "tokens_allocated": 161, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2095, "reward": 1.0066714 }, { "step": 5, "tokens_allocated": 174, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1921, "reward": 1.00607035 }, { "step": 6, "tokens_allocated": 480, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1441, "reward": -0.10001635 }, { "step": 7, "tokens_allocated": 270, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1171, "reward": 1.0086172500000001 }, { "step": 8, "tokens_allocated": 292, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 879, "reward": 1.0095918 }, { "step": 9, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 79, "reward": 1.0106171 }, { "step": 10, "tokens_allocated": 50, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 29, "reward": 1.00112325 } ] }, { "total_reward": 7.8952819000000005, "accuracy": 0.8, "total_tokens_used": 3985, "budget_utilization": 0.99625, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0094163 }, { "step": 2, "tokens_allocated": 711, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2489, "reward": 1.0279581 }, { "step": 3, "tokens_allocated": 388, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2101, "reward": 1.01626645 }, { "step": 4, "tokens_allocated": 375, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1726, "reward": 1.0170176 }, { "step": 5, "tokens_allocated": 359, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1367, "reward": -0.10000890000000001 }, { "step": 6, "tokens_allocated": 341, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1026, "reward": 1.01184085 }, { "step": 7, "tokens_allocated": 513, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 513, "reward": 1.01168215 }, { "step": 8, "tokens_allocated": 342, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 171, "reward": -0.1000086 }, { "step": 9, "tokens_allocated": 106, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 65, "reward": 1.0008953 }, { "step": 10, "tokens_allocated": 50, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 15, "reward": 1.00022265 } ] }, { "total_reward": 2.3343037, "accuracy": 0.3, "total_tokens_used": 3982, "budget_utilization": 0.9955, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 200, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 3800, "reward": -0.10000945 }, { "step": 2, "tokens_allocated": 527, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3273, "reward": -0.10001470000000001 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2473, "reward": 1.0098666 }, { "step": 4, "tokens_allocated": 441, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2032, "reward": -0.10001520000000001 }, { "step": 5, "tokens_allocated": 169, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1863, "reward": 1.00599555 }, { "step": 6, "tokens_allocated": 745, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1118, "reward": -0.10001495 }, { "step": 7, "tokens_allocated": 559, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 559, "reward": 1.0185094 }, { "step": 8, "tokens_allocated": 372, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 187, "reward": -0.10000730000000001 }, { "step": 9, "tokens_allocated": 116, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 71, "reward": -0.10000435 }, { "step": 10, "tokens_allocated": 53, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 18, "reward": -0.1000019 } ] }, { "total_reward": 3.5414481000000007, "accuracy": 0.4444444444444444, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 500, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3500, "reward": 1.0147598500000001 }, { "step": 2, "tokens_allocated": 486, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3014, "reward": -0.10001470000000001 }, { "step": 3, "tokens_allocated": 753, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2261, "reward": -0.1000143 }, { "step": 4, "tokens_allocated": 161, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2100, "reward": 1.00584585 }, { "step": 5, "tokens_allocated": 700, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1400, "reward": -0.10001405000000001 }, { "step": 6, "tokens_allocated": 140, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1260, "reward": 1.0040457 }, { "step": 7, "tokens_allocated": 393, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 867, "reward": 1.0168666 }, { "step": 8, "tokens_allocated": 578, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 289, "reward": -0.10001835 }, { "step": 9, "tokens_allocated": 289, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.1000085 } ] }, { "total_reward": 5.6448685, "accuracy": 0.6, "total_tokens_used": 3887, "budget_utilization": 0.97175, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3800, "reward": 1.0038926 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3000, "reward": 1.0136191 }, { "step": 3, "tokens_allocated": 468, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2532, "reward": 1.0086824 }, { "step": 4, "tokens_allocated": 452, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2080, "reward": -0.1000168 }, { "step": 5, "tokens_allocated": 433, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1647, "reward": 1.01018515 }, { "step": 6, "tokens_allocated": 411, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1236, "reward": -0.10001675 }, { "step": 7, "tokens_allocated": 154, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1082, "reward": 1.0056961 }, { "step": 8, "tokens_allocated": 721, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 361, "reward": -0.10001575 }, { "step": 9, "tokens_allocated": 135, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 226, "reward": -0.1000038 }, { "step": 10, "tokens_allocated": 113, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 113, "reward": 1.00284625 } ] }, { "total_reward": 7.8907995, "accuracy": 0.8, "total_tokens_used": 3512, "budget_utilization": 0.878, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 300, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": 1.0110924 }, { "step": 2, "tokens_allocated": 513, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3187, "reward": 1.0122825500000001 }, { "step": 3, "tokens_allocated": 298, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2889, "reward": 1.00861585 }, { "step": 4, "tokens_allocated": 309, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 2580, "reward": -0.10000890000000001 }, { "step": 5, "tokens_allocated": 322, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2258, "reward": 1.0119919 }, { "step": 6, "tokens_allocated": 338, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1920, "reward": -0.10000895 }, { "step": 7, "tokens_allocated": 360, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1560, "reward": 1.01416645 }, { "step": 8, "tokens_allocated": 260, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1300, "reward": 1.00666645 }, { "step": 9, "tokens_allocated": 325, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 975, "reward": 1.0116915499999999 }, { "step": 10, "tokens_allocated": 487, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 488, "reward": 1.0143102 } ] }, { "total_reward": 7.8885411, "accuracy": 0.8, "total_tokens_used": 3896, "budget_utilization": 0.974, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 300, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": 1.01191795 }, { "step": 2, "tokens_allocated": 513, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3187, "reward": 1.01468415 }, { "step": 3, "tokens_allocated": 298, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2889, "reward": 1.01146775 }, { "step": 4, "tokens_allocated": 206, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2683, "reward": 1.00441765 }, { "step": 5, "tokens_allocated": 558, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2125, "reward": 1.0170835 }, { "step": 6, "tokens_allocated": 531, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1594, "reward": 1.01453315 }, { "step": 7, "tokens_allocated": 797, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 797, "reward": -0.10001710000000001 }, { "step": 8, "tokens_allocated": 132, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 665, "reward": 1.00457145 }, { "step": 9, "tokens_allocated": 249, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 416, "reward": -0.10000840000000001 }, { "step": 10, "tokens_allocated": 312, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 104, "reward": 1.0098909999999999 } ] } ] }, "bandit": { "agent": "bandit", "n_episodes": 50, "mean_reward": 6.525903147, "std_reward": 1.6575015872803898, "mean_accuracy": 0.743531746031746, "std_accuracy": 0.1592141678198517, "mean_budget_utilization": 0.9884999999999999, "episodes": [ { "total_reward": 5.631424399999999, "accuracy": 0.6, "total_tokens_used": 3550, "budget_utilization": 0.8875, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3950, "reward": -0.1000023 }, { "step": 2, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3850, "reward": 1.00127085 }, { "step": 3, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3750, "reward": 1.0022465 }, { "step": 4, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3650, "reward": -0.10000375 }, { "step": 5, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3450, "reward": 1.0032922 }, { "step": 6, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3250, "reward": -0.10000765 }, { "step": 7, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2850, "reward": -0.10001795000000001 }, { "step": 8, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2050, "reward": 1.0059639999999999 }, { "step": 9, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1250, "reward": 1.0053636 }, { "step": 10, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 450, "reward": 1.0133189 } ] }, { "total_reward": 3.4211402, "accuracy": 0.4, "total_tokens_used": 3900, "budget_utilization": 0.975, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003635000000001 }, { "step": 2, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3000, "reward": 1.00321715 }, { "step": 3, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2800, "reward": -0.1000072 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.10001715 }, { "step": 5, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2200, "reward": -0.10000925000000001 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1400, "reward": 1.0137692 }, { "step": 7, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 600, "reward": -0.10002935 }, { "step": 8, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 500, "reward": -0.1000047 }, { "step": 9, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 300, "reward": 1.00306705 }, { "step": 10, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 100, "reward": 1.0011908 } ] }, { "total_reward": 6.7648755000000005, "accuracy": 0.7, "total_tokens_used": 3900, "budget_utilization": 0.975, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3900, "reward": 1.00082055 }, { "step": 2, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3800, "reward": -0.10000375 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3000, "reward": 1.01429455 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2200, "reward": 1.00919115 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1400, "reward": 1.0152702 }, { "step": 6, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1350, "reward": -0.10000205000000001 }, { "step": 7, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 550, "reward": 1.01759675 }, { "step": 8, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 350, "reward": 1.00366745 }, { "step": 9, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 300, "reward": -0.10000205000000001 }, { "step": 10, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 100, "reward": 1.0040427 } ] }, { "total_reward": 5.639730100000001, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3900, "reward": -0.10000450000000001 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3100, "reward": 1.00633925 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2300, "reward": 1.01294365 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1500, "reward": 1.01114245 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 700, "reward": 1.00408775 }, { "step": 6, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 500, "reward": 1.0022415 }, { "step": 7, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 300, "reward": 1.0029919999999999 }, { "step": 8, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 100, "reward": -0.1000076 }, { "step": 9, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 50, "reward": -0.10000205000000001 }, { "step": 10, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10000235 } ] }, { "total_reward": 4.851217999999999, "accuracy": 0.7142857142857143, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 7, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.01624585 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.013469 }, { "step": 3, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1600, "reward": -0.10003445000000001 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.0176718 }, { "step": 5, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 400, "reward": -0.1000168 }, { "step": 6, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 200, "reward": 1.00306705 }, { "step": 7, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.00081555 } ] }, { "total_reward": 4.7170817, "accuracy": 0.625, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 8, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.1000352 }, { "step": 2, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3000, "reward": 1.0026918 }, { "step": 3, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2200, "reward": -0.10002935 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1400, "reward": -0.10003525 }, { "step": 5, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1200, "reward": 1.0040427 }, { "step": 6, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1000, "reward": 1.00246665 }, { "step": 7, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0037425 }, { "step": 8, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.00423785 } ] }, { "total_reward": 4.630049850000001, "accuracy": 0.5555555555555556, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3900, "reward": -0.10000445000000001 }, { "step": 2, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 3700, "reward": -0.100008 }, { "step": 3, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": -0.10000395000000001 }, { "step": 4, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3400, "reward": -0.10000890000000001 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2600, "reward": 1.0052135 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1800, "reward": 1.00423785 }, { "step": 7, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1000, "reward": 1.0118179 }, { "step": 8, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 200, "reward": 1.00498835 }, { "step": 9, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.00381755 } ] }, { "total_reward": 5.6293051, "accuracy": 0.6, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 3900, "reward": -0.10000360000000001 }, { "step": 2, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3850, "reward": -0.1000022 }, { "step": 3, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3050, "reward": -0.10003145000000001 }, { "step": 4, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2850, "reward": 1.00291695 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2050, "reward": 1.00754005 }, { "step": 6, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1850, "reward": 1.00186625 }, { "step": 7, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1050, "reward": 1.0035624 }, { "step": 8, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 250, "reward": 1.0092662000000001 }, { "step": 9, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 50, "reward": 1.0041928 }, { "step": 10, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.1000023 } ] }, { "total_reward": 9.055337000000002, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3900, "reward": 1.00097065 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3100, "reward": 1.01129255 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2300, "reward": 1.01279355 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1500, "reward": 1.0150450500000001 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 700, "reward": 1.00363745 }, { "step": 6, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 500, "reward": 1.00306705 }, { "step": 7, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 300, "reward": 1.0035924 }, { "step": 8, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 100, "reward": 1.0038926 }, { "step": 9, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.0010457000000001 } ] }, { "total_reward": 4.53231765, "accuracy": 0.5, "total_tokens_used": 3900, "budget_utilization": 0.975, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.00725985 }, { "step": 2, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3400, "reward": -0.10000780000000001 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2600, "reward": 1.0098666 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1800, "reward": 1.00693965 }, { "step": 5, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1000, "reward": -0.10002910000000001 }, { "step": 6, "tokens_allocated": 200, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 800, "reward": -0.10000875000000001 }, { "step": 7, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 600, "reward": -0.100009 }, { "step": 8, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 500, "reward": -0.10000435 }, { "step": 9, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 300, "reward": 1.00381755 }, { "step": 10, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 100, "reward": 1.004493 } ] }, { "total_reward": 3.7393624000000005, "accuracy": 0.5714285714285714, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 7, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.00365745 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2800, "reward": 1.008966 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2000, "reward": 1.0155704 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1200, "reward": -0.10003350000000001 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 400, "reward": 1.0112175 }, { "step": 6, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 200, "reward": -0.1000072 }, { "step": 7, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10000825 } ] }, { "total_reward": 6.93710895, "accuracy": 0.875, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 8, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0080854000000001 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2800, "reward": 1.0166211 }, { "step": 3, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2700, "reward": 1.0015710500000001 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1900, "reward": 1.00408775 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1100, "reward": 1.00318715 }, { "step": 6, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1000, "reward": 1.00202135 }, { "step": 7, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 200, "reward": -0.1000309 }, { "step": 8, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00156605 } ] }, { "total_reward": 10.05136435, "accuracy": 1.0, "total_tokens_used": 3900, "budget_utilization": 0.975, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0023816 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.00680955 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0169213 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.0041828 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.00200635 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.00979155 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": 1.0061341000000001 }, { "step": 8, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 300, "reward": 1.0005954 }, { "step": 9, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 200, "reward": 1.0016461 }, { "step": 10, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 100, "reward": 1.0008956 } ] }, { "total_reward": 6.865171, "accuracy": 0.7777777777777778, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.0050834 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.00635925 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0133189 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.0085357 }, { "step": 5, "tokens_allocated": 200, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 1800, "reward": -0.1000087 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1000, "reward": 1.0178969500000001 }, { "step": 7, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 200, "reward": 1.0122682 }, { "step": 8, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 100, "reward": -0.10000385 }, { "step": 9, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00172115 } ] }, { "total_reward": 8.0473817, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 8, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3600, "reward": 1.00545865 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.00500835 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.0077852 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.00680955 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2000, "reward": 1.00440795 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1200, "reward": 1.00558875 }, { "step": 7, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 400, "reward": 1.0053636 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.00695965 } ] }, { "total_reward": 6.73081605, "accuracy": 0.7, "total_tokens_used": 3800, "budget_utilization": 0.95, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.00365745 }, { "step": 2, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2800, "reward": -0.10003445000000001 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": -0.10001660000000001 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.0082355 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1200, "reward": 1.00633925 }, { "step": 6, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1100, "reward": 1.00052035 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 700, "reward": 1.0067345 }, { "step": 8, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 500, "reward": 1.00231655 }, { "step": 9, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": -0.10000355000000001 }, { "step": 10, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 200, "reward": 1.00306705 } ] }, { "total_reward": 5.753239799999999, "accuracy": 0.6666666666666666, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3600, "reward": -0.10001745000000001 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.00260675 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.008986 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.0145197 }, { "step": 5, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1900, "reward": 1.0019463 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1100, "reward": 1.0115177 }, { "step": 7, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 300, "reward": 1.01369415 }, { "step": 8, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 100, "reward": -0.10000880000000001 }, { "step": 9, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.10000455000000001 } ] }, { "total_reward": 8.051434400000002, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 8, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0028318999999999 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.0160206999999999 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.00440795 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2000, "reward": 1.00425785 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.002982 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1200, "reward": 1.00350735 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0050834 }, { "step": 8, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.01234325 } ] }, { "total_reward": 6.8307278, "accuracy": 0.7777777777777778, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": -0.1000162 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": -0.10001660000000001 }, { "step": 3, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3100, "reward": 1.0016461 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2700, "reward": 1.0062842 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1900, "reward": 1.00919115 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1100, "reward": 1.00528855 }, { "step": 7, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 300, "reward": 1.0055137 }, { "step": 8, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 200, "reward": 1.00052035 }, { "step": 9, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.00231655 } ] }, { "total_reward": 6.8515897, "accuracy": 0.7777777777777778, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.007485 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3200, "reward": -0.10001555000000001 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.00590895 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2400, "reward": 1.0028318999999999 }, { "step": 5, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2200, "reward": 1.0013409 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1400, "reward": 1.01729655 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1000, "reward": 1.00876085 }, { "step": 8, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 200, "reward": 1.00799035 }, { "step": 9, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.10000925000000001 } ] }, { "total_reward": 6.731551549999999, "accuracy": 0.7, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.0025317 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.0100167 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.0091161 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": -0.10001815 }, { "step": 5, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1500, "reward": 1.00217145 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 700, "reward": 1.0055137 }, { "step": 7, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 500, "reward": -0.10000895 }, { "step": 8, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 300, "reward": -0.1000075 }, { "step": 9, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 200, "reward": 1.0004453 }, { "step": 10, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0017912 } ] }, { "total_reward": 3.7282562499999994, "accuracy": 0.5714285714285714, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 7, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3600, "reward": 1.0056838 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10001435 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.013469 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1600, "reward": -0.1000376 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": 1.0026818 }, { "step": 6, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 400, "reward": -0.10003575 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.00650935 } ] }, { "total_reward": 8.9628671, "accuracy": 0.9, "total_tokens_used": 3600, "budget_utilization": 0.9, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 3600, "reward": -0.1000149 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.00740995 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2800, "reward": 1.0079353 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.01744665 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1200, "reward": 1.01729655 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 800, "reward": 1.008986 }, { "step": 7, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 700, "reward": 1.00187125 }, { "step": 8, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 600, "reward": 1.0007455 }, { "step": 9, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 500, "reward": 1.0007455 }, { "step": 10, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 400, "reward": 1.0004453 } ] }, { "total_reward": 5.748363149999999, "accuracy": 0.6666666666666666, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.0079353 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0086858 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0165460499999999 }, { "step": 4, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2200, "reward": 1.00396765 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1400, "reward": 1.00994165 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1000, "reward": -0.1000182 }, { "step": 7, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 900, "reward": 1.0013459 }, { "step": 8, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 100, "reward": -0.10003655 }, { "step": 9, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.10000445000000001 } ] }, { "total_reward": 5.7280397999999995, "accuracy": 0.6666666666666666, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": -0.1000153 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.0033572499999999 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2800, "reward": 1.0056838 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2000, "reward": 1.01189295 }, { "step": 5, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1900, "reward": 1.00172115 }, { "step": 6, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1100, "reward": -0.10003345000000001 }, { "step": 7, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1000, "reward": 1.00202135 }, { "step": 8, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.0034423 }, { "step": 9, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10003025 } ] }, { "total_reward": 5.75136635, "accuracy": 0.6666666666666666, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.00650935 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.00440795 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.01204305 }, { "step": 4, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2300, "reward": -0.10000395000000001 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1500, "reward": 1.0082155 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 700, "reward": 1.0166211 }, { "step": 7, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 600, "reward": -0.10000355000000001 }, { "step": 8, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 200, "reward": -0.10001550000000001 }, { "step": 9, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.0035924 } ] }, { "total_reward": 10.058188900000001, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.00303705 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0112175 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.0178969500000001 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.013469 }, { "step": 5, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 700, "reward": 1.00082055 }, { "step": 6, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 600, "reward": 1.0017962 }, { "step": 7, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 400, "reward": 1.0026918 }, { "step": 8, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 300, "reward": 1.0016461 }, { "step": 9, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 100, "reward": 1.0038926 }, { "step": 10, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00172115 } ] }, { "total_reward": 5.7356833, "accuracy": 0.6666666666666666, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.00663945 }, { "step": 2, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2800, "reward": -0.10001745000000001 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": -0.10001475 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.00708975 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.01309375 }, { "step": 6, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 600, "reward": 1.00321715 }, { "step": 7, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 400, "reward": 1.00366745 }, { "step": 8, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 200, "reward": 1.00201635 }, { "step": 9, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.10000840000000001 } ] }, { "total_reward": 5.637050099999999, "accuracy": 0.6, "total_tokens_used": 3600, "budget_utilization": 0.9, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.013469 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.0056638 }, { "step": 3, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 2300, "reward": -0.10000400000000001 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1500, "reward": 1.0073899499999999 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1100, "reward": 1.00485825 }, { "step": 6, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1000, "reward": 1.00127085 }, { "step": 7, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 800, "reward": 1.00441795 }, { "step": 8, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 700, "reward": -0.1000047 }, { "step": 9, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 500, "reward": -0.10000740000000001 }, { "step": 10, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": -0.10000360000000001 } ] }, { "total_reward": 6.84070835, "accuracy": 0.7777777777777778, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.0043329 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.0049133 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.0125684 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": -0.10001855000000001 }, { "step": 5, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": -0.10001535 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0050834 }, { "step": 7, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 600, "reward": 1.0040427 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 200, "reward": 1.0056838 }, { "step": 9, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.00411775 } ] }, { "total_reward": 8.92735145, "accuracy": 0.9, "total_tokens_used": 3900, "budget_utilization": 0.975, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.00408775 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2400, "reward": 1.00468815 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.00558875 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": -0.1000169 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 800, "reward": 1.0065844 }, { "step": 6, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 600, "reward": 1.0020914 }, { "step": 7, "tokens_allocated": 50, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 550, "reward": 1.0005979 }, { "step": 8, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 350, "reward": 1.001491 }, { "step": 9, "tokens_allocated": 50, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 300, "reward": 1.00082305 }, { "step": 10, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 100, "reward": 1.00141595 } ] }, { "total_reward": 6.8424329, "accuracy": 0.7777777777777778, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": -0.10001660000000001 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.00320715 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.0056638 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.01039195 }, { "step": 5, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1400, "reward": -0.10000890000000001 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1000, "reward": 1.00876085 }, { "step": 7, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 200, "reward": 1.0113676 }, { "step": 8, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 100, "reward": 1.0022465 }, { "step": 9, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.00082055 } ] }, { "total_reward": 6.84093365, "accuracy": 0.7777777777777778, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 3950, "reward": -0.1000019 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3150, "reward": 1.0047632 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2750, "reward": 1.0017812 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1950, "reward": 1.0146698 }, { "step": 5, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1150, "reward": -0.10003435000000001 }, { "step": 6, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1050, "reward": 1.001496 }, { "step": 7, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 250, "reward": 1.0160206999999999 }, { "step": 8, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 50, "reward": 1.00111575 }, { "step": 9, "tokens_allocated": 50, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00112325 } ] }, { "total_reward": 6.753542500000001, "accuracy": 0.7, "total_tokens_used": 3900, "budget_utilization": 0.975, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.013469 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.00318715 }, { "step": 3, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2300, "reward": 1.0019463 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1500, "reward": 1.0092662000000001 }, { "step": 5, "tokens_allocated": 50, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1450, "reward": 1.0005979 }, { "step": 6, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1400, "reward": -0.1000023 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1000, "reward": 1.0077852 }, { "step": 8, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 200, "reward": 1.01729655 }, { "step": 9, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 150, "reward": -0.10000175 }, { "step": 10, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 100, "reward": -0.10000175 } ] }, { "total_reward": 7.84911345, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3200, "reward": 1.00453805 }, { "step": 2, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3000, "reward": -0.10000735000000001 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2200, "reward": 1.0112175 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1400, "reward": 1.0173716 }, { "step": 5, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1300, "reward": 1.0013459 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 500, "reward": 1.0094163 }, { "step": 7, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 300, "reward": -0.10000705 }, { "step": 8, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 200, "reward": 1.0019463 }, { "step": 9, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 100, "reward": 1.00142095 }, { "step": 10, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.00187125 } ] }, { "total_reward": 4.53276095, "accuracy": 0.5, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": -0.10003300000000001 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2400, "reward": 1.00889095 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1600, "reward": 1.0103169 }, { "step": 4, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1200, "reward": -0.10001535 }, { "step": 5, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1150, "reward": -0.10000200000000001 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 350, "reward": 1.01114245 }, { "step": 7, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 300, "reward": -0.10000185 }, { "step": 8, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 200, "reward": 1.00127085 }, { "step": 9, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 100, "reward": 1.0011958 }, { "step": 10, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.1000038 } ] }, { "total_reward": 7.84588485, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.0073349 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.008966 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2000, "reward": 1.0070147 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1600, "reward": 1.0043329 }, { "step": 5, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1500, "reward": 1.00172115 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1100, "reward": 1.0070347 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 700, "reward": 1.00575885 }, { "step": 8, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 600, "reward": -0.1000037 }, { "step": 9, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 200, "reward": 1.0037325 }, { "step": 10, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 0, "reward": -0.10000715 } ] }, { "total_reward": 8.05361085, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 8, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0070347 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.01489495 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.0073899499999999 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1200, "reward": 1.0041628 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 400, "reward": 1.01729655 }, { "step": 6, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 300, "reward": 1.00127085 }, { "step": 7, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 100, "reward": 1.00111575 }, { "step": 8, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.0004453 } ] }, { "total_reward": 8.9502416, "accuracy": 0.9, "total_tokens_used": 3900, "budget_utilization": 0.975, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.0041828 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.0122682 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2000, "reward": 1.01639595 }, { "step": 4, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 1600, "reward": 1.00200635 }, { "step": 5, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1500, "reward": 1.0013459 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 700, "reward": 1.00889095 }, { "step": 7, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 500, "reward": 1.0020914 }, { "step": 8, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 300, "reward": -0.100007 }, { "step": 9, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 200, "reward": 1.0019463 }, { "step": 10, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 100, "reward": 1.00112075 } ] }, { "total_reward": 6.7311904500000015, "accuracy": 0.7, "total_tokens_used": 3850, "budget_utilization": 0.9625, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.00558875 }, { "step": 2, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2400, "reward": -0.10002935 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.0169213 }, { "step": 4, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1500, "reward": 1.00142095 }, { "step": 5, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1100, "reward": 1.0020814 }, { "step": 6, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 700, "reward": 1.00200635 }, { "step": 7, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 600, "reward": 1.0017962 }, { "step": 8, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 550, "reward": -0.10000205000000001 }, { "step": 9, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 350, "reward": -0.10000905 }, { "step": 10, "tokens_allocated": 200, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 150, "reward": 1.00141595 } ] }, { "total_reward": 7.8409295000000006, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0070347 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2800, "reward": 1.00633925 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.00558875 }, { "step": 4, "tokens_allocated": 100, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 1900, "reward": -0.10000440000000001 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1100, "reward": 1.00678955 }, { "step": 6, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1000, "reward": -0.10000350000000001 }, { "step": 7, "tokens_allocated": 50, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 950, "reward": 1.00112325 }, { "step": 8, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 150, "reward": 1.0122682 }, { "step": 9, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 50, "reward": 1.00112075 }, { "step": 10, "tokens_allocated": 50, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00067295 } ] }, { "total_reward": 7.84731065, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 50, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3950, "reward": 1.00067295 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3150, "reward": 1.01399435 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2350, "reward": 1.00979155 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 1550, "reward": 1.01474485 }, { "step": 5, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1450, "reward": 1.00052035 }, { "step": 6, "tokens_allocated": 50, "was_correct": false, "difficulty": "gsm8k", "remaining_budget": 1400, "reward": -0.1000019 }, { "step": 7, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 600, "reward": -0.1000366 }, { "step": 8, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 400, "reward": 1.0011908 }, { "step": 9, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 200, "reward": 1.00231655 }, { "step": 10, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": 1.00411775 } ] }, { "total_reward": 4.744839, "accuracy": 0.625, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 8, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.0043329 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2800, "reward": 1.0124183 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2000, "reward": 1.01129255 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 1200, "reward": -0.10003400000000001 }, { "step": 5, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1100, "reward": 1.00112075 }, { "step": 6, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 900, "reward": -0.10000730000000001 }, { "step": 7, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 100, "reward": 1.0157205 }, { "step": 8, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": -0.1000047 } ] }, { "total_reward": 8.047381699999999, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 8, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3600, "reward": 1.0056838 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3200, "reward": 1.0023816 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2800, "reward": 1.004483 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 2000, "reward": 1.01414445 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1200, "reward": 1.00423785 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 400, "reward": 1.01039195 }, { "step": 7, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 200, "reward": 1.00336725 }, { "step": 8, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.0026918 } ] }, { "total_reward": 4.6349301, "accuracy": 0.5555555555555556, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 3950, "reward": -0.10000185 }, { "step": 2, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 3550, "reward": 1.0046331 }, { "step": 3, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2750, "reward": -0.10003665 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1950, "reward": 1.0038626 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1150, "reward": 1.01399435 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 350, "reward": 1.01204305 }, { "step": 7, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 150, "reward": -0.10000780000000001 }, { "step": 8, "tokens_allocated": 50, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 100, "reward": -0.10000200000000001 }, { "step": 9, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.0004453 } ] }, { "total_reward": 7.839810100000001, "accuracy": 0.8, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 10, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3600, "reward": 1.0070347 }, { "step": 2, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2800, "reward": -0.1000366 }, { "step": 3, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.00305705 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.0122682 }, { "step": 5, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1500, "reward": 1.00082055 }, { "step": 6, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1100, "reward": -0.10001495 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 700, "reward": 1.0065844 }, { "step": 8, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 300, "reward": 1.0047831999999999 }, { "step": 9, "tokens_allocated": 200, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 100, "reward": 1.0038926 }, { "step": 10, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 0, "reward": 1.00142095 } ] }, { "total_reward": 3.5236087000000005, "accuracy": 0.4444444444444444, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 3200, "reward": 1.0050634 }, { "step": 2, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l3", "remaining_budget": 2400, "reward": -0.1000366 }, { "step": 3, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 2000, "reward": -0.1000143 }, { "step": 4, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1900, "reward": 1.00127085 }, { "step": 5, "tokens_allocated": 400, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 1500, "reward": -0.10001405000000001 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 700, "reward": 1.00964145 }, { "step": 7, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 300, "reward": 1.00771015 }, { "step": 8, "tokens_allocated": 100, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 200, "reward": -0.1000037 }, { "step": 9, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 0, "reward": -0.1000085 } ] }, { "total_reward": 5.0585641500000005, "accuracy": 1.0, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 5, "per_step": [ { "step": 1, "tokens_allocated": 800, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 3200, "reward": 1.01294365 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l4_l5", "remaining_budget": 2400, "reward": 1.0136191 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1600, "reward": 1.00318715 }, { "step": 4, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 800, "reward": 1.0161708 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 0, "reward": 1.0126434500000001 } ] }, { "total_reward": 7.9392846, "accuracy": 0.8888888888888888, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 9, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.0047831999999999 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.0170713999999998 }, { "step": 3, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": 1.0052135 }, { "step": 4, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 1200, "reward": -0.1000317 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 400, "reward": 1.0056638 }, { "step": 6, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 300, "reward": 1.0016461 }, { "step": 7, "tokens_allocated": 100, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 200, "reward": 1.0017962 }, { "step": 8, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 100, "reward": 1.00187125 }, { "step": 9, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00127085 } ] }, { "total_reward": 5.838685699999999, "accuracy": 0.75, "total_tokens_used": 4000, "budget_utilization": 1.0, "steps": 8, "per_step": [ { "step": 1, "tokens_allocated": 400, "was_correct": true, "difficulty": "math_l1_l2", "remaining_budget": 3600, "reward": 1.00545865 }, { "step": 2, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 2800, "reward": 1.0100167 }, { "step": 3, "tokens_allocated": 800, "was_correct": false, "difficulty": "math_l1_l2", "remaining_budget": 2000, "reward": -0.10003095000000001 }, { "step": 4, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 1900, "reward": 1.0004453 }, { "step": 5, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 1100, "reward": 1.00408775 }, { "step": 6, "tokens_allocated": 800, "was_correct": true, "difficulty": "math_l3", "remaining_budget": 300, "reward": 1.0165460499999999 }, { "step": 7, "tokens_allocated": 200, "was_correct": false, "difficulty": "math_l4_l5", "remaining_budget": 100, "reward": -0.10000925000000001 }, { "step": 8, "tokens_allocated": 100, "was_correct": true, "difficulty": "gsm8k", "remaining_budget": 0, "reward": 1.00217145 } ] } ] } }