{ "train": { "total": 1500, "by_source": { "verification": 67, "multi_step_continuation": 301, "success_first_step": 826, "hint_usage": 74, "failure_recovery": 232 }, "by_source_pct": { "verification": 0.045, "multi_step_continuation": 0.201, "success_first_step": 0.551, "hint_usage": 0.049, "failure_recovery": 0.155 }, "by_tier": { "intermediate": 666, "warmup": 456, "beginner": 378 }, "by_tier_pct": { "intermediate": 0.444, "warmup": 0.304, "beginner": 0.252 }, "unique_tasks": 72, "top_tasks": [ [ 86, 47 ], [ 13, 40 ], [ 67, 37 ], [ 14, 36 ], [ 68, 35 ], [ 85, 35 ], [ 69, 34 ], [ 80, 33 ], [ 72, 33 ], [ 11, 32 ] ] }, "val": { "total": 150, "by_source": { "success_first_step": 92, "multi_step_continuation": 28, "hint_usage": 6, "failure_recovery": 16, "verification": 8 }, "by_source_pct": { "success_first_step": 0.613, "multi_step_continuation": 0.187, "hint_usage": 0.04, "failure_recovery": 0.107, "verification": 0.053 }, "by_tier": { "warmup": 47, "intermediate": 67, "beginner": 36 }, "by_tier_pct": { "warmup": 0.313, "intermediate": 0.447, "beginner": 0.24 }, "unique_tasks": 63, "top_tasks": [ [ 66, 7 ], [ 2, 6 ], [ 67, 6 ], [ 11, 6 ], [ 74, 5 ], [ 70, 5 ], [ 32, 5 ], [ 71, 4 ], [ 42, 4 ], [ 37, 3 ] ] }, "reserve": { "total": 200, "by_source": { "failure_recovery": 30, "success_first_step": 100, "multi_step_continuation": 41, "verification": 17, "hint_usage": 12 }, "by_source_pct": { "failure_recovery": 0.15, "success_first_step": 0.5, "multi_step_continuation": 0.205, "verification": 0.085, "hint_usage": 0.06 }, "by_tier": { "warmup": 74, "intermediate": 89, "beginner": 37 }, "by_tier_pct": { "warmup": 0.37, "intermediate": 0.445, "beginner": 0.185 }, "unique_tasks": 66, "top_tasks": [ [ 72, 10 ], [ 81, 7 ], [ 34, 6 ], [ 86, 6 ], [ 74, 6 ], [ 67, 6 ], [ 71, 6 ], [ 27, 5 ], [ 0, 5 ], [ 42, 5 ] ] }, "targets": { "source_mix": { "success_first_step": 0.55, "multi_step_continuation": 0.2, "failure_recovery": 0.15, "verification": 0.05, "hint_usage": 0.05 }, "tier_weights": { "warmup": 0.5, "beginner": 0.3, "intermediate": 0.15, "advanced": 0.05, "expert": 0.0 } }, "seed": 42 }