diff --git "a/CPPTAI/benchmarks.json" "b/CPPTAI/benchmarks.json" new file mode 100644--- /dev/null +++ "b/CPPTAI/benchmarks.json" @@ -0,0 +1,12669 @@ +{ + "records": [ + { + "problem_id": "energy_crisis_1", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_1", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_2", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.001, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_3", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_4", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_4", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_5", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.002, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_6", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_7", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_7", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.003, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_8", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_9", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_10", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_10", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_11", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.004, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_12", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_13", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.005, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_13", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_14", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_15", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_16", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.006, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_16", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_17", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_18", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_19", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.007, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_19", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_20", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_21", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_22", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.008, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_22", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_23", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_24", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_25", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.009, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_25", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_26", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_27", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_28", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_28", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_29", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_30", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_31", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_31", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.012, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_32", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_33", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_34", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_34", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.013, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_35", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_36", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_37", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_37", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.014, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_38", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_39", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.909 + }, + { + "problem_id": "energy_crisis_40", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.015, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_40", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_41", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.067, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_42", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.019, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.97 + }, + { + "problem_id": "energy_crisis_43", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_43", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.016, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_44", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.019, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_45", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.019, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.019, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_46", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.017, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.019, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_47", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.018, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.019, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.02, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.02, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.02, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.019, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.019, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.02, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.02, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_48", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.02, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 0.939 + }, + { + "problem_id": "energy_crisis_49", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.02, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.02, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.02, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.02, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.021, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.021, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.02, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.021, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_49", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.02, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "CoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "ToT", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "GoT", + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "ReAct", + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16, + "robust_diversity": null, + "clusters": null, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.021, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.021, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "CPPTAI", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.021, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.019, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.021, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "CPPTAI_no_IV", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.02, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.021, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.02, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + }, + { + "problem_id": "energy_crisis_50", + "method": "CPPTAI_no_I", + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.022, + "tokens": 51, + "robust_diversity": 0.812, + "clusters": 3, + "problem_complexity": 1.0 + } + ], + "summary": { + "CoT": { + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 0.992, + "time_sec": 0.0, + "tokens": 17.0, + "robust_diversity": 0.0, + "clusters": 0.0 + }, + "ToT": { + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 15.0, + "robust_diversity": 0.0, + "clusters": 0.0 + }, + "GoT": { + "accuracy": 0.0, + "error_rate": 1.0, + "diversity": 1.0, + "time_sec": 0.0, + "tokens": 14.0, + "robust_diversity": 0.0, + "clusters": 0.0 + }, + "ReAct": { + "accuracy": 0.1, + "error_rate": 0.9, + "diversity": 0.985, + "time_sec": 0.0, + "tokens": 16.0, + "robust_diversity": 0.0, + "clusters": 0.0 + }, + "CPPTAI": { + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51.0, + "robust_diversity": 0.812, + "clusters": 3.0 + }, + "CPPTAI_no_IV": { + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.01, + "tokens": 51.0, + "robust_diversity": 0.812, + "clusters": 3.0 + }, + "CPPTAI_no_I": { + "accuracy": 1.0, + "error_rate": 0.0, + "diversity": 0.992, + "time_sec": 0.011, + "tokens": 51.0, + "robust_diversity": 0.812, + "clusters": 3.0 + } + } +} \ No newline at end of file