| { | |
| "amc-cot": { | |
| "cot": { | |
| "accuracy": 0.05, | |
| "n_samples": 40 | |
| }, | |
| "tool": { | |
| "n_samples": 0 | |
| } | |
| }, | |
| "asdiv-cot": { | |
| "cot": { | |
| "accuracy": 0.8356659142212189, | |
| "n_samples": 2215 | |
| }, | |
| "tool": { | |
| "n_samples": 0 | |
| } | |
| }, | |
| "gsm8k-cot": { | |
| "cot": { | |
| "accuracy": 0.7247915087187263, | |
| "n_samples": 1319 | |
| }, | |
| "tool": { | |
| "n_samples": 0 | |
| } | |
| }, | |
| "math-500-cot": { | |
| "cot": { | |
| "accuracy": 0.378, | |
| "n_samples": 500 | |
| }, | |
| "tool": { | |
| "n_samples": 0 | |
| } | |
| }, | |
| "math-cot": { | |
| "cot": { | |
| "accuracy": 0.4198, | |
| "n_samples": 5000 | |
| }, | |
| "tool": { | |
| "n_samples": 0 | |
| } | |
| }, | |
| "math_sat-cot": { | |
| "cot": { | |
| "accuracy": 0.8125, | |
| "n_samples": 32 | |
| }, | |
| "tool": { | |
| "n_samples": 0 | |
| } | |
| }, | |
| "mathqa-cot": { | |
| "cot": { | |
| "accuracy": 0.628, | |
| "n_samples": 1000 | |
| }, | |
| "tool": { | |
| "n_samples": 0 | |
| } | |
| }, | |
| "mawps-cot": { | |
| "cot": { | |
| "accuracy": 0.9443099273607748, | |
| "n_samples": 2065 | |
| }, | |
| "tool": { | |
| "n_samples": 0 | |
| } | |
| }, | |
| "mmlu-stem-cot": { | |
| "cot": { | |
| "accuracy": 0.6375082836315441, | |
| "n_samples": 3018 | |
| }, | |
| "tool": { | |
| "n_samples": 0 | |
| } | |
| }, | |
| "ocw-courses-cot": { | |
| "cot": { | |
| "accuracy": 0.16176470588235295, | |
| "n_samples": 272 | |
| }, | |
| "tool": { | |
| "n_samples": 0 | |
| } | |
| }, | |
| "olympiad-bench-cot": { | |
| "cot": { | |
| "accuracy": 0.11851851851851852, | |
| "n_samples": 675 | |
| }, | |
| "tool": { | |
| "n_samples": 0 | |
| } | |
| }, | |
| "svamp-cot": { | |
| "cot": { | |
| "accuracy": 0.824, | |
| "n_samples": 1000 | |
| }, | |
| "tool": { | |
| "n_samples": 0 | |
| } | |
| }, | |
| "tabmwp-cot": { | |
| "cot": { | |
| "accuracy": 0.701, | |
| "n_samples": 1000 | |
| }, | |
| "tool": { | |
| "n_samples": 0 | |
| } | |
| } | |
| } |