| { |
| "pools": { |
| "math-only": [ |
| "gsm8k", |
| "svamp", |
| "asdiv", |
| "mawps", |
| "multiarith", |
| "aqua_rat", |
| "math_algebra_easy", |
| "math_counting_easy" |
| ], |
| "code-only": [ |
| "mbpp", |
| "humaneval", |
| "codealpaca_mini", |
| "mbpp_sanitized", |
| "conala_curated", |
| "livecodebench_easy" |
| ], |
| "science-only": [ |
| "sciq", |
| "arc_easy", |
| "openbookqa", |
| "medmcqa_easy", |
| "mmlu_elementary_math", |
| "mmlu_high_school_biology", |
| "mmlu_high_school_physics", |
| "pubmedqa_pqal" |
| ], |
| "math+code": [ |
| "gsm8k", |
| "svamp", |
| "asdiv", |
| "mawps", |
| "multiarith", |
| "aqua_rat", |
| "math_algebra_easy", |
| "math_counting_easy", |
| "mbpp", |
| "humaneval", |
| "codealpaca_mini", |
| "mbpp_sanitized", |
| "conala_curated", |
| "livecodebench_easy" |
| ], |
| "all": [ |
| "gsm8k", |
| "svamp", |
| "asdiv", |
| "mawps", |
| "multiarith", |
| "aqua_rat", |
| "math_algebra_easy", |
| "math_counting_easy", |
| "mbpp", |
| "humaneval", |
| "codealpaca_mini", |
| "mbpp_sanitized", |
| "conala_curated", |
| "livecodebench_easy", |
| "sciq", |
| "arc_easy", |
| "openbookqa", |
| "medmcqa_easy", |
| "mmlu_elementary_math", |
| "mmlu_high_school_biology", |
| "mmlu_high_school_physics", |
| "pubmedqa_pqal" |
| ] |
| }, |
| "heatmap": { |
| "math-only": { |
| "gsm_hard": -0.3999999999999996, |
| "math_algebra_medium": 2.000000000000004, |
| "humaneval_plus": 0.5000000000000006, |
| "mbpp_plus": 1.0, |
| "arc_challenge": 0.5, |
| "mmlu_college_chemistry": null |
| }, |
| "code-only": { |
| "gsm_hard": 1.1999999999999995, |
| "math_algebra_medium": 8.000000000000012, |
| "humaneval_plus": 1.0, |
| "mbpp_plus": -3.0000000000000084, |
| "arc_challenge": 1.1666666666666667, |
| "mmlu_college_chemistry": null |
| }, |
| "science-only": { |
| "gsm_hard": 0.4000000000000004, |
| "math_algebra_medium": 6.000000000000008, |
| "humaneval_plus": 0.5000000000000006, |
| "mbpp_plus": -1.0, |
| "arc_challenge": 0.5, |
| "mmlu_college_chemistry": null |
| }, |
| "math+code": { |
| "gsm_hard": -0.3999999999999996, |
| "math_algebra_medium": 0.0, |
| "humaneval_plus": 1.0, |
| "mbpp_plus": -2.0000000000000084, |
| "arc_challenge": 0.3333333333333333, |
| "mmlu_college_chemistry": null |
| }, |
| "all": { |
| "gsm_hard": -0.3999999999999996, |
| "math_algebra_medium": 3.000000000000004, |
| "humaneval_plus": 1.0, |
| "mbpp_plus": -1.0, |
| "arc_challenge": 0.0, |
| "mmlu_college_chemistry": null |
| } |
| }, |
| "selected_top3": { |
| "math-only": { |
| "gsm_hard": [ |
| "multiarith", |
| "math_algebra_easy", |
| "math_counting_easy" |
| ], |
| "math_algebra_medium": [ |
| "math_algebra_easy", |
| "multiarith", |
| "math_counting_easy" |
| ], |
| "humaneval_plus": [ |
| "math_counting_easy", |
| "multiarith", |
| "math_algebra_easy" |
| ], |
| "mbpp_plus": [ |
| "math_counting_easy", |
| "multiarith", |
| "math_algebra_easy" |
| ], |
| "arc_challenge": [ |
| "aqua_rat", |
| "multiarith", |
| "math_algebra_easy" |
| ], |
| "mmlu_college_chemistry": [ |
| "math_counting_easy", |
| "multiarith", |
| "math_algebra_easy" |
| ] |
| }, |
| "code-only": { |
| "gsm_hard": [ |
| "humaneval", |
| "mbpp_sanitized", |
| "mbpp" |
| ], |
| "math_algebra_medium": [ |
| "humaneval", |
| "mbpp_sanitized", |
| "mbpp" |
| ], |
| "humaneval_plus": [ |
| "humaneval", |
| "mbpp_sanitized", |
| "mbpp" |
| ], |
| "mbpp_plus": [ |
| "mbpp", |
| "mbpp_sanitized", |
| "humaneval" |
| ], |
| "arc_challenge": [ |
| "humaneval", |
| "mbpp_sanitized", |
| "mbpp" |
| ], |
| "mmlu_college_chemistry": [ |
| "mbpp", |
| "mbpp_sanitized", |
| "humaneval" |
| ] |
| }, |
| "science-only": { |
| "gsm_hard": [ |
| "mmlu_elementary_math", |
| "mmlu_high_school_biology", |
| "mmlu_high_school_physics" |
| ], |
| "math_algebra_medium": [ |
| "mmlu_high_school_biology", |
| "mmlu_elementary_math", |
| "mmlu_high_school_physics" |
| ], |
| "humaneval_plus": [ |
| "mmlu_high_school_physics", |
| "mmlu_high_school_biology", |
| "mmlu_elementary_math" |
| ], |
| "mbpp_plus": [ |
| "mmlu_high_school_physics", |
| "mmlu_high_school_biology", |
| "mmlu_elementary_math" |
| ], |
| "arc_challenge": [ |
| "arc_easy", |
| "sciq", |
| "openbookqa" |
| ], |
| "mmlu_college_chemistry": [ |
| "mmlu_high_school_physics", |
| "mmlu_high_school_biology", |
| "mmlu_elementary_math" |
| ] |
| }, |
| "math+code": { |
| "gsm_hard": [ |
| "multiarith", |
| "math_algebra_easy", |
| "math_counting_easy" |
| ], |
| "math_algebra_medium": [ |
| "math_algebra_easy", |
| "multiarith", |
| "math_counting_easy" |
| ], |
| "humaneval_plus": [ |
| "humaneval", |
| "mbpp_sanitized", |
| "mbpp" |
| ], |
| "mbpp_plus": [ |
| "mbpp_sanitized", |
| "mbpp", |
| "humaneval" |
| ], |
| "arc_challenge": [ |
| "aqua_rat", |
| "multiarith", |
| "math_algebra_easy" |
| ], |
| "mmlu_college_chemistry": [ |
| "mbpp_sanitized", |
| "mbpp", |
| "math_counting_easy" |
| ] |
| }, |
| "all": { |
| "gsm_hard": [ |
| "multiarith", |
| "math_algebra_easy", |
| "math_counting_easy" |
| ], |
| "math_algebra_medium": [ |
| "math_algebra_easy", |
| "multiarith", |
| "math_counting_easy" |
| ], |
| "humaneval_plus": [ |
| "humaneval", |
| "mbpp_sanitized", |
| "mbpp" |
| ], |
| "mbpp_plus": [ |
| "mbpp_sanitized", |
| "mbpp", |
| "humaneval" |
| ], |
| "arc_challenge": [ |
| "arc_easy", |
| "sciq", |
| "openbookqa" |
| ], |
| "mmlu_college_chemistry": [ |
| "mmlu_high_school_physics", |
| "mbpp_sanitized", |
| "mbpp" |
| ] |
| } |
| }, |
| "best_pool_by_domain": { |
| "math": { |
| "best_anchor_pool": "code-only", |
| "score": 4.600000000000006, |
| "top3_selections": { |
| "gsm_hard": [ |
| "humaneval", |
| "mbpp_sanitized", |
| "mbpp" |
| ], |
| "math_algebra_medium": [ |
| "humaneval", |
| "mbpp_sanitized", |
| "mbpp" |
| ] |
| } |
| }, |
| "code": { |
| "best_anchor_pool": "math-only", |
| "score": 0.7500000000000002, |
| "top3_selections": { |
| "humaneval_plus": [ |
| "math_counting_easy", |
| "multiarith", |
| "math_algebra_easy" |
| ], |
| "mbpp_plus": [ |
| "math_counting_easy", |
| "multiarith", |
| "math_algebra_easy" |
| ] |
| } |
| }, |
| "science": { |
| "best_anchor_pool": "code-only", |
| "score": 1.1666666666666667, |
| "top3_selections": { |
| "arc_challenge": [ |
| "humaneval", |
| "mbpp_sanitized", |
| "mbpp" |
| ], |
| "mmlu_college_chemistry": [ |
| "mbpp", |
| "mbpp_sanitized", |
| "humaneval" |
| ] |
| } |
| } |
| } |
| } |