cross-model-lora-prediction-3b / results_pool_transfer.json
CK0607's picture
Final workshop round: results_pool_transfer.json
b195065 verified
{
"config": {
"hub_repo": "CK0607/cross-model-lora-prediction-3b",
"model_Y": "meta-llama/Llama-3.2-3B-Instruct",
"method": "global_ridge",
"no_surrogate": true,
"heldouts": [
"gsm_hard",
"gsm8k_test_500",
"mbpp_test_held",
"mbpp_plus",
"openbookqa_test"
]
},
"pools": {
"math_only": [
"r4:gsm8k",
"r4:svamp",
"r4:multiarith",
"r4:math_counting_easy",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mawps"
],
"code_only": [
"r4:mbpp",
"r4:humaneval",
"r4:mbpp_sanitized",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated"
],
"science_only": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:mmlu_high_school_biology",
"r4:mmlu_high_school_physics",
"r4:mmlu_elementary_math",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"math_plus_code": [
"r4:gsm8k",
"r4:mbpp",
"r4:svamp",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated"
],
"all": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
]
},
"baselines": {
"gsm_hard": {
"base_Y": 0.06333333333333334,
"oracle": 0.15
},
"gsm8k_test_500": {
"base_Y": 0.08,
"oracle": 0.29333333333333333
},
"mbpp_test_held": {
"base_Y": 0.23,
"oracle": 0.32
},
"mbpp_plus": {
"base_Y": 0.21666666666666667,
"oracle": 0.45
},
"openbookqa_test": {
"base_Y": 0.71,
"oracle": 0.9833333333333333
}
},
"summary": {
"by_pool": {
"math_only": {
"mean_gap_recovered": -0.0013892053068882359,
"n": 5
},
"code_only": {
"mean_gap_recovered": -1.2570036332231456,
"n": 5
},
"science_only": {
"mean_gap_recovered": 0.05831325009678664,
"n": 5
},
"math_plus_code": {
"mean_gap_recovered": 0.10617504615980225,
"n": 5
},
"all": {
"mean_gap_recovered": 0.12118442702284167,
"n": 5
}
},
"matched_vs_mismatched": {
"matched_domain": {
"mean_gap_recovered": -0.33363405451820094,
"n": 9
},
"mismatched_domain": {
"mean_gap_recovered": -0.22425602006394688,
"n": 11
},
"all_control": {
"mean_gap_recovered": 0.12118442702284167,
"n": 5
}
},
"winners_by_task": {
"gsm_hard": {
"pool": "science_only",
"gap_recovered": 0.0,
"match_type": "mismatched_domain"
},
"gsm8k_test_500": {
"pool": "science_only",
"gap_recovered": 0.109375,
"match_type": "mismatched_domain"
},
"mbpp_test_held": {
"pool": "math_plus_code",
"gap_recovered": 0.22222222222222213,
"match_type": "matched_domain"
},
"mbpp_plus": {
"pool": "math_plus_code",
"gap_recovered": 0.17142857142857135,
"match_type": "matched_domain"
},
"openbookqa_test": {
"pool": "all",
"gap_recovered": 0.1341463414634149,
"match_type": "all_control"
}
},
"decision": "mixed; more anchors and curation both matter"
},
"records": [
{
"cell_id": "B::all::gsm8k_test_500",
"stage": "pool_transfer",
"pool": "all",
"pool_size": 24,
"pool_anchor_names": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"pool_domains": [
"code",
"math",
"science"
],
"task": "gsm8k_test_500",
"target_domain": "math",
"method": "global_ridge",
"match_type": "all_control",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_all_N24",
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 7.885,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.07812499999999999
},
{
"cell_id": "B::all::gsm_hard",
"stage": "pool_transfer",
"pool": "all",
"pool_size": 24,
"pool_anchor_names": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"pool_domains": [
"code",
"math",
"science"
],
"task": "gsm_hard",
"target_domain": "math",
"method": "global_ridge",
"match_type": "all_control",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_all_N24",
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 22.563,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0
},
{
"cell_id": "B::all::mbpp_plus",
"stage": "pool_transfer",
"pool": "all",
"pool_size": 24,
"pool_anchor_names": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"pool_domains": [
"code",
"math",
"science"
],
"task": "mbpp_plus",
"target_domain": "code",
"method": "global_ridge",
"match_type": "all_control",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_all_N24",
"accuracy": 0.25666666666666665,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 150.049,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.17142857142857135
},
{
"cell_id": "B::all::mbpp_test_held",
"stage": "pool_transfer",
"pool": "all",
"pool_size": 24,
"pool_anchor_names": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"pool_domains": [
"code",
"math",
"science"
],
"task": "mbpp_test_held",
"target_domain": "code",
"method": "global_ridge",
"match_type": "all_control",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_all_N24",
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 6,
"eval_seconds": 50.906,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213
},
{
"cell_id": "B::all::openbookqa_test",
"stage": "pool_transfer",
"pool": "all",
"pool_size": 24,
"pool_anchor_names": [
"r4:gsm8k",
"r4:mbpp",
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:svamp",
"r4:multiarith",
"r4:mmlu_high_school_biology",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mmlu_high_school_physics",
"r4:mbpp_sanitized",
"r4:mmlu_elementary_math",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r4:medmcqa_easy",
"r5:aqua_rat_numeric",
"r5:math_counting_easy",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"pool_domains": [
"code",
"math",
"science"
],
"task": "openbookqa_test",
"target_domain": "science",
"method": "global_ridge",
"match_type": "all_control",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_all_N24",
"accuracy": 0.7466666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 22.64,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.1341463414634149
},
{
"cell_id": "B::code_only::gsm8k_test_500",
"stage": "pool_transfer",
"pool": "code_only",
"pool_size": 6,
"pool_anchor_names": [
"r4:mbpp",
"r4:humaneval",
"r4:mbpp_sanitized",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated"
],
"pool_domains": [
"code"
],
"task": "gsm8k_test_500",
"target_domain": "math",
"method": "global_ridge",
"match_type": "mismatched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_code_only_N6",
"accuracy": 0.0,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 29.342,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": -0.37500000000000006
},
{
"cell_id": "B::code_only::gsm_hard",
"stage": "pool_transfer",
"pool": "code_only",
"pool_size": 6,
"pool_anchor_names": [
"r4:mbpp",
"r4:humaneval",
"r4:mbpp_sanitized",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated"
],
"pool_domains": [
"code"
],
"task": "gsm_hard",
"target_domain": "math",
"method": "global_ridge",
"match_type": "mismatched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_code_only_N6",
"accuracy": 0.0,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 29.625,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.730769230769231
},
{
"cell_id": "B::code_only::mbpp_plus",
"stage": "pool_transfer",
"pool": "code_only",
"pool_size": 6,
"pool_anchor_names": [
"r4:mbpp",
"r4:humaneval",
"r4:mbpp_sanitized",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated"
],
"pool_domains": [
"code"
],
"task": "mbpp_plus",
"target_domain": "code",
"method": "global_ridge",
"match_type": "matched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_code_only_N6",
"accuracy": 0.0,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 225.873,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.9285714285714286
},
{
"cell_id": "B::code_only::mbpp_test_held",
"stage": "pool_transfer",
"pool": "code_only",
"pool_size": 6,
"pool_anchor_names": [
"r4:mbpp",
"r4:humaneval",
"r4:mbpp_sanitized",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated"
],
"pool_domains": [
"code"
],
"task": "mbpp_test_held",
"target_domain": "code",
"method": "global_ridge",
"match_type": "matched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_code_only_N6",
"accuracy": 0.0,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 7,
"eval_seconds": 74.141,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": -2.555555555555556
},
{
"cell_id": "B::code_only::openbookqa_test",
"stage": "pool_transfer",
"pool": "code_only",
"pool_size": 6,
"pool_anchor_names": [
"r4:mbpp",
"r4:humaneval",
"r4:mbpp_sanitized",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated"
],
"pool_domains": [
"code"
],
"task": "openbookqa_test",
"target_domain": "science",
"method": "global_ridge",
"match_type": "mismatched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_code_only_N6",
"accuracy": 0.24666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 28.739,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": -1.6951219512195121
},
{
"cell_id": "B::math_only::gsm8k_test_500",
"stage": "pool_transfer",
"pool": "math_only",
"pool_size": 8,
"pool_anchor_names": [
"r4:gsm8k",
"r4:svamp",
"r4:multiarith",
"r4:math_counting_easy",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mawps"
],
"pool_domains": [
"math"
],
"task": "gsm8k_test_500",
"target_domain": "math",
"method": "global_ridge",
"match_type": "matched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_math_only_N8",
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 1,
"eval_seconds": 7.423,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.07812499999999999
},
{
"cell_id": "B::math_only::gsm_hard",
"stage": "pool_transfer",
"pool": "math_only",
"pool_size": 8,
"pool_anchor_names": [
"r4:gsm8k",
"r4:svamp",
"r4:multiarith",
"r4:math_counting_easy",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mawps"
],
"pool_domains": [
"math"
],
"task": "gsm_hard",
"target_domain": "math",
"method": "global_ridge",
"match_type": "matched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_math_only_N8",
"accuracy": 0.05333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 20.802,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.11538461538461542
},
{
"cell_id": "B::math_only::mbpp_plus",
"stage": "pool_transfer",
"pool": "math_only",
"pool_size": 8,
"pool_anchor_names": [
"r4:gsm8k",
"r4:svamp",
"r4:multiarith",
"r4:math_counting_easy",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mawps"
],
"pool_domains": [
"math"
],
"task": "mbpp_plus",
"target_domain": "code",
"method": "global_ridge",
"match_type": "mismatched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_math_only_N8",
"accuracy": 0.20666666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 156.482,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.04285714285714289
},
{
"cell_id": "B::math_only::mbpp_test_held",
"stage": "pool_transfer",
"pool": "math_only",
"pool_size": 8,
"pool_anchor_names": [
"r4:gsm8k",
"r4:svamp",
"r4:multiarith",
"r4:math_counting_easy",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mawps"
],
"pool_domains": [
"math"
],
"task": "mbpp_test_held",
"target_domain": "code",
"method": "global_ridge",
"match_type": "mismatched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_math_only_N8",
"accuracy": 0.23,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 2,
"eval_seconds": 50.775,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.0
},
{
"cell_id": "B::math_only::openbookqa_test",
"stage": "pool_transfer",
"pool": "math_only",
"pool_size": 8,
"pool_anchor_names": [
"r4:gsm8k",
"r4:svamp",
"r4:multiarith",
"r4:math_counting_easy",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mawps"
],
"pool_domains": [
"math"
],
"task": "openbookqa_test",
"target_domain": "science",
"method": "global_ridge",
"match_type": "mismatched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_math_only_N8",
"accuracy": 0.73,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 4,
"eval_seconds": 4.616,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.07317073170731714
},
{
"cell_id": "B::math_plus_code::gsm8k_test_500",
"stage": "pool_transfer",
"pool": "math_plus_code",
"pool_size": 14,
"pool_anchor_names": [
"r4:gsm8k",
"r4:mbpp",
"r4:svamp",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated"
],
"pool_domains": [
"code",
"math"
],
"task": "gsm8k_test_500",
"target_domain": "math",
"method": "global_ridge",
"match_type": "matched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_math_plus_code_N14",
"accuracy": 0.09666666666666666,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 0,
"eval_seconds": 7.802,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.07812499999999999
},
{
"cell_id": "B::math_plus_code::gsm_hard",
"stage": "pool_transfer",
"pool": "math_plus_code",
"pool_size": 14,
"pool_anchor_names": [
"r4:gsm8k",
"r4:mbpp",
"r4:svamp",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated"
],
"pool_domains": [
"code",
"math"
],
"task": "gsm_hard",
"target_domain": "math",
"method": "global_ridge",
"match_type": "matched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_math_plus_code_N14",
"accuracy": 0.06,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 7,
"eval_seconds": 20.838,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": -0.038461538461538554
},
{
"cell_id": "B::math_plus_code::mbpp_plus",
"stage": "pool_transfer",
"pool": "math_plus_code",
"pool_size": 14,
"pool_anchor_names": [
"r4:gsm8k",
"r4:mbpp",
"r4:svamp",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated"
],
"pool_domains": [
"code",
"math"
],
"task": "mbpp_plus",
"target_domain": "code",
"method": "global_ridge",
"match_type": "matched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_math_plus_code_N14",
"accuracy": 0.25666666666666665,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 147.368,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": 0.17142857142857135
},
{
"cell_id": "B::math_plus_code::mbpp_test_held",
"stage": "pool_transfer",
"pool": "math_plus_code",
"pool_size": 14,
"pool_anchor_names": [
"r4:gsm8k",
"r4:mbpp",
"r4:svamp",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated"
],
"pool_domains": [
"code",
"math"
],
"task": "mbpp_test_held",
"target_domain": "code",
"method": "global_ridge",
"match_type": "matched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_math_plus_code_N14",
"accuracy": 0.25,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 1,
"eval_seconds": 50.993,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.22222222222222213
},
{
"cell_id": "B::math_plus_code::openbookqa_test",
"stage": "pool_transfer",
"pool": "math_plus_code",
"pool_size": 14,
"pool_anchor_names": [
"r4:gsm8k",
"r4:mbpp",
"r4:svamp",
"r4:multiarith",
"r4:math_counting_easy",
"r4:humaneval",
"r4:mbpp_sanitized",
"r4:math_algebra_easy",
"r4:aqua_rat",
"r5:aqua_rat_numeric",
"r5:mawps",
"r5:mbpp_sanitized",
"r5:humaneval",
"r5:conala_curated"
],
"pool_domains": [
"code",
"math"
],
"task": "openbookqa_test",
"target_domain": "science",
"method": "global_ridge",
"match_type": "mismatched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_math_plus_code_N14",
"accuracy": 0.7366666666666667,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 4.391,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.09756097560975632
},
{
"cell_id": "B::science_only::gsm8k_test_500",
"stage": "pool_transfer",
"pool": "science_only",
"pool_size": 8,
"pool_anchor_names": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:mmlu_high_school_biology",
"r4:mmlu_high_school_physics",
"r4:mmlu_elementary_math",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"pool_domains": [
"science"
],
"task": "gsm8k_test_500",
"target_domain": "math",
"method": "global_ridge",
"match_type": "mismatched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm8k_test_500_global_ridge_science_only_N8",
"accuracy": 0.10333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 3,
"eval_seconds": 27.498,
"base_Y": 0.08,
"oracle": 0.29333333333333333,
"gap_recovered": 0.109375
},
{
"cell_id": "B::science_only::gsm_hard",
"stage": "pool_transfer",
"pool": "science_only",
"pool_size": 8,
"pool_anchor_names": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:mmlu_high_school_biology",
"r4:mmlu_high_school_physics",
"r4:mmlu_elementary_math",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"pool_domains": [
"science"
],
"task": "gsm_hard",
"target_domain": "math",
"method": "global_ridge",
"match_type": "mismatched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/gsm_hard_global_ridge_science_only_N8",
"accuracy": 0.06333333333333334,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 2,
"eval_seconds": 29.001,
"base_Y": 0.06333333333333334,
"oracle": 0.15,
"gap_recovered": 0.0
},
{
"cell_id": "B::science_only::mbpp_plus",
"stage": "pool_transfer",
"pool": "science_only",
"pool_size": 8,
"pool_anchor_names": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:mmlu_high_school_biology",
"r4:mmlu_high_school_physics",
"r4:mmlu_elementary_math",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"pool_domains": [
"science"
],
"task": "mbpp_plus",
"target_domain": "code",
"method": "global_ridge",
"match_type": "mismatched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_plus_global_ridge_science_only_N8",
"accuracy": 0.21333333333333335,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 5,
"eval_seconds": 164.28,
"base_Y": 0.21666666666666667,
"oracle": 0.45,
"gap_recovered": -0.01428571428571426
},
{
"cell_id": "B::science_only::mbpp_test_held",
"stage": "pool_transfer",
"pool": "science_only",
"pool_size": 8,
"pool_anchor_names": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:mmlu_high_school_biology",
"r4:mmlu_high_school_physics",
"r4:mmlu_elementary_math",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"pool_domains": [
"science"
],
"task": "mbpp_test_held",
"target_domain": "code",
"method": "global_ridge",
"match_type": "mismatched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/mbpp_test_held_global_ridge_science_only_N8",
"accuracy": 0.24,
"real_generation_eval": true,
"eval_examples": 100,
"gpu": 4,
"eval_seconds": 50.504,
"base_Y": 0.23,
"oracle": 0.32,
"gap_recovered": 0.11111111111111091
},
{
"cell_id": "B::science_only::openbookqa_test",
"stage": "pool_transfer",
"pool": "science_only",
"pool_size": 8,
"pool_anchor_names": [
"r4:sciq",
"r4:arc_easy",
"r4:openbookqa",
"r4:mmlu_high_school_biology",
"r4:mmlu_high_school_physics",
"r4:mmlu_elementary_math",
"r5:medmcqa_easy",
"r5:pubmedqa_pqal"
],
"pool_domains": [
"science"
],
"task": "openbookqa_test",
"target_domain": "science",
"method": "global_ridge",
"match_type": "matched_domain",
"adapter_dir": "/workspace/round3_out/round_final/Y_pred/openbookqa_test_global_ridge_science_only_N8",
"accuracy": 0.7333333333333333,
"real_generation_eval": true,
"eval_examples": 300,
"gpu": 6,
"eval_seconds": 28.651,
"base_Y": 0.71,
"oracle": 0.9833333333333333,
"gap_recovered": 0.08536585365853654
}
]
}