nips-2026-anon-artifacts / computed_values /data /deployable_analysis /split_half_proxy_analysis.json
| { | |
| "n_splits": 20, | |
| "n_smac": 569, | |
| "n_llm": 195, | |
| "results": [ | |
| { | |
| "split": 0, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.4570224663107674, | |
| "top5_eval_ap": 0.554324263047066, | |
| "oracle_eval_ap": 0.6992517338762682 | |
| }, | |
| { | |
| "split": 0, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6701524418076312, | |
| "top5_eval_ap": 0.686699506702746, | |
| "oracle_eval_ap": 0.6992517338762682 | |
| }, | |
| { | |
| "split": 0, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6721980222170849, | |
| "top5_eval_ap": 0.6792901260791705, | |
| "oracle_eval_ap": 0.6992517338762682 | |
| }, | |
| { | |
| "split": 0, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6721980222170849, | |
| "top5_eval_ap": 0.6792901260791705, | |
| "oracle_eval_ap": 0.6992517338762682 | |
| }, | |
| { | |
| "split": 0, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6792901260791705, | |
| "top5_eval_ap": 0.6792901260791705, | |
| "oracle_eval_ap": 0.6992517338762682 | |
| }, | |
| { | |
| "split": 0, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5233387623389887, | |
| "top5_eval_ap": 0.5367247333545382, | |
| "oracle_eval_ap": 0.5821468830123655 | |
| }, | |
| { | |
| "split": 0, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.5710383254419367, | |
| "top5_eval_ap": 0.5821468830123655, | |
| "oracle_eval_ap": 0.5821468830123655 | |
| }, | |
| { | |
| "split": 0, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5246631981230342, | |
| "top5_eval_ap": 0.5710383254419367, | |
| "oracle_eval_ap": 0.5821468830123655 | |
| }, | |
| { | |
| "split": 0, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5710383254419367, | |
| "top5_eval_ap": 0.5710383254419367, | |
| "oracle_eval_ap": 0.5821468830123655 | |
| }, | |
| { | |
| "split": 0, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5821468830123655, | |
| "top5_eval_ap": 0.5821468830123655, | |
| "oracle_eval_ap": 0.5821468830123655 | |
| }, | |
| { | |
| "split": 1, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.46441448049443035, | |
| "top5_eval_ap": 0.5270217679633087, | |
| "oracle_eval_ap": 0.6763881841332537 | |
| }, | |
| { | |
| "split": 1, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6373692361609227, | |
| "top5_eval_ap": 0.6574063977544806, | |
| "oracle_eval_ap": 0.6763881841332537 | |
| }, | |
| { | |
| "split": 1, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6412953703595898, | |
| "top5_eval_ap": 0.659747880337521, | |
| "oracle_eval_ap": 0.6763881841332537 | |
| }, | |
| { | |
| "split": 1, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.659747880337521, | |
| "top5_eval_ap": 0.659747880337521, | |
| "oracle_eval_ap": 0.6763881841332537 | |
| }, | |
| { | |
| "split": 1, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6549798755291265, | |
| "top5_eval_ap": 0.659747880337521, | |
| "oracle_eval_ap": 0.6763881841332537 | |
| }, | |
| { | |
| "split": 1, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5122126575807857, | |
| "top5_eval_ap": 0.5309389698647801, | |
| "oracle_eval_ap": 0.5676444796768093 | |
| }, | |
| { | |
| "split": 1, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.5639179338971837, | |
| "top5_eval_ap": 0.5676444796768093, | |
| "oracle_eval_ap": 0.5676444796768093 | |
| }, | |
| { | |
| "split": 1, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5192059594256696, | |
| "top5_eval_ap": 0.5639179338971837, | |
| "oracle_eval_ap": 0.5676444796768093 | |
| }, | |
| { | |
| "split": 1, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5192059594256696, | |
| "top5_eval_ap": 0.5639179338971837, | |
| "oracle_eval_ap": 0.5676444796768093 | |
| }, | |
| { | |
| "split": 1, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5676444796768093, | |
| "top5_eval_ap": 0.5676444796768093, | |
| "oracle_eval_ap": 0.5676444796768093 | |
| }, | |
| { | |
| "split": 2, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.48153527880634006, | |
| "top5_eval_ap": 0.583935038683602, | |
| "oracle_eval_ap": 0.6990313570985696 | |
| }, | |
| { | |
| "split": 2, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6623016702621187, | |
| "top5_eval_ap": 0.6975191425541587, | |
| "oracle_eval_ap": 0.6990313570985696 | |
| }, | |
| { | |
| "split": 2, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6623016702621187, | |
| "top5_eval_ap": 0.6668939043306413, | |
| "oracle_eval_ap": 0.6990313570985696 | |
| }, | |
| { | |
| "split": 2, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6623016702621187, | |
| "top5_eval_ap": 0.6765853983546597, | |
| "oracle_eval_ap": 0.6990313570985696 | |
| }, | |
| { | |
| "split": 2, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6623016702621187, | |
| "top5_eval_ap": 0.6872781185782315, | |
| "oracle_eval_ap": 0.6990313570985696 | |
| }, | |
| { | |
| "split": 2, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5716335781219793, | |
| "top5_eval_ap": 0.5924666754414817, | |
| "oracle_eval_ap": 0.6303368424328526 | |
| }, | |
| { | |
| "split": 2, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6197933838977654, | |
| "top5_eval_ap": 0.6303368424328526, | |
| "oracle_eval_ap": 0.6303368424328526 | |
| }, | |
| { | |
| "split": 2, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5595467532159494, | |
| "top5_eval_ap": 0.6303368424328526, | |
| "oracle_eval_ap": 0.6303368424328526 | |
| }, | |
| { | |
| "split": 2, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5595467532159494, | |
| "top5_eval_ap": 0.6303368424328526, | |
| "oracle_eval_ap": 0.6303368424328526 | |
| }, | |
| { | |
| "split": 2, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5939967909066806, | |
| "top5_eval_ap": 0.6197933838977654, | |
| "oracle_eval_ap": 0.6303368424328526 | |
| }, | |
| { | |
| "split": 3, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.47686687736181266, | |
| "top5_eval_ap": 0.5533094784532413, | |
| "oracle_eval_ap": 0.699491642922994 | |
| }, | |
| { | |
| "split": 3, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6670702453072503, | |
| "top5_eval_ap": 0.6845344993452391, | |
| "oracle_eval_ap": 0.699491642922994 | |
| }, | |
| { | |
| "split": 3, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.676851998605726, | |
| "top5_eval_ap": 0.6973621765945375, | |
| "oracle_eval_ap": 0.699491642922994 | |
| }, | |
| { | |
| "split": 3, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.676851998605726, | |
| "top5_eval_ap": 0.6973621765945375, | |
| "oracle_eval_ap": 0.699491642922994 | |
| }, | |
| { | |
| "split": 3, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6973621765945375, | |
| "top5_eval_ap": 0.6973621765945375, | |
| "oracle_eval_ap": 0.699491642922994 | |
| }, | |
| { | |
| "split": 3, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5450044403166794, | |
| "top5_eval_ap": 0.5532931874364065, | |
| "oracle_eval_ap": 0.6134983686209221 | |
| }, | |
| { | |
| "split": 3, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.5865040038295037, | |
| "top5_eval_ap": 0.5865040038295037, | |
| "oracle_eval_ap": 0.6134983686209221 | |
| }, | |
| { | |
| "split": 3, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5865040038295037, | |
| "top5_eval_ap": 0.5865040038295037, | |
| "oracle_eval_ap": 0.6134983686209221 | |
| }, | |
| { | |
| "split": 3, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5865040038295037, | |
| "top5_eval_ap": 0.5865040038295037, | |
| "oracle_eval_ap": 0.6134983686209221 | |
| }, | |
| { | |
| "split": 3, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5865040038295037, | |
| "top5_eval_ap": 0.6134983686209221, | |
| "oracle_eval_ap": 0.6134983686209221 | |
| }, | |
| { | |
| "split": 4, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.48965935913922864, | |
| "top5_eval_ap": 0.5659805254448687, | |
| "oracle_eval_ap": 0.6665470019333972 | |
| }, | |
| { | |
| "split": 4, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6458410401680073, | |
| "top5_eval_ap": 0.6538475606777989, | |
| "oracle_eval_ap": 0.6665470019333972 | |
| }, | |
| { | |
| "split": 4, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6458410401680073, | |
| "top5_eval_ap": 0.6514671952126687, | |
| "oracle_eval_ap": 0.6665470019333972 | |
| }, | |
| { | |
| "split": 4, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6458410401680073, | |
| "top5_eval_ap": 0.655507218656965, | |
| "oracle_eval_ap": 0.6665470019333972 | |
| }, | |
| { | |
| "split": 4, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.655507218656965, | |
| "top5_eval_ap": 0.655507218656965, | |
| "oracle_eval_ap": 0.6665470019333972 | |
| }, | |
| { | |
| "split": 4, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5580722760115239, | |
| "top5_eval_ap": 0.5769510779737405, | |
| "oracle_eval_ap": 0.5996924034497422 | |
| }, | |
| { | |
| "split": 4, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.5929810545795948, | |
| "top5_eval_ap": 0.5996924034497422, | |
| "oracle_eval_ap": 0.5996924034497422 | |
| }, | |
| { | |
| "split": 4, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5381143061930622, | |
| "top5_eval_ap": 0.5996924034497422, | |
| "oracle_eval_ap": 0.5996924034497422 | |
| }, | |
| { | |
| "split": 4, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5996924034497422, | |
| "top5_eval_ap": 0.5996924034497422, | |
| "oracle_eval_ap": 0.5996924034497422 | |
| }, | |
| { | |
| "split": 4, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5929810545795948, | |
| "top5_eval_ap": 0.5996924034497422, | |
| "oracle_eval_ap": 0.5996924034497422 | |
| }, | |
| { | |
| "split": 5, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.4766814331572886, | |
| "top5_eval_ap": 0.5455312939524202, | |
| "oracle_eval_ap": 0.7144895023760132 | |
| }, | |
| { | |
| "split": 5, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6996238632035302, | |
| "top5_eval_ap": 0.6996238632035302, | |
| "oracle_eval_ap": 0.7144895023760132 | |
| }, | |
| { | |
| "split": 5, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.674811244678726, | |
| "top5_eval_ap": 0.6889647183194441, | |
| "oracle_eval_ap": 0.7144895023760132 | |
| }, | |
| { | |
| "split": 5, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.674811244678726, | |
| "top5_eval_ap": 0.6794965240318673, | |
| "oracle_eval_ap": 0.7144895023760132 | |
| }, | |
| { | |
| "split": 5, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6893304832924366, | |
| "top5_eval_ap": 0.707395115377764, | |
| "oracle_eval_ap": 0.7144895023760132 | |
| }, | |
| { | |
| "split": 5, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5699660365691224, | |
| "top5_eval_ap": 0.580786807367587, | |
| "oracle_eval_ap": 0.6170531937027124 | |
| }, | |
| { | |
| "split": 5, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6110326280684486, | |
| "top5_eval_ap": 0.6170531937027124, | |
| "oracle_eval_ap": 0.6170531937027124 | |
| }, | |
| { | |
| "split": 5, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5636349935114392, | |
| "top5_eval_ap": 0.6110326280684486, | |
| "oracle_eval_ap": 0.6170531937027124 | |
| }, | |
| { | |
| "split": 5, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5636349935114392, | |
| "top5_eval_ap": 0.6110326280684486, | |
| "oracle_eval_ap": 0.6170531937027124 | |
| }, | |
| { | |
| "split": 5, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6170531937027124, | |
| "top5_eval_ap": 0.6170531937027124, | |
| "oracle_eval_ap": 0.6170531937027124 | |
| }, | |
| { | |
| "split": 6, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.46762743937652196, | |
| "top5_eval_ap": 0.5497164805265882, | |
| "oracle_eval_ap": 0.6613877313531714 | |
| }, | |
| { | |
| "split": 6, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6363687839062333, | |
| "top5_eval_ap": 0.6495101818993542, | |
| "oracle_eval_ap": 0.6613877313531714 | |
| }, | |
| { | |
| "split": 6, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6302792233786925, | |
| "top5_eval_ap": 0.6449900714167096, | |
| "oracle_eval_ap": 0.6613877313531714 | |
| }, | |
| { | |
| "split": 6, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6218583151992617, | |
| "top5_eval_ap": 0.6613877313531714, | |
| "oracle_eval_ap": 0.6613877313531714 | |
| }, | |
| { | |
| "split": 6, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6547087651871317, | |
| "top5_eval_ap": 0.6547087651871317, | |
| "oracle_eval_ap": 0.6613877313531714 | |
| }, | |
| { | |
| "split": 6, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5228096505652988, | |
| "top5_eval_ap": 0.5472450405857392, | |
| "oracle_eval_ap": 0.5820159930148319 | |
| }, | |
| { | |
| "split": 6, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.5749494698388545, | |
| "top5_eval_ap": 0.5820159930148319, | |
| "oracle_eval_ap": 0.5820159930148319 | |
| }, | |
| { | |
| "split": 6, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5197841478683497, | |
| "top5_eval_ap": 0.5820159930148319, | |
| "oracle_eval_ap": 0.5820159930148319 | |
| }, | |
| { | |
| "split": 6, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5820159930148319, | |
| "top5_eval_ap": 0.5820159930148319, | |
| "oracle_eval_ap": 0.5820159930148319 | |
| }, | |
| { | |
| "split": 6, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5749494698388545, | |
| "top5_eval_ap": 0.5749494698388545, | |
| "oracle_eval_ap": 0.5820159930148319 | |
| }, | |
| { | |
| "split": 7, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.4655134540960533, | |
| "top5_eval_ap": 0.522726926569494, | |
| "oracle_eval_ap": 0.6861187649104927 | |
| }, | |
| { | |
| "split": 7, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6644946516875663, | |
| "top5_eval_ap": 0.6722847504543532, | |
| "oracle_eval_ap": 0.6861187649104927 | |
| }, | |
| { | |
| "split": 7, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6374107287330919, | |
| "top5_eval_ap": 0.6575185205692705, | |
| "oracle_eval_ap": 0.6861187649104927 | |
| }, | |
| { | |
| "split": 7, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6374107287330919, | |
| "top5_eval_ap": 0.6575185205692705, | |
| "oracle_eval_ap": 0.6861187649104927 | |
| }, | |
| { | |
| "split": 7, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6744851082264265, | |
| "top5_eval_ap": 0.6744851082264265, | |
| "oracle_eval_ap": 0.6861187649104927 | |
| }, | |
| { | |
| "split": 7, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5492889416845734, | |
| "top5_eval_ap": 0.5597422907504639, | |
| "oracle_eval_ap": 0.5881312754667634 | |
| }, | |
| { | |
| "split": 7, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.5825149439259846, | |
| "top5_eval_ap": 0.5881312754667634, | |
| "oracle_eval_ap": 0.5881312754667634 | |
| }, | |
| { | |
| "split": 7, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5406230156355737, | |
| "top5_eval_ap": 0.5825149439259846, | |
| "oracle_eval_ap": 0.5881312754667634 | |
| }, | |
| { | |
| "split": 7, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5406230156355737, | |
| "top5_eval_ap": 0.5825149439259846, | |
| "oracle_eval_ap": 0.5881312754667634 | |
| }, | |
| { | |
| "split": 7, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5881312754667634, | |
| "top5_eval_ap": 0.5881312754667634, | |
| "oracle_eval_ap": 0.5881312754667634 | |
| }, | |
| { | |
| "split": 8, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.4671888729291876, | |
| "top5_eval_ap": 0.5527941540405327, | |
| "oracle_eval_ap": 0.6909129736754938 | |
| }, | |
| { | |
| "split": 8, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6681493403214834, | |
| "top5_eval_ap": 0.6858005151499248, | |
| "oracle_eval_ap": 0.6909129736754938 | |
| }, | |
| { | |
| "split": 8, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6613137040400633, | |
| "top5_eval_ap": 0.675795982690935, | |
| "oracle_eval_ap": 0.6909129736754938 | |
| }, | |
| { | |
| "split": 8, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6613137040400633, | |
| "top5_eval_ap": 0.675795982690935, | |
| "oracle_eval_ap": 0.6909129736754938 | |
| }, | |
| { | |
| "split": 8, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6681493403214834, | |
| "top5_eval_ap": 0.675795982690935, | |
| "oracle_eval_ap": 0.6909129736754938 | |
| }, | |
| { | |
| "split": 8, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5693244864895343, | |
| "top5_eval_ap": 0.5780891113869431, | |
| "oracle_eval_ap": 0.6174737647198649 | |
| }, | |
| { | |
| "split": 8, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6096789446505377, | |
| "top5_eval_ap": 0.6174737647198649, | |
| "oracle_eval_ap": 0.6174737647198649 | |
| }, | |
| { | |
| "split": 8, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5475975540445444, | |
| "top5_eval_ap": 0.6096789446505377, | |
| "oracle_eval_ap": 0.6174737647198649 | |
| }, | |
| { | |
| "split": 8, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5475975540445444, | |
| "top5_eval_ap": 0.6096789446505377, | |
| "oracle_eval_ap": 0.6174737647198649 | |
| }, | |
| { | |
| "split": 8, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6174737647198649, | |
| "top5_eval_ap": 0.6174737647198649, | |
| "oracle_eval_ap": 0.6174737647198649 | |
| }, | |
| { | |
| "split": 9, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.4647268261279126, | |
| "top5_eval_ap": 0.5352504052738676, | |
| "oracle_eval_ap": 0.6595143352252696 | |
| }, | |
| { | |
| "split": 9, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6352649178425169, | |
| "top5_eval_ap": 0.6469598871324141, | |
| "oracle_eval_ap": 0.6595143352252696 | |
| }, | |
| { | |
| "split": 9, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6383312189984667, | |
| "top5_eval_ap": 0.6412995301486579, | |
| "oracle_eval_ap": 0.6595143352252696 | |
| }, | |
| { | |
| "split": 9, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6383312189984667, | |
| "top5_eval_ap": 0.6412995301486579, | |
| "oracle_eval_ap": 0.6595143352252696 | |
| }, | |
| { | |
| "split": 9, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6412995301486579, | |
| "top5_eval_ap": 0.6412995301486579, | |
| "oracle_eval_ap": 0.6595143352252696 | |
| }, | |
| { | |
| "split": 9, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5591218602390743, | |
| "top5_eval_ap": 0.5723850410583204, | |
| "oracle_eval_ap": 0.6060849802997287 | |
| }, | |
| { | |
| "split": 9, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6026304288042699, | |
| "top5_eval_ap": 0.6060849802997287, | |
| "oracle_eval_ap": 0.6060849802997287 | |
| }, | |
| { | |
| "split": 9, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5630703886645162, | |
| "top5_eval_ap": 0.6060849802997287, | |
| "oracle_eval_ap": 0.6060849802997287 | |
| }, | |
| { | |
| "split": 9, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5630703886645162, | |
| "top5_eval_ap": 0.6060849802997287, | |
| "oracle_eval_ap": 0.6060849802997287 | |
| }, | |
| { | |
| "split": 9, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6026304288042699, | |
| "top5_eval_ap": 0.6060849802997287, | |
| "oracle_eval_ap": 0.6060849802997287 | |
| }, | |
| { | |
| "split": 10, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.43252556578138707, | |
| "top5_eval_ap": 0.545737859838442, | |
| "oracle_eval_ap": 0.6510920526336375 | |
| }, | |
| { | |
| "split": 10, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6328251297478321, | |
| "top5_eval_ap": 0.6328251297478321, | |
| "oracle_eval_ap": 0.6510920526336375 | |
| }, | |
| { | |
| "split": 10, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6269563345523465, | |
| "top5_eval_ap": 0.6298090860067376, | |
| "oracle_eval_ap": 0.6510920526336375 | |
| }, | |
| { | |
| "split": 10, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6269563345523465, | |
| "top5_eval_ap": 0.6298090860067376, | |
| "oracle_eval_ap": 0.6510920526336375 | |
| }, | |
| { | |
| "split": 10, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6298090860067376, | |
| "top5_eval_ap": 0.6298090860067376, | |
| "oracle_eval_ap": 0.6510920526336375 | |
| }, | |
| { | |
| "split": 10, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5107000309646199, | |
| "top5_eval_ap": 0.5299460337513402, | |
| "oracle_eval_ap": 0.5705619158989852 | |
| }, | |
| { | |
| "split": 10, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.5396149215794253, | |
| "top5_eval_ap": 0.5705619158989852, | |
| "oracle_eval_ap": 0.5705619158989852 | |
| }, | |
| { | |
| "split": 10, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5177459209938902, | |
| "top5_eval_ap": 0.5680304207869833, | |
| "oracle_eval_ap": 0.5705619158989852 | |
| }, | |
| { | |
| "split": 10, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5680304207869833, | |
| "top5_eval_ap": 0.5680304207869833, | |
| "oracle_eval_ap": 0.5705619158989852 | |
| }, | |
| { | |
| "split": 10, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5489515867423438, | |
| "top5_eval_ap": 0.5705619158989852, | |
| "oracle_eval_ap": 0.5705619158989852 | |
| }, | |
| { | |
| "split": 11, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.4710269964771887, | |
| "top5_eval_ap": 0.5467773233971552, | |
| "oracle_eval_ap": 0.6719548100783407 | |
| }, | |
| { | |
| "split": 11, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6594384456486362, | |
| "top5_eval_ap": 0.6667222629479316, | |
| "oracle_eval_ap": 0.6719548100783407 | |
| }, | |
| { | |
| "split": 11, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6425104798363381, | |
| "top5_eval_ap": 0.6594384456486362, | |
| "oracle_eval_ap": 0.6719548100783407 | |
| }, | |
| { | |
| "split": 11, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6425104798363381, | |
| "top5_eval_ap": 0.6594384456486362, | |
| "oracle_eval_ap": 0.6719548100783407 | |
| }, | |
| { | |
| "split": 11, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6616147383178763, | |
| "top5_eval_ap": 0.6667222629479316, | |
| "oracle_eval_ap": 0.6719548100783407 | |
| }, | |
| { | |
| "split": 11, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5364096772470438, | |
| "top5_eval_ap": 0.5543741202179301, | |
| "oracle_eval_ap": 0.5968778721083486 | |
| }, | |
| { | |
| "split": 11, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.5968778721083486, | |
| "top5_eval_ap": 0.5968778721083486, | |
| "oracle_eval_ap": 0.5968778721083486 | |
| }, | |
| { | |
| "split": 11, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5330398750639705, | |
| "top5_eval_ap": 0.5960184148586979, | |
| "oracle_eval_ap": 0.5968778721083486 | |
| }, | |
| { | |
| "split": 11, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5960184148586979, | |
| "top5_eval_ap": 0.5960184148586979, | |
| "oracle_eval_ap": 0.5968778721083486 | |
| }, | |
| { | |
| "split": 11, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5687816448742656, | |
| "top5_eval_ap": 0.5968778721083486, | |
| "oracle_eval_ap": 0.5968778721083486 | |
| }, | |
| { | |
| "split": 12, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.48477110005647617, | |
| "top5_eval_ap": 0.5662010238381728, | |
| "oracle_eval_ap": 0.6709011946518153 | |
| }, | |
| { | |
| "split": 12, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6683911800553934, | |
| "top5_eval_ap": 0.6683911800553934, | |
| "oracle_eval_ap": 0.6709011946518153 | |
| }, | |
| { | |
| "split": 12, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6537367742804812, | |
| "top5_eval_ap": 0.655984953380698, | |
| "oracle_eval_ap": 0.6709011946518153 | |
| }, | |
| { | |
| "split": 12, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6537367742804812, | |
| "top5_eval_ap": 0.655984953380698, | |
| "oracle_eval_ap": 0.6709011946518153 | |
| }, | |
| { | |
| "split": 12, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6558046451703515, | |
| "top5_eval_ap": 0.6571243796655735, | |
| "oracle_eval_ap": 0.6709011946518153 | |
| }, | |
| { | |
| "split": 12, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5571257414916191, | |
| "top5_eval_ap": 0.5724004287234477, | |
| "oracle_eval_ap": 0.6101282070029501 | |
| }, | |
| { | |
| "split": 12, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.5923391898216597, | |
| "top5_eval_ap": 0.6101282070029501, | |
| "oracle_eval_ap": 0.6101282070029501 | |
| }, | |
| { | |
| "split": 12, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5534026034732688, | |
| "top5_eval_ap": 0.6101282070029501, | |
| "oracle_eval_ap": 0.6101282070029501 | |
| }, | |
| { | |
| "split": 12, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5534026034732688, | |
| "top5_eval_ap": 0.6101282070029501, | |
| "oracle_eval_ap": 0.6101282070029501 | |
| }, | |
| { | |
| "split": 12, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5923391898216597, | |
| "top5_eval_ap": 0.5923391898216597, | |
| "oracle_eval_ap": 0.6101282070029501 | |
| }, | |
| { | |
| "split": 13, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.46424878925358426, | |
| "top5_eval_ap": 0.5526461047395709, | |
| "oracle_eval_ap": 0.6596211026179731 | |
| }, | |
| { | |
| "split": 13, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6423379220862562, | |
| "top5_eval_ap": 0.6531102460331455, | |
| "oracle_eval_ap": 0.6596211026179731 | |
| }, | |
| { | |
| "split": 13, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6400759109195303, | |
| "top5_eval_ap": 0.648796848874525, | |
| "oracle_eval_ap": 0.6596211026179731 | |
| }, | |
| { | |
| "split": 13, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6382008066612993, | |
| "top5_eval_ap": 0.648796848874525, | |
| "oracle_eval_ap": 0.6596211026179731 | |
| }, | |
| { | |
| "split": 13, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6433876237385332, | |
| "top5_eval_ap": 0.6596211026179731, | |
| "oracle_eval_ap": 0.6596211026179731 | |
| }, | |
| { | |
| "split": 13, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5542102452808897, | |
| "top5_eval_ap": 0.5713349471167026, | |
| "oracle_eval_ap": 0.6101575436136442 | |
| }, | |
| { | |
| "split": 13, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.597567237938185, | |
| "top5_eval_ap": 0.6101575436136442, | |
| "oracle_eval_ap": 0.6101575436136442 | |
| }, | |
| { | |
| "split": 13, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5560502121944897, | |
| "top5_eval_ap": 0.6101575436136442, | |
| "oracle_eval_ap": 0.6101575436136442 | |
| }, | |
| { | |
| "split": 13, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5560502121944897, | |
| "top5_eval_ap": 0.6101575436136442, | |
| "oracle_eval_ap": 0.6101575436136442 | |
| }, | |
| { | |
| "split": 13, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.597567237938185, | |
| "top5_eval_ap": 0.597567237938185, | |
| "oracle_eval_ap": 0.6101575436136442 | |
| }, | |
| { | |
| "split": 14, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.48573170965343504, | |
| "top5_eval_ap": 0.5571016213991584, | |
| "oracle_eval_ap": 0.706811019289939 | |
| }, | |
| { | |
| "split": 14, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.677433995089546, | |
| "top5_eval_ap": 0.6986598664783432, | |
| "oracle_eval_ap": 0.706811019289939 | |
| }, | |
| { | |
| "split": 14, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6743009496363566, | |
| "top5_eval_ap": 0.6970399756161322, | |
| "oracle_eval_ap": 0.706811019289939 | |
| }, | |
| { | |
| "split": 14, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6743009496363566, | |
| "top5_eval_ap": 0.6970399756161322, | |
| "oracle_eval_ap": 0.706811019289939 | |
| }, | |
| { | |
| "split": 14, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.706811019289939, | |
| "top5_eval_ap": 0.706811019289939, | |
| "oracle_eval_ap": 0.706811019289939 | |
| }, | |
| { | |
| "split": 14, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5576376597054382, | |
| "top5_eval_ap": 0.5669921922681518, | |
| "oracle_eval_ap": 0.6436329100802731 | |
| }, | |
| { | |
| "split": 14, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6115519107659961, | |
| "top5_eval_ap": 0.6115519107659961, | |
| "oracle_eval_ap": 0.6436329100802731 | |
| }, | |
| { | |
| "split": 14, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5543941938024801, | |
| "top5_eval_ap": 0.6115519107659961, | |
| "oracle_eval_ap": 0.6436329100802731 | |
| }, | |
| { | |
| "split": 14, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5543941938024801, | |
| "top5_eval_ap": 0.6115519107659961, | |
| "oracle_eval_ap": 0.6436329100802731 | |
| }, | |
| { | |
| "split": 14, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5930515876054733, | |
| "top5_eval_ap": 0.6436329100802731, | |
| "oracle_eval_ap": 0.6436329100802731 | |
| }, | |
| { | |
| "split": 15, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.478137602574508, | |
| "top5_eval_ap": 0.6035334495920432, | |
| "oracle_eval_ap": 0.6907808119723886 | |
| }, | |
| { | |
| "split": 15, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6585420344863675, | |
| "top5_eval_ap": 0.6626179830682002, | |
| "oracle_eval_ap": 0.6907808119723886 | |
| }, | |
| { | |
| "split": 15, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6488358732455428, | |
| "top5_eval_ap": 0.6585420344863675, | |
| "oracle_eval_ap": 0.6907808119723886 | |
| }, | |
| { | |
| "split": 15, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6488358732455428, | |
| "top5_eval_ap": 0.6585420344863675, | |
| "oracle_eval_ap": 0.6907808119723886 | |
| }, | |
| { | |
| "split": 15, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6526664898991454, | |
| "top5_eval_ap": 0.6626179830682002, | |
| "oracle_eval_ap": 0.6907808119723886 | |
| }, | |
| { | |
| "split": 15, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5263984758406471, | |
| "top5_eval_ap": 0.5405265639280998, | |
| "oracle_eval_ap": 0.5941351543506521 | |
| }, | |
| { | |
| "split": 15, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.5757554508817501, | |
| "top5_eval_ap": 0.5941351543506521, | |
| "oracle_eval_ap": 0.5941351543506521 | |
| }, | |
| { | |
| "split": 15, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5757554508817501, | |
| "top5_eval_ap": 0.5757554508817501, | |
| "oracle_eval_ap": 0.5941351543506521 | |
| }, | |
| { | |
| "split": 15, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5757554508817501, | |
| "top5_eval_ap": 0.5757554508817501, | |
| "oracle_eval_ap": 0.5941351543506521 | |
| }, | |
| { | |
| "split": 15, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5609746889071622, | |
| "top5_eval_ap": 0.5941351543506521, | |
| "oracle_eval_ap": 0.5941351543506521 | |
| }, | |
| { | |
| "split": 16, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.48347433637766246, | |
| "top5_eval_ap": 0.5542497960909861, | |
| "oracle_eval_ap": 0.6792151940442446 | |
| }, | |
| { | |
| "split": 16, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6704260223189045, | |
| "top5_eval_ap": 0.6704260223189045, | |
| "oracle_eval_ap": 0.6792151940442446 | |
| }, | |
| { | |
| "split": 16, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6496261958623482, | |
| "top5_eval_ap": 0.667923356262339, | |
| "oracle_eval_ap": 0.6792151940442446 | |
| }, | |
| { | |
| "split": 16, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6538902227589642, | |
| "top5_eval_ap": 0.657860437409312, | |
| "oracle_eval_ap": 0.6792151940442446 | |
| }, | |
| { | |
| "split": 16, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6519335091567344, | |
| "top5_eval_ap": 0.6792151940442446, | |
| "oracle_eval_ap": 0.6792151940442446 | |
| }, | |
| { | |
| "split": 16, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5416059436051286, | |
| "top5_eval_ap": 0.5600151915305791, | |
| "oracle_eval_ap": 0.5932504427370535 | |
| }, | |
| { | |
| "split": 16, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.5850524686308571, | |
| "top5_eval_ap": 0.5932504427370535, | |
| "oracle_eval_ap": 0.5932504427370535 | |
| }, | |
| { | |
| "split": 16, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5327389623147321, | |
| "top5_eval_ap": 0.5932504427370535, | |
| "oracle_eval_ap": 0.5932504427370535 | |
| }, | |
| { | |
| "split": 16, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5932504427370535, | |
| "top5_eval_ap": 0.5932504427370535, | |
| "oracle_eval_ap": 0.5932504427370535 | |
| }, | |
| { | |
| "split": 16, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5850524686308571, | |
| "top5_eval_ap": 0.5850524686308571, | |
| "oracle_eval_ap": 0.5932504427370535 | |
| }, | |
| { | |
| "split": 17, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.4649606317608773, | |
| "top5_eval_ap": 0.5340079584689927, | |
| "oracle_eval_ap": 0.6649893230620163 | |
| }, | |
| { | |
| "split": 17, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6428038478697953, | |
| "top5_eval_ap": 0.6539513241791239, | |
| "oracle_eval_ap": 0.6649893230620163 | |
| }, | |
| { | |
| "split": 17, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6428038478697953, | |
| "top5_eval_ap": 0.6629995698938378, | |
| "oracle_eval_ap": 0.6649893230620163 | |
| }, | |
| { | |
| "split": 17, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6428038478697953, | |
| "top5_eval_ap": 0.6456547506299378, | |
| "oracle_eval_ap": 0.6649893230620163 | |
| }, | |
| { | |
| "split": 17, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6456547506299378, | |
| "top5_eval_ap": 0.6649893230620163, | |
| "oracle_eval_ap": 0.6649893230620163 | |
| }, | |
| { | |
| "split": 17, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5348676749866335, | |
| "top5_eval_ap": 0.5446056544806404, | |
| "oracle_eval_ap": 0.579352918072389 | |
| }, | |
| { | |
| "split": 17, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.5718675106734894, | |
| "top5_eval_ap": 0.579352918072389, | |
| "oracle_eval_ap": 0.579352918072389 | |
| }, | |
| { | |
| "split": 17, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5261790671930053, | |
| "top5_eval_ap": 0.579352918072389, | |
| "oracle_eval_ap": 0.579352918072389 | |
| }, | |
| { | |
| "split": 17, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5261790671930053, | |
| "top5_eval_ap": 0.579352918072389, | |
| "oracle_eval_ap": 0.579352918072389 | |
| }, | |
| { | |
| "split": 17, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5718675106734894, | |
| "top5_eval_ap": 0.579352918072389, | |
| "oracle_eval_ap": 0.579352918072389 | |
| }, | |
| { | |
| "split": 18, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.4531073060880456, | |
| "top5_eval_ap": 0.5436616702921386, | |
| "oracle_eval_ap": 0.6838512120663813 | |
| }, | |
| { | |
| "split": 18, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6223927371229265, | |
| "top5_eval_ap": 0.6648512526938274, | |
| "oracle_eval_ap": 0.6838512120663813 | |
| }, | |
| { | |
| "split": 18, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.654015043616063, | |
| "top5_eval_ap": 0.6638824217815192, | |
| "oracle_eval_ap": 0.6838512120663813 | |
| }, | |
| { | |
| "split": 18, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.654015043616063, | |
| "top5_eval_ap": 0.6638824217815192, | |
| "oracle_eval_ap": 0.6838512120663813 | |
| }, | |
| { | |
| "split": 18, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.6648512526938274, | |
| "top5_eval_ap": 0.6648512526938274, | |
| "oracle_eval_ap": 0.6838512120663813 | |
| }, | |
| { | |
| "split": 18, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5120617315361997, | |
| "top5_eval_ap": 0.5365263833358878, | |
| "oracle_eval_ap": 0.578623298812676 | |
| }, | |
| { | |
| "split": 18, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.5664905732884257, | |
| "top5_eval_ap": 0.578623298812676, | |
| "oracle_eval_ap": 0.578623298812676 | |
| }, | |
| { | |
| "split": 18, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5141500314699055, | |
| "top5_eval_ap": 0.5664905732884257, | |
| "oracle_eval_ap": 0.578623298812676 | |
| }, | |
| { | |
| "split": 18, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5664905732884257, | |
| "top5_eval_ap": 0.5664905732884257, | |
| "oracle_eval_ap": 0.578623298812676 | |
| }, | |
| { | |
| "split": 18, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5458621922252527, | |
| "top5_eval_ap": 0.578623298812676, | |
| "oracle_eval_ap": 0.578623298812676 | |
| }, | |
| { | |
| "split": 19, | |
| "policy": "SMAC", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.44407823183784984, | |
| "top5_eval_ap": 0.5488701040645936, | |
| "oracle_eval_ap": 0.6512634701693791 | |
| }, | |
| { | |
| "split": 19, | |
| "policy": "SMAC", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.6216870554572433, | |
| "top5_eval_ap": 0.6460088873809331, | |
| "oracle_eval_ap": 0.6512634701693791 | |
| }, | |
| { | |
| "split": 19, | |
| "policy": "SMAC", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.6216870554572433, | |
| "top5_eval_ap": 0.6216870554572433, | |
| "oracle_eval_ap": 0.6512634701693791 | |
| }, | |
| { | |
| "split": 19, | |
| "policy": "SMAC", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.6216870554572433, | |
| "top5_eval_ap": 0.6216870554572433, | |
| "oracle_eval_ap": 0.6512634701693791 | |
| }, | |
| { | |
| "split": 19, | |
| "policy": "SMAC", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.630631866544245, | |
| "top5_eval_ap": 0.630631866544245, | |
| "oracle_eval_ap": 0.6512634701693791 | |
| }, | |
| { | |
| "split": 19, | |
| "policy": "LLM", | |
| "selector": "train_val_ap", | |
| "deploy_eval_ap": 0.5381205484033079, | |
| "top5_eval_ap": 0.5467457949477925, | |
| "oracle_eval_ap": 0.5795121120881965 | |
| }, | |
| { | |
| "split": 19, | |
| "policy": "LLM", | |
| "selector": "proxy_ap", | |
| "deploy_eval_ap": 0.5690007856001769, | |
| "top5_eval_ap": 0.5791181610505296, | |
| "oracle_eval_ap": 0.5795121120881965 | |
| }, | |
| { | |
| "split": 19, | |
| "policy": "LLM", | |
| "selector": "proxy_logloss", | |
| "deploy_eval_ap": 0.5213055580115359, | |
| "top5_eval_ap": 0.5791181610505296, | |
| "oracle_eval_ap": 0.5795121120881965 | |
| }, | |
| { | |
| "split": 19, | |
| "policy": "LLM", | |
| "selector": "proxy_brier", | |
| "deploy_eval_ap": 0.5791181610505296, | |
| "top5_eval_ap": 0.5791181610505296, | |
| "oracle_eval_ap": 0.5795121120881965 | |
| }, | |
| { | |
| "split": 19, | |
| "policy": "LLM", | |
| "selector": "proxy_auc", | |
| "deploy_eval_ap": 0.5690007856001769, | |
| "top5_eval_ap": 0.5795121120881965, | |
| "oracle_eval_ap": 0.5795121120881965 | |
| } | |
| ] | |
| } |