decodeshare / artifacts /patch_back /results /openanswer /humaneval_gencode_compile.json
Zishan-Shao's picture
Upload folder using huggingface_hub
aa0e435 verified
{
"meta": {
"model": "meta-llama/Llama-2-7b-chat-hf",
"device": "cuda",
"dtype": "fp32",
"layer": 10,
"layers_path": "model.layers",
"seed": 123,
"task": "humaneval",
"eval_mode": "gen_code_compile",
"eval_meta": {
"hf_id": "openai_humaneval",
"split": "test",
"n_total": 164
},
"n_eval_loaded": 164,
"n_scanned": 164,
"base_acc_scan": 0.3475609756097561,
"ablt_acc_scan": 0.31097560975609756,
"flips_total": 38,
"flips_used": 38,
"patch_steps": [
0,
1,
2,
3
],
"patch_n_steps": 4,
"Qs_path": "Q_shared_layer10.npy",
"Qs_shape": [
4096,
97
],
"gold_text_prefix": " ",
"dist_text_prefix": " ",
"gold_max_tokens": 0,
"distractor_mode": "next_gold",
"answer_prefix_effective": "\nFinal answer:",
"max_new_tokens_effective": 256,
"run_coeff_controls": false,
"use_benchmark_loader": false,
"hf_id": "openai_humaneval",
"hf_split": "test"
},
"summary_on_flips": {
"patched_self": {
"n": 38,
"rescued": 8,
"rescued_pct": 21.05263157894737
},
"control_time_shuffled": {
"n": 38,
"rescued": 8,
"rescued_pct": 21.05263157894737
},
"control_shared_randvec": {
"n": 38,
"rescued": 10,
"rescued_pct": 26.31578947368421
},
"control_rand_subspace": {
"n": 38,
"rescued": 7,
"rescued_pct": 18.42105263157895
},
"control_patch_nonshared": {
"n": 38,
"rescued": 2,
"rescued_pct": 5.2631578947368425
}
},
"scan_rows": [
{
"ex_id": "openai_humaneval-test-18",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-31",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-158",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-43",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-39",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-15",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-151",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-101",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-34",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 232
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-52",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-113",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-119",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-83",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-116",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-56",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-131",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 191
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-1",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-159",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 163
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-123",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-23",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 227
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-124",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-38",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-84",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-41",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 174
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-134",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-2",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-80",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-74",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-162",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-138",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-87",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-145",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-54",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-109",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-102",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-62",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-129",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-110",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 208
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-4",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 222
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-8",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-97",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-137",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-96",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-65",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 237
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-114",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-117",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-155",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 233
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-37",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-115",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-86",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-85",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-53",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-130",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-139",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-81",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-19",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-69",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-122",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-108",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-48",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 244
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-17",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-95",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-91",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-79",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-14",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-77",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-3",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 156
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-146",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-57",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-143",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-0",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-106",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-50",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-58",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-147",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-160",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-67",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-55",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-118",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 204
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-154",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-25",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-36",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-63",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-132",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-21",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-9",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-72",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-128",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-126",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-70",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-40",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-66",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-71",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-107",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-32",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-11",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-47",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-22",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-140",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-30",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-10",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-112",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-111",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-28",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-135",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-94",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-13",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-7",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-157",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-49",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-120",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-89",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-98",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-100",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-64",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-103",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-125",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-51",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 229
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-90",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-152",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-24",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-20",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-148",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-92",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-26",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 235
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-16",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-142",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-99",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-127",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 171
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-156",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-141",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-78",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-68",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-93",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-60",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-82",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-59",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-149",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 205
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-42",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-163",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-133",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-161",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-29",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 239
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-27",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-61",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 253
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-104",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-45",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 213
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-75",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-6",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-136",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-33",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-44",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-88",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 233
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-12",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 177
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-105",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-121",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-144",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-73",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-76",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-5",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": true,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-46",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-150",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-35",
"baseline": {
"compile_ok": false,
"n_gen_tokens": 256
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
},
{
"ex_id": "openai_humaneval-test-153",
"baseline": {
"compile_ok": true,
"n_gen_tokens": 242
},
"ablated": {
"compile_ok": false,
"n_gen_tokens": 256
}
}
],
"flip_rows": [
{
"ex_id": "openai_humaneval-test-31",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-158",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-39",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-101",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": true
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-56",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-1",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": true
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": true
},
"control_rand_subspace": {
"compile_ok": true
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-159",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": true
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-123",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": true
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-38",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": true
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-84",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": true
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": true
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-134",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": true
},
"control_time_shuffled": {
"compile_ok": true
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": true
}
},
{
"ex_id": "openai_humaneval-test-54",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-110",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-86",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": true
},
"control_rand_subspace": {
"compile_ok": true
},
"control_patch_nonshared": {
"compile_ok": true
}
},
{
"ex_id": "openai_humaneval-test-53",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-81",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-122",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-17",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": true
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-95",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": true
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-3",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": true
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-58",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": true
},
"control_shared_randvec": {
"compile_ok": true
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-147",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": true
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-55",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-118",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-30",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-112",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": true
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": true
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-49",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-64",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-92",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": true
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": true
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-127",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": true
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-68",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-93",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-133",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": true
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-61",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": true
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-104",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": true
},
"control_time_shuffled": {
"compile_ok": true
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-12",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": true
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-105",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": false
},
"control_time_shuffled": {
"compile_ok": true
},
"control_shared_randvec": {
"compile_ok": true
},
"control_rand_subspace": {
"compile_ok": true
},
"control_patch_nonshared": {
"compile_ok": false
}
},
{
"ex_id": "openai_humaneval-test-153",
"baseline": {
"compile_ok": true
},
"ablated": {
"compile_ok": false
},
"patched_self": {
"compile_ok": true
},
"control_time_shuffled": {
"compile_ok": false
},
"control_shared_randvec": {
"compile_ok": false
},
"control_rand_subspace": {
"compile_ok": false
},
"control_patch_nonshared": {
"compile_ok": false
}
}
]
}