| { |
| "meta": { |
| "model": "meta-llama/Llama-2-7b-chat-hf", |
| "device": "cuda", |
| "dtype": "fp32", |
| "layer": 10, |
| "layers_path": "model.layers", |
| "seed": 123, |
| "task": "humaneval", |
| "eval_mode": "gen_code_compile", |
| "eval_meta": { |
| "hf_id": "openai_humaneval", |
| "split": "test", |
| "n_total": 164 |
| }, |
| "n_eval_loaded": 164, |
| "n_scanned": 164, |
| "base_acc_scan": 0.3475609756097561, |
| "ablt_acc_scan": 0.31097560975609756, |
| "flips_total": 38, |
| "flips_used": 38, |
| "patch_steps": [ |
| 0, |
| 1, |
| 2, |
| 3 |
| ], |
| "patch_n_steps": 4, |
| "Qs_path": "Q_shared_layer10.npy", |
| "Qs_shape": [ |
| 4096, |
| 97 |
| ], |
| "gold_text_prefix": " ", |
| "dist_text_prefix": " ", |
| "gold_max_tokens": 0, |
| "distractor_mode": "next_gold", |
| "answer_prefix_effective": "\nFinal answer:", |
| "max_new_tokens_effective": 256, |
| "run_coeff_controls": false, |
| "use_benchmark_loader": false, |
| "hf_id": "openai_humaneval", |
| "hf_split": "test" |
| }, |
| "summary_on_flips": { |
| "patched_self": { |
| "n": 38, |
| "rescued": 8, |
| "rescued_pct": 21.05263157894737 |
| }, |
| "control_time_shuffled": { |
| "n": 38, |
| "rescued": 8, |
| "rescued_pct": 21.05263157894737 |
| }, |
| "control_shared_randvec": { |
| "n": 38, |
| "rescued": 10, |
| "rescued_pct": 26.31578947368421 |
| }, |
| "control_rand_subspace": { |
| "n": 38, |
| "rescued": 7, |
| "rescued_pct": 18.42105263157895 |
| }, |
| "control_patch_nonshared": { |
| "n": 38, |
| "rescued": 2, |
| "rescued_pct": 5.2631578947368425 |
| } |
| }, |
| "scan_rows": [ |
| { |
| "ex_id": "openai_humaneval-test-18", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-31", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-158", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-43", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-39", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-15", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-151", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-101", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-34", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 232 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-52", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-113", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-119", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-83", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-116", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-56", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-131", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 191 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-1", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-159", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 163 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-123", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-23", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 227 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-124", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-38", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-84", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-41", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 174 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-134", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-2", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-80", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-74", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-162", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-138", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-87", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-145", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-54", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-109", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-102", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-62", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-129", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-110", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 208 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-4", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 222 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-8", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-97", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-137", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-96", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-65", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 237 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-114", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-117", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-155", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 233 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-37", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-115", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-86", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-85", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-53", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-130", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-139", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-81", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-19", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-69", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-122", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-108", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-48", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 244 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-17", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-95", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-91", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-79", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-14", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-77", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-3", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 156 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-146", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-57", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-143", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-0", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-106", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-50", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-58", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-147", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-160", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-67", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-55", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-118", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 204 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-154", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-25", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-36", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-63", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-132", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-21", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-9", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-72", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-128", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-126", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-70", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-40", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-66", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-71", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-107", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-32", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-11", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-47", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-22", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-140", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-30", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-10", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-112", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-111", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-28", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-135", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-94", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-13", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-7", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-157", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-49", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-120", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-89", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-98", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-100", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-64", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-103", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-125", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-51", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 229 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-90", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-152", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-24", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-20", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-148", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-92", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-26", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 235 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-16", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-142", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-99", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-127", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 171 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-156", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-141", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-78", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-68", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-93", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-60", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-82", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-59", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-149", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 205 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-42", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-163", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-133", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-161", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-29", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 239 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-27", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-61", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 253 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-104", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-45", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 213 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-75", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-6", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-136", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-33", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-44", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-88", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 233 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-12", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 177 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-105", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-121", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-144", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-73", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-76", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-5", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": true, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-46", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-150", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-35", |
| "baseline": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-153", |
| "baseline": { |
| "compile_ok": true, |
| "n_gen_tokens": 242 |
| }, |
| "ablated": { |
| "compile_ok": false, |
| "n_gen_tokens": 256 |
| } |
| } |
| ], |
| "flip_rows": [ |
| { |
| "ex_id": "openai_humaneval-test-31", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-158", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-39", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-101", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": true |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-56", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-1", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": true |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": true |
| }, |
| "control_rand_subspace": { |
| "compile_ok": true |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-159", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": true |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-123", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": true |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-38", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": true |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-84", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": true |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": true |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-134", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": true |
| }, |
| "control_time_shuffled": { |
| "compile_ok": true |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": true |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-54", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-110", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-86", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": true |
| }, |
| "control_rand_subspace": { |
| "compile_ok": true |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": true |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-53", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-81", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-122", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-17", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": true |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-95", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": true |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-3", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": true |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-58", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": true |
| }, |
| "control_shared_randvec": { |
| "compile_ok": true |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-147", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": true |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-55", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-118", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-30", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-112", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": true |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": true |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-49", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-64", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-92", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": true |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": true |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-127", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": true |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-68", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-93", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-133", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": true |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-61", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": true |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-104", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": true |
| }, |
| "control_time_shuffled": { |
| "compile_ok": true |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-12", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": true |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-105", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": false |
| }, |
| "control_time_shuffled": { |
| "compile_ok": true |
| }, |
| "control_shared_randvec": { |
| "compile_ok": true |
| }, |
| "control_rand_subspace": { |
| "compile_ok": true |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| }, |
| { |
| "ex_id": "openai_humaneval-test-153", |
| "baseline": { |
| "compile_ok": true |
| }, |
| "ablated": { |
| "compile_ok": false |
| }, |
| "patched_self": { |
| "compile_ok": true |
| }, |
| "control_time_shuffled": { |
| "compile_ok": false |
| }, |
| "control_shared_randvec": { |
| "compile_ok": false |
| }, |
| "control_rand_subspace": { |
| "compile_ok": false |
| }, |
| "control_patch_nonshared": { |
| "compile_ok": false |
| } |
| } |
| ] |
| } |