| [ |
| { |
| "quiz_id": 1309, |
| "ground_truth": "C", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.44677734375, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 228, |
| "ground_truth": "A", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.56494140625, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 51, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.376708984375, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1518, |
| "ground_truth": "D", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.974609375, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 563, |
| "ground_truth": "D", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.381103515625, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 501, |
| "ground_truth": "C", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.71875, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 457, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.27197265625, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 285, |
| "ground_truth": "B", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.278076171875, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1508, |
| "ground_truth": "C", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.3203125, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 209, |
| "ground_truth": "D", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.3203125, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1385, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.53271484375, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1516, |
| "ground_truth": "B", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.64697265625, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1116, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.29736328125, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 178, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.353271484375, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1209, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.453125, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 864, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.51513671875, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 65, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.313720703125, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 61, |
| "ground_truth": "D", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.463623046875, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 191, |
| "ground_truth": "A", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.408447265625, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 447, |
| "ground_truth": "C", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.342529296875, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 476, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.3046875, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1034, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.74365234375, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1232, |
| "ground_truth": "D", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.33056640625, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 54, |
| "ground_truth": "D", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.414306640625, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1149, |
| "ground_truth": "C", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.388916015625, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 407, |
| "ground_truth": "D", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.60498046875, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1466, |
| "ground_truth": "B", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.453125, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1330, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.403076171875, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1436, |
| "ground_truth": "C", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.456298828125, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 859, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.95751953125, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 451, |
| "ground_truth": "D", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.468017578125, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 919, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.28759765625, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1206, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.396728515625, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 569, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.88134765625, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 13, |
| "ground_truth": "C", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.42822265625, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 326, |
| "ground_truth": "D", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.34130859375, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1429, |
| "ground_truth": "B", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.3974609375, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 865, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.9482421875, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 696, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.51025390625, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 318, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.30078125, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 440, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.7216796875, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 689, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.368896484375, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 189, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.315673828125, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 778, |
| "ground_truth": "D", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.79541015625, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 198, |
| "ground_truth": "B", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.3310546875, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 735, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.8076171875, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 704, |
| "ground_truth": "B", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.448974609375, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1236, |
| "ground_truth": "C", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.408203125, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 541, |
| "ground_truth": "A", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.44970703125, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 88, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.54833984375, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1494, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.355224609375, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 940, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.30615234375, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1098, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.361083984375, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 255, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.298583984375, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 775, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.57421875, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 161, |
| "ground_truth": "D", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.298828125, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1130, |
| "ground_truth": "C", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.40087890625, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 600, |
| "ground_truth": "B", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.3583984375, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1287, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.30859375, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1266, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.386962890625, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 740, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.27490234375, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1182, |
| "ground_truth": "B", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.411865234375, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 393, |
| "ground_truth": "A", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.623046875, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1442, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.57763671875, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 142, |
| "ground_truth": "D", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.38525390625, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 93, |
| "ground_truth": "D", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.84228515625, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1354, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.681640625, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 466, |
| "ground_truth": "C", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.290771484375, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 592, |
| "ground_truth": "A", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.5087890625, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 163, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.309326171875, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 206, |
| "ground_truth": "A", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.36083984375, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 928, |
| "ground_truth": "A", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.289794921875, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1301, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.3837890625, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 747, |
| "ground_truth": "D", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.41650390625, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 333, |
| "ground_truth": "A", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.359130859375, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 758, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.892578125, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 727, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.96435546875, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 429, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.40869140625, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1372, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.82568359375, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 546, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.9599609375, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1437, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.401611328125, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1399, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.490234375, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1327, |
| "ground_truth": "B", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.33056640625, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 146, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.419677734375, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1247, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.32666015625, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1300, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.263671875, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 350, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.307373046875, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1093, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.317626953125, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1493, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.939453125, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 334, |
| "ground_truth": "D", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.5390625, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 946, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.299072265625, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 777, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.544921875, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 552, |
| "ground_truth": "A", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.428466796875, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1310, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.41748046875, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1409, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.486572265625, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1140, |
| "ground_truth": "D", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.278564453125, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 449, |
| "ground_truth": "D", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.334228515625, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1402, |
| "ground_truth": "B", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.336669921875, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 664, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.93505859375, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 114, |
| "ground_truth": "D", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.435302734375, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 469, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.2890625, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 646, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.466552734375, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 821, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.88427734375, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 548, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.9853515625, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 135, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.443603515625, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 432, |
| "ground_truth": "B", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.468994140625, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1161, |
| "ground_truth": "D", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.4111328125, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1470, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.6083984375, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 644, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.373291015625, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 435, |
| "ground_truth": "D", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.447509765625, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1342, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.402099609375, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1022, |
| "ground_truth": "C", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.401123046875, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 810, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.45556640625, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1316, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.36474609375, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 939, |
| "ground_truth": "A", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.28759765625, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 292, |
| "ground_truth": "D", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.302001953125, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 542, |
| "ground_truth": "D", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.44287109375, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 505, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.406005859375, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1525, |
| "ground_truth": "B", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.7001953125, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1103, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.43017578125, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 538, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.300048828125, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1529, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.85107421875, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1197, |
| "ground_truth": "B", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.32080078125, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 877, |
| "ground_truth": "C", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.3955078125, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1195, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.46630859375, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 817, |
| "ground_truth": "D", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.509765625, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 741, |
| "ground_truth": "A", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.377197265625, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 283, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.327392578125, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1043, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.62841796875, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1010, |
| "ground_truth": "D", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.287109375, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 186, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.52099609375, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 96, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.413330078125, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 224, |
| "ground_truth": "D", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.31884765625, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 313, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.418212890625, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1285, |
| "ground_truth": "B", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.318603515625, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 327, |
| "ground_truth": "D", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.5048828125, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1393, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.7958984375, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1221, |
| "ground_truth": "A", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.392578125, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 130, |
| "ground_truth": "D", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.428955078125, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 788, |
| "ground_truth": "B", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.271240234375, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 781, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.29345703125, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1220, |
| "ground_truth": "A", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.428955078125, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 958, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.286376953125, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1083, |
| "ground_truth": "B", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.2939453125, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 514, |
| "ground_truth": "A", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.5419921875, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1133, |
| "ground_truth": "A", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.400390625, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 23, |
| "ground_truth": "C", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.4228515625, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1476, |
| "ground_truth": "D", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.673828125, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 234, |
| "ground_truth": "A", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.44921875, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1396, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.5234375, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1099, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.89208984375, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1312, |
| "ground_truth": "D", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.396728515625, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 601, |
| "ground_truth": "A", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.414794921875, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 890, |
| "ground_truth": "D", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.487060546875, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 323, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.8212890625, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 929, |
| "ground_truth": "D", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.287353515625, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 6, |
| "ground_truth": "B", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.376220703125, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1478, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.7705078125, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1473, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.440185546875, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 539, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.89306640625, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1025, |
| "ground_truth": "D", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.6728515625, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 365, |
| "ground_truth": "A", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.470947265625, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1039, |
| "ground_truth": "D", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.341552734375, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 217, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.56494140625, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1280, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.60595703125, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 611, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.376220703125, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1308, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.460205078125, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 765, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.93359375, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 330, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.3525390625, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1104, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.86328125, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1086, |
| "ground_truth": "C", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.74365234375, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.296142578125, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1226, |
| "ground_truth": "C", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.393310546875, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 663, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.822265625, |
| "ppl_prediction": "B", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1000, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.28076171875, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 39, |
| "ground_truth": "D", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.339599609375, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 229, |
| "ground_truth": "B", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.331787109375, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 743, |
| "ground_truth": "B", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.32373046875, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 629, |
| "ground_truth": "D", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.517578125, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 490, |
| "ground_truth": "A", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.333251953125, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 118, |
| "ground_truth": "B", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.30322265625, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 493, |
| "ground_truth": "C", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.5322265625, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 175, |
| "ground_truth": "A", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.476318359375, |
| "ppl_prediction": "D", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1498, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.6748046875, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 995, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.286376953125, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 141, |
| "ground_truth": "D", |
| "raw_static_prediction": "D", |
| "raw_static_confidence": 0.30517578125, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1090, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.71435546875, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 257, |
| "ground_truth": "B", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.324462890625, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 262, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.306396484375, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1351, |
| "ground_truth": "A", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.59521484375, |
| "ppl_prediction": "A", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 973, |
| "ground_truth": "C", |
| "raw_static_prediction": "B", |
| "raw_static_confidence": 0.281982421875, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1125, |
| "ground_truth": "B", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.465576171875, |
| "ppl_prediction": "B", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 338, |
| "ground_truth": "B", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.306396484375, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1080, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.72412109375, |
| "ppl_prediction": "C", |
| "is_raw_correct": true, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1242, |
| "ground_truth": "C", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.49609375, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 866, |
| "ground_truth": "B", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.32568359375, |
| "ppl_prediction": "C", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 433, |
| "ground_truth": "D", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.31298828125, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 1412, |
| "ground_truth": "D", |
| "raw_static_prediction": "A", |
| "raw_static_confidence": 0.461669921875, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": false |
| }, |
| { |
| "quiz_id": 411, |
| "ground_truth": "A", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.58544921875, |
| "ppl_prediction": "A", |
| "is_raw_correct": false, |
| "is_ppl_correct": true |
| }, |
| { |
| "quiz_id": 1460, |
| "ground_truth": "C", |
| "raw_static_prediction": "C", |
| "raw_static_confidence": 0.3486328125, |
| "ppl_prediction": "D", |
| "is_raw_correct": true, |
| "is_ppl_correct": false |
| } |
| ] |