diff --git "a/evals/core_9mcqa/task-005-boolq-predictions.jsonl" "b/evals/core_9mcqa/task-005-boolq-predictions.jsonl" new file mode 100644--- /dev/null +++ "b/evals/core_9mcqa/task-005-boolq-predictions.jsonl" @@ -0,0 +1,1000 @@ +{"doc_id": 0, "native_id": 3187, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3579683005809784, "incorrect_loss_raw": 1.375915765762329, "correct_loss_per_char": 0.0894920751452446, "incorrect_loss_per_char": 0.45863858858744305, "correct_loss_per_token": 0.3579683005809784, "incorrect_loss_per_token": 1.375915765762329, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3579683005809784, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.3579683005809784, "logits_per_char": -0.0894920751452446, "num_chars": 4}, {"sum_logits": -1.375915765762329, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.375915765762329, "logits_per_char": -0.45863858858744305, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1, "native_id": 1805, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9337705373764038, "incorrect_loss_raw": 0.5820800065994263, "correct_loss_per_char": 0.3112568457921346, "incorrect_loss_per_char": 0.14552000164985657, "correct_loss_per_token": 0.9337705373764038, "incorrect_loss_per_token": 0.5820800065994263, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5820800065994263, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.5820800065994263, "logits_per_char": -0.14552000164985657, "num_chars": 4}, {"sum_logits": -0.9337705373764038, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -0.9337705373764038, "logits_per_char": -0.3112568457921346, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 2, "native_id": 478, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.525580883026123, "incorrect_loss_raw": 0.2807081341743469, "correct_loss_per_char": 0.5085269610087076, "incorrect_loss_per_char": 0.07017703354358673, "correct_loss_per_token": 1.525580883026123, "incorrect_loss_per_token": 0.2807081341743469, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2807081341743469, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.2807081341743469, "logits_per_char": -0.07017703354358673, "num_chars": 4}, {"sum_logits": -1.525580883026123, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.525580883026123, "logits_per_char": -0.5085269610087076, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 3, "native_id": 30, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3320602774620056, "incorrect_loss_raw": 1.4507026672363281, "correct_loss_per_char": 0.0830150693655014, "incorrect_loss_per_char": 0.4835675557454427, "correct_loss_per_token": 0.3320602774620056, "incorrect_loss_per_token": 1.4507026672363281, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3320602774620056, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": true, "logits_per_token": -0.3320602774620056, "logits_per_char": -0.0830150693655014, "num_chars": 4}, {"sum_logits": -1.4507026672363281, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": false, "logits_per_token": -1.4507026672363281, "logits_per_char": -0.4835675557454427, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 4, "native_id": 371, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20125991106033325, "incorrect_loss_raw": 1.9293098449707031, "correct_loss_per_char": 0.05031497776508331, "incorrect_loss_per_char": 0.643103281656901, "correct_loss_per_token": 0.20125991106033325, "incorrect_loss_per_token": 1.9293098449707031, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20125991106033325, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.20125991106033325, "logits_per_char": -0.05031497776508331, "num_chars": 4}, {"sum_logits": -1.9293098449707031, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -1.9293098449707031, "logits_per_char": -0.643103281656901, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 5, "native_id": 2384, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2837315499782562, "incorrect_loss_raw": 1.5969822406768799, "correct_loss_per_char": 0.07093288749456406, "incorrect_loss_per_char": 0.53232741355896, "correct_loss_per_token": 0.2837315499782562, "incorrect_loss_per_token": 1.5969822406768799, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2837315499782562, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": true, "logits_per_token": -0.2837315499782562, "logits_per_char": -0.07093288749456406, "num_chars": 4}, {"sum_logits": -1.5969822406768799, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -1.5969822406768799, "logits_per_char": -0.53232741355896, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 6, "native_id": 143, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34293144941329956, "incorrect_loss_raw": 1.4537203311920166, "correct_loss_per_char": 0.08573286235332489, "incorrect_loss_per_char": 0.4845734437306722, "correct_loss_per_token": 0.34293144941329956, "incorrect_loss_per_token": 1.4537203311920166, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34293144941329956, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.34293144941329956, "logits_per_char": -0.08573286235332489, "num_chars": 4}, {"sum_logits": -1.4537203311920166, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.4537203311920166, "logits_per_char": -0.4845734437306722, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 7, "native_id": 2750, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24351157248020172, "incorrect_loss_raw": 1.7148209810256958, "correct_loss_per_char": 0.06087789312005043, "incorrect_loss_per_char": 0.5716069936752319, "correct_loss_per_token": 0.24351157248020172, "incorrect_loss_per_token": 1.7148209810256958, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24351157248020172, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.24351157248020172, "logits_per_char": -0.06087789312005043, "num_chars": 4}, {"sum_logits": -1.7148209810256958, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.7148209810256958, "logits_per_char": -0.5716069936752319, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 8, "native_id": 2838, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6487526297569275, "incorrect_loss_raw": 0.8484337329864502, "correct_loss_per_char": 0.2162508765856425, "incorrect_loss_per_char": 0.21210843324661255, "correct_loss_per_token": 0.6487526297569275, "incorrect_loss_per_token": 0.8484337329864502, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8484337329864502, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": false, "logits_per_token": -0.8484337329864502, "logits_per_char": -0.21210843324661255, "num_chars": 4}, {"sum_logits": -0.6487526297569275, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": true, "logits_per_token": -0.6487526297569275, "logits_per_char": -0.2162508765856425, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 9, "native_id": 343, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5000104904174805, "incorrect_loss_raw": 1.071984052658081, "correct_loss_per_char": 0.12500262260437012, "incorrect_loss_per_char": 0.35732801755269367, "correct_loss_per_token": 0.5000104904174805, "incorrect_loss_per_token": 1.071984052658081, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5000104904174805, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": true, "logits_per_token": -0.5000104904174805, "logits_per_char": -0.12500262260437012, "num_chars": 4}, {"sum_logits": -1.071984052658081, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.071984052658081, "logits_per_char": -0.35732801755269367, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 10, "native_id": 403, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3334189653396606, "incorrect_loss_raw": 0.3594166040420532, "correct_loss_per_char": 0.44447298844655353, "incorrect_loss_per_char": 0.0898541510105133, "correct_loss_per_token": 1.3334189653396606, "incorrect_loss_per_token": 0.3594166040420532, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3594166040420532, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.3594166040420532, "logits_per_char": -0.0898541510105133, "num_chars": 4}, {"sum_logits": -1.3334189653396606, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -1.3334189653396606, "logits_per_char": -0.44447298844655353, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 11, "native_id": 3139, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.225430428981781, "incorrect_loss_raw": 1.7386341094970703, "correct_loss_per_char": 0.05635760724544525, "incorrect_loss_per_char": 0.5795447031656901, "correct_loss_per_token": 0.225430428981781, "incorrect_loss_per_token": 1.7386341094970703, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.225430428981781, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.225430428981781, "logits_per_char": -0.05635760724544525, "num_chars": 4}, {"sum_logits": -1.7386341094970703, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.7386341094970703, "logits_per_char": -0.5795447031656901, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 12, "native_id": 1452, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2408473789691925, "incorrect_loss_raw": 1.7431831359863281, "correct_loss_per_char": 0.060211844742298126, "incorrect_loss_per_char": 0.581061045328776, "correct_loss_per_token": 0.2408473789691925, "incorrect_loss_per_token": 1.7431831359863281, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2408473789691925, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.2408473789691925, "logits_per_char": -0.060211844742298126, "num_chars": 4}, {"sum_logits": -1.7431831359863281, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.7431831359863281, "logits_per_char": -0.581061045328776, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 13, "native_id": 969, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.400888055562973, "incorrect_loss_raw": 1.274802803993225, "correct_loss_per_char": 0.10022201389074326, "incorrect_loss_per_char": 0.4249342679977417, "correct_loss_per_token": 0.400888055562973, "incorrect_loss_per_token": 1.274802803993225, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.400888055562973, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": true, "logits_per_token": -0.400888055562973, "logits_per_char": -0.10022201389074326, "num_chars": 4}, {"sum_logits": -1.274802803993225, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": false, "logits_per_token": -1.274802803993225, "logits_per_char": -0.4249342679977417, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 14, "native_id": 71, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.44100016355514526, "incorrect_loss_raw": 1.1301610469818115, "correct_loss_per_char": 0.11025004088878632, "incorrect_loss_per_char": 0.3767203489939372, "correct_loss_per_token": 0.44100016355514526, "incorrect_loss_per_token": 1.1301610469818115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.44100016355514526, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.44100016355514526, "logits_per_char": -0.11025004088878632, "num_chars": 4}, {"sum_logits": -1.1301610469818115, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.1301610469818115, "logits_per_char": -0.3767203489939372, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 15, "native_id": 126, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7747220993041992, "incorrect_loss_raw": 0.7395960092544556, "correct_loss_per_char": 0.2582406997680664, "incorrect_loss_per_char": 0.1848990023136139, "correct_loss_per_token": 0.7747220993041992, "incorrect_loss_per_token": 0.7395960092544556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7395960092544556, "num_tokens": 1, "num_tokens_all": 855, "is_greedy": true, "logits_per_token": -0.7395960092544556, "logits_per_char": -0.1848990023136139, "num_chars": 4}, {"sum_logits": -0.7747220993041992, "num_tokens": 1, "num_tokens_all": 855, "is_greedy": false, "logits_per_token": -0.7747220993041992, "logits_per_char": -0.2582406997680664, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 16, "native_id": 3230, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3665168881416321, "incorrect_loss_raw": 1.2709765434265137, "correct_loss_per_char": 0.09162922203540802, "incorrect_loss_per_char": 0.4236588478088379, "correct_loss_per_token": 0.3665168881416321, "incorrect_loss_per_token": 1.2709765434265137, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3665168881416321, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": true, "logits_per_token": -0.3665168881416321, "logits_per_char": -0.09162922203540802, "num_chars": 4}, {"sum_logits": -1.2709765434265137, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": false, "logits_per_token": -1.2709765434265137, "logits_per_char": -0.4236588478088379, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 17, "native_id": 64, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6226434707641602, "incorrect_loss_raw": 1.0195975303649902, "correct_loss_per_char": 0.15566086769104004, "incorrect_loss_per_char": 0.33986584345499676, "correct_loss_per_token": 0.6226434707641602, "incorrect_loss_per_token": 1.0195975303649902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6226434707641602, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": true, "logits_per_token": -0.6226434707641602, "logits_per_char": -0.15566086769104004, "num_chars": 4}, {"sum_logits": -1.0195975303649902, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": false, "logits_per_token": -1.0195975303649902, "logits_per_char": -0.33986584345499676, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 18, "native_id": 1417, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1657167226076126, "incorrect_loss_raw": 2.0830349922180176, "correct_loss_per_char": 0.04142918065190315, "incorrect_loss_per_char": 0.6943449974060059, "correct_loss_per_token": 0.1657167226076126, "incorrect_loss_per_token": 2.0830349922180176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1657167226076126, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.1657167226076126, "logits_per_char": -0.04142918065190315, "num_chars": 4}, {"sum_logits": -2.0830349922180176, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -2.0830349922180176, "logits_per_char": -0.6943449974060059, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 19, "native_id": 2655, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5216712355613708, "incorrect_loss_raw": 1.0791106224060059, "correct_loss_per_char": 0.1304178088903427, "incorrect_loss_per_char": 0.35970354080200195, "correct_loss_per_token": 0.5216712355613708, "incorrect_loss_per_token": 1.0791106224060059, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5216712355613708, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": true, "logits_per_token": -0.5216712355613708, "logits_per_char": -0.1304178088903427, "num_chars": 4}, {"sum_logits": -1.0791106224060059, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": false, "logits_per_token": -1.0791106224060059, "logits_per_char": -0.35970354080200195, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 20, "native_id": 2552, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.12167946249246597, "incorrect_loss_raw": 2.313276529312134, "correct_loss_per_char": 0.030419865623116493, "incorrect_loss_per_char": 0.7710921764373779, "correct_loss_per_token": 0.12167946249246597, "incorrect_loss_per_token": 2.313276529312134, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.12167946249246597, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.12167946249246597, "logits_per_char": -0.030419865623116493, "num_chars": 4}, {"sum_logits": -2.313276529312134, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -2.313276529312134, "logits_per_char": -0.7710921764373779, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 21, "native_id": 1983, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26589345932006836, "incorrect_loss_raw": 1.6957310438156128, "correct_loss_per_char": 0.06647336483001709, "incorrect_loss_per_char": 0.565243681271871, "correct_loss_per_token": 0.26589345932006836, "incorrect_loss_per_token": 1.6957310438156128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26589345932006836, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.26589345932006836, "logits_per_char": -0.06647336483001709, "num_chars": 4}, {"sum_logits": -1.6957310438156128, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.6957310438156128, "logits_per_char": -0.565243681271871, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 22, "native_id": 2522, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26476341485977173, "incorrect_loss_raw": 1.7391626834869385, "correct_loss_per_char": 0.06619085371494293, "incorrect_loss_per_char": 0.5797208944956461, "correct_loss_per_token": 0.26476341485977173, "incorrect_loss_per_token": 1.7391626834869385, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26476341485977173, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.26476341485977173, "logits_per_char": -0.06619085371494293, "num_chars": 4}, {"sum_logits": -1.7391626834869385, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -1.7391626834869385, "logits_per_char": -0.5797208944956461, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 23, "native_id": 1898, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20361822843551636, "incorrect_loss_raw": 1.857959508895874, "correct_loss_per_char": 0.05090455710887909, "incorrect_loss_per_char": 0.6193198362986246, "correct_loss_per_token": 0.20361822843551636, "incorrect_loss_per_token": 1.857959508895874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20361822843551636, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.20361822843551636, "logits_per_char": -0.05090455710887909, "num_chars": 4}, {"sum_logits": -1.857959508895874, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.857959508895874, "logits_per_char": -0.6193198362986246, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 24, "native_id": 608, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4307490587234497, "incorrect_loss_raw": 1.1727632284164429, "correct_loss_per_char": 0.10768726468086243, "incorrect_loss_per_char": 0.39092107613881427, "correct_loss_per_token": 0.4307490587234497, "incorrect_loss_per_token": 1.1727632284164429, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4307490587234497, "num_tokens": 1, "num_tokens_all": 1015, "is_greedy": true, "logits_per_token": -0.4307490587234497, "logits_per_char": -0.10768726468086243, "num_chars": 4}, {"sum_logits": -1.1727632284164429, "num_tokens": 1, "num_tokens_all": 1015, "is_greedy": false, "logits_per_token": -1.1727632284164429, "logits_per_char": -0.39092107613881427, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 25, "native_id": 373, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.223164677619934, "incorrect_loss_raw": 0.45626258850097656, "correct_loss_per_char": 0.4077215592066447, "incorrect_loss_per_char": 0.11406564712524414, "correct_loss_per_token": 1.223164677619934, "incorrect_loss_per_token": 0.45626258850097656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.45626258850097656, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.45626258850097656, "logits_per_char": -0.11406564712524414, "num_chars": 4}, {"sum_logits": -1.223164677619934, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -1.223164677619934, "logits_per_char": -0.4077215592066447, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 26, "native_id": 749, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7415421009063721, "incorrect_loss_raw": 0.8134037256240845, "correct_loss_per_char": 0.18538552522659302, "incorrect_loss_per_char": 0.27113457520802814, "correct_loss_per_token": 0.7415421009063721, "incorrect_loss_per_token": 0.8134037256240845, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7415421009063721, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": true, "logits_per_token": -0.7415421009063721, "logits_per_char": -0.18538552522659302, "num_chars": 4}, {"sum_logits": -0.8134037256240845, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": false, "logits_per_token": -0.8134037256240845, "logits_per_char": -0.27113457520802814, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 27, "native_id": 2922, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.14429083466529846, "incorrect_loss_raw": 2.243906259536743, "correct_loss_per_char": 0.036072708666324615, "incorrect_loss_per_char": 0.7479687531789144, "correct_loss_per_token": 0.14429083466529846, "incorrect_loss_per_token": 2.243906259536743, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.14429083466529846, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.14429083466529846, "logits_per_char": -0.036072708666324615, "num_chars": 4}, {"sum_logits": -2.243906259536743, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -2.243906259536743, "logits_per_char": -0.7479687531789144, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 28, "native_id": 468, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.14290213584899902, "incorrect_loss_raw": 2.3159499168395996, "correct_loss_per_char": 0.035725533962249756, "incorrect_loss_per_char": 0.7719833056131998, "correct_loss_per_token": 0.14290213584899902, "incorrect_loss_per_token": 2.3159499168395996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.14290213584899902, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.14290213584899902, "logits_per_char": -0.035725533962249756, "num_chars": 4}, {"sum_logits": -2.3159499168395996, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -2.3159499168395996, "logits_per_char": -0.7719833056131998, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 29, "native_id": 59, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46136316657066345, "incorrect_loss_raw": 1.1836488246917725, "correct_loss_per_char": 0.11534079164266586, "incorrect_loss_per_char": 0.3945496082305908, "correct_loss_per_token": 0.46136316657066345, "incorrect_loss_per_token": 1.1836488246917725, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46136316657066345, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.46136316657066345, "logits_per_char": -0.11534079164266586, "num_chars": 4}, {"sum_logits": -1.1836488246917725, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.1836488246917725, "logits_per_char": -0.3945496082305908, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 30, "native_id": 2060, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1796503067016602, "incorrect_loss_raw": 0.4531911611557007, "correct_loss_per_char": 0.3932167689005534, "incorrect_loss_per_char": 0.11329779028892517, "correct_loss_per_token": 1.1796503067016602, "incorrect_loss_per_token": 0.4531911611557007, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4531911611557007, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": true, "logits_per_token": -0.4531911611557007, "logits_per_char": -0.11329779028892517, "num_chars": 4}, {"sum_logits": -1.1796503067016602, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.1796503067016602, "logits_per_char": -0.3932167689005534, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 31, "native_id": 1993, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26821401715278625, "incorrect_loss_raw": 1.6398433446884155, "correct_loss_per_char": 0.06705350428819656, "incorrect_loss_per_char": 0.5466144482294718, "correct_loss_per_token": 0.26821401715278625, "incorrect_loss_per_token": 1.6398433446884155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26821401715278625, "num_tokens": 1, "num_tokens_all": 851, "is_greedy": true, "logits_per_token": -0.26821401715278625, "logits_per_char": -0.06705350428819656, "num_chars": 4}, {"sum_logits": -1.6398433446884155, "num_tokens": 1, "num_tokens_all": 851, "is_greedy": false, "logits_per_token": -1.6398433446884155, "logits_per_char": -0.5466144482294718, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 32, "native_id": 1023, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19414041936397552, "incorrect_loss_raw": 1.898543119430542, "correct_loss_per_char": 0.04853510484099388, "incorrect_loss_per_char": 0.6328477064768473, "correct_loss_per_token": 0.19414041936397552, "incorrect_loss_per_token": 1.898543119430542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19414041936397552, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.19414041936397552, "logits_per_char": -0.04853510484099388, "num_chars": 4}, {"sum_logits": -1.898543119430542, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.898543119430542, "logits_per_char": -0.6328477064768473, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 33, "native_id": 264, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19997833669185638, "incorrect_loss_raw": 1.8947105407714844, "correct_loss_per_char": 0.049994584172964096, "incorrect_loss_per_char": 0.6315701802571615, "correct_loss_per_token": 0.19997833669185638, "incorrect_loss_per_token": 1.8947105407714844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19997833669185638, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.19997833669185638, "logits_per_char": -0.049994584172964096, "num_chars": 4}, {"sum_logits": -1.8947105407714844, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -1.8947105407714844, "logits_per_char": -0.6315701802571615, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 34, "native_id": 2733, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.68265700340271, "incorrect_loss_raw": 0.24236911535263062, "correct_loss_per_char": 0.5608856678009033, "incorrect_loss_per_char": 0.060592278838157654, "correct_loss_per_token": 1.68265700340271, "incorrect_loss_per_token": 0.24236911535263062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24236911535263062, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": true, "logits_per_token": -0.24236911535263062, "logits_per_char": -0.060592278838157654, "num_chars": 4}, {"sum_logits": -1.68265700340271, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.68265700340271, "logits_per_char": -0.5608856678009033, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 35, "native_id": 2216, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1772102117538452, "incorrect_loss_raw": 0.42527636885643005, "correct_loss_per_char": 0.3924034039179484, "incorrect_loss_per_char": 0.10631909221410751, "correct_loss_per_token": 1.1772102117538452, "incorrect_loss_per_token": 0.42527636885643005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.42527636885643005, "num_tokens": 1, "num_tokens_all": 1131, "is_greedy": true, "logits_per_token": -0.42527636885643005, "logits_per_char": -0.10631909221410751, "num_chars": 4}, {"sum_logits": -1.1772102117538452, "num_tokens": 1, "num_tokens_all": 1131, "is_greedy": false, "logits_per_token": -1.1772102117538452, "logits_per_char": -0.3924034039179484, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 36, "native_id": 1908, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23094461858272552, "incorrect_loss_raw": 1.8784533739089966, "correct_loss_per_char": 0.05773615464568138, "incorrect_loss_per_char": 0.6261511246363322, "correct_loss_per_token": 0.23094461858272552, "incorrect_loss_per_token": 1.8784533739089966, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23094461858272552, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.23094461858272552, "logits_per_char": -0.05773615464568138, "num_chars": 4}, {"sum_logits": -1.8784533739089966, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -1.8784533739089966, "logits_per_char": -0.6261511246363322, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 37, "native_id": 280, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22647272050380707, "incorrect_loss_raw": 1.7668107748031616, "correct_loss_per_char": 0.05661818012595177, "incorrect_loss_per_char": 0.5889369249343872, "correct_loss_per_token": 0.22647272050380707, "incorrect_loss_per_token": 1.7668107748031616, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22647272050380707, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.22647272050380707, "logits_per_char": -0.05661818012595177, "num_chars": 4}, {"sum_logits": -1.7668107748031616, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.7668107748031616, "logits_per_char": -0.5889369249343872, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 38, "native_id": 2463, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17631031572818756, "incorrect_loss_raw": 1.9375088214874268, "correct_loss_per_char": 0.04407757893204689, "incorrect_loss_per_char": 0.6458362738291422, "correct_loss_per_token": 0.17631031572818756, "incorrect_loss_per_token": 1.9375088214874268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17631031572818756, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.17631031572818756, "logits_per_char": -0.04407757893204689, "num_chars": 4}, {"sum_logits": -1.9375088214874268, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -1.9375088214874268, "logits_per_char": -0.6458362738291422, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 39, "native_id": 2765, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23182019591331482, "incorrect_loss_raw": 1.7293049097061157, "correct_loss_per_char": 0.057955048978328705, "incorrect_loss_per_char": 0.5764349699020386, "correct_loss_per_token": 0.23182019591331482, "incorrect_loss_per_token": 1.7293049097061157, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23182019591331482, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.23182019591331482, "logits_per_char": -0.057955048978328705, "num_chars": 4}, {"sum_logits": -1.7293049097061157, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.7293049097061157, "logits_per_char": -0.5764349699020386, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 40, "native_id": 364, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2860245406627655, "incorrect_loss_raw": 1.735397458076477, "correct_loss_per_char": 0.07150613516569138, "incorrect_loss_per_char": 0.5784658193588257, "correct_loss_per_token": 0.2860245406627655, "incorrect_loss_per_token": 1.735397458076477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2860245406627655, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": true, "logits_per_token": -0.2860245406627655, "logits_per_char": -0.07150613516569138, "num_chars": 4}, {"sum_logits": -1.735397458076477, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": false, "logits_per_token": -1.735397458076477, "logits_per_char": -0.5784658193588257, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 41, "native_id": 2109, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5469892621040344, "incorrect_loss_raw": 0.9406692981719971, "correct_loss_per_char": 0.1367473155260086, "incorrect_loss_per_char": 0.313556432723999, "correct_loss_per_token": 0.5469892621040344, "incorrect_loss_per_token": 0.9406692981719971, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5469892621040344, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": true, "logits_per_token": -0.5469892621040344, "logits_per_char": -0.1367473155260086, "num_chars": 4}, {"sum_logits": -0.9406692981719971, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": false, "logits_per_token": -0.9406692981719971, "logits_per_char": -0.313556432723999, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 42, "native_id": 2371, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23829132318496704, "incorrect_loss_raw": 1.7156963348388672, "correct_loss_per_char": 0.05957283079624176, "incorrect_loss_per_char": 0.5718987782796224, "correct_loss_per_token": 0.23829132318496704, "incorrect_loss_per_token": 1.7156963348388672, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23829132318496704, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -0.23829132318496704, "logits_per_char": -0.05957283079624176, "num_chars": 4}, {"sum_logits": -1.7156963348388672, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.7156963348388672, "logits_per_char": -0.5718987782796224, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 43, "native_id": 188, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5429006814956665, "incorrect_loss_raw": 1.1407122611999512, "correct_loss_per_char": 0.13572517037391663, "incorrect_loss_per_char": 0.3802374203999837, "correct_loss_per_token": 0.5429006814956665, "incorrect_loss_per_token": 1.1407122611999512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5429006814956665, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": true, "logits_per_token": -0.5429006814956665, "logits_per_char": -0.13572517037391663, "num_chars": 4}, {"sum_logits": -1.1407122611999512, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.1407122611999512, "logits_per_char": -0.3802374203999837, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 44, "native_id": 1104, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7946494817733765, "incorrect_loss_raw": 0.6651671528816223, "correct_loss_per_char": 0.2648831605911255, "incorrect_loss_per_char": 0.16629178822040558, "correct_loss_per_token": 0.7946494817733765, "incorrect_loss_per_token": 0.6651671528816223, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6651671528816223, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.6651671528816223, "logits_per_char": -0.16629178822040558, "num_chars": 4}, {"sum_logits": -0.7946494817733765, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -0.7946494817733765, "logits_per_char": -0.2648831605911255, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 45, "native_id": 2279, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.16386482119560242, "incorrect_loss_raw": 2.0461459159851074, "correct_loss_per_char": 0.040966205298900604, "incorrect_loss_per_char": 0.6820486386617025, "correct_loss_per_token": 0.16386482119560242, "incorrect_loss_per_token": 2.0461459159851074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16386482119560242, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.16386482119560242, "logits_per_char": -0.040966205298900604, "num_chars": 4}, {"sum_logits": -2.0461459159851074, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -2.0461459159851074, "logits_per_char": -0.6820486386617025, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 46, "native_id": 258, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6480200290679932, "incorrect_loss_raw": 0.3047153949737549, "correct_loss_per_char": 0.549340009689331, "incorrect_loss_per_char": 0.07617884874343872, "correct_loss_per_token": 1.6480200290679932, "incorrect_loss_per_token": 0.3047153949737549, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3047153949737549, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.3047153949737549, "logits_per_char": -0.07617884874343872, "num_chars": 4}, {"sum_logits": -1.6480200290679932, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.6480200290679932, "logits_per_char": -0.549340009689331, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 47, "native_id": 2640, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3812053203582764, "incorrect_loss_raw": 0.34616634249687195, "correct_loss_per_char": 0.4604017734527588, "incorrect_loss_per_char": 0.08654158562421799, "correct_loss_per_token": 1.3812053203582764, "incorrect_loss_per_token": 0.34616634249687195, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34616634249687195, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.34616634249687195, "logits_per_char": -0.08654158562421799, "num_chars": 4}, {"sum_logits": -1.3812053203582764, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -1.3812053203582764, "logits_per_char": -0.4604017734527588, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 48, "native_id": 1238, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2664133608341217, "incorrect_loss_raw": 1.5246161222457886, "correct_loss_per_char": 0.06660334020853043, "incorrect_loss_per_char": 0.5082053740819296, "correct_loss_per_token": 0.2664133608341217, "incorrect_loss_per_token": 1.5246161222457886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2664133608341217, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -0.2664133608341217, "logits_per_char": -0.06660334020853043, "num_chars": 4}, {"sum_logits": -1.5246161222457886, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.5246161222457886, "logits_per_char": -0.5082053740819296, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 49, "native_id": 1970, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3907347917556763, "incorrect_loss_raw": 0.3378373384475708, "correct_loss_per_char": 0.4635782639185588, "incorrect_loss_per_char": 0.0844593346118927, "correct_loss_per_token": 1.3907347917556763, "incorrect_loss_per_token": 0.3378373384475708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3378373384475708, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.3378373384475708, "logits_per_char": -0.0844593346118927, "num_chars": 4}, {"sum_logits": -1.3907347917556763, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.3907347917556763, "logits_per_char": -0.4635782639185588, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 50, "native_id": 1455, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.465635061264038, "incorrect_loss_raw": 0.30594509840011597, "correct_loss_per_char": 0.488545020421346, "incorrect_loss_per_char": 0.07648627460002899, "correct_loss_per_token": 1.465635061264038, "incorrect_loss_per_token": 0.30594509840011597, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30594509840011597, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -0.30594509840011597, "logits_per_char": -0.07648627460002899, "num_chars": 4}, {"sum_logits": -1.465635061264038, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -1.465635061264038, "logits_per_char": -0.488545020421346, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 51, "native_id": 1091, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3211390972137451, "incorrect_loss_raw": 0.3588244915008545, "correct_loss_per_char": 0.44037969907124835, "incorrect_loss_per_char": 0.08970612287521362, "correct_loss_per_token": 1.3211390972137451, "incorrect_loss_per_token": 0.3588244915008545, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3588244915008545, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.3588244915008545, "logits_per_char": -0.08970612287521362, "num_chars": 4}, {"sum_logits": -1.3211390972137451, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.3211390972137451, "logits_per_char": -0.44037969907124835, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 52, "native_id": 1020, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1753618717193604, "incorrect_loss_raw": 0.4347723126411438, "correct_loss_per_char": 0.3917872905731201, "incorrect_loss_per_char": 0.10869307816028595, "correct_loss_per_token": 1.1753618717193604, "incorrect_loss_per_token": 0.4347723126411438, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4347723126411438, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -0.4347723126411438, "logits_per_char": -0.10869307816028595, "num_chars": 4}, {"sum_logits": -1.1753618717193604, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.1753618717193604, "logits_per_char": -0.3917872905731201, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 53, "native_id": 2684, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7209947109222412, "incorrect_loss_raw": 0.24125917255878448, "correct_loss_per_char": 0.5736649036407471, "incorrect_loss_per_char": 0.06031479313969612, "correct_loss_per_token": 1.7209947109222412, "incorrect_loss_per_token": 0.24125917255878448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24125917255878448, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.24125917255878448, "logits_per_char": -0.06031479313969612, "num_chars": 4}, {"sum_logits": -1.7209947109222412, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.7209947109222412, "logits_per_char": -0.5736649036407471, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 54, "native_id": 819, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8442732095718384, "incorrect_loss_raw": 0.6494188904762268, "correct_loss_per_char": 0.2110683023929596, "incorrect_loss_per_char": 0.2164729634920756, "correct_loss_per_token": 0.8442732095718384, "incorrect_loss_per_token": 0.6494188904762268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8442732095718384, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -0.8442732095718384, "logits_per_char": -0.2110683023929596, "num_chars": 4}, {"sum_logits": -0.6494188904762268, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.6494188904762268, "logits_per_char": -0.2164729634920756, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 55, "native_id": 1857, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4064122140407562, "incorrect_loss_raw": 1.3190326690673828, "correct_loss_per_char": 0.10160305351018906, "incorrect_loss_per_char": 0.43967755635579425, "correct_loss_per_token": 0.4064122140407562, "incorrect_loss_per_token": 1.3190326690673828, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4064122140407562, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.4064122140407562, "logits_per_char": -0.10160305351018906, "num_chars": 4}, {"sum_logits": -1.3190326690673828, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.3190326690673828, "logits_per_char": -0.43967755635579425, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 56, "native_id": 2171, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.276294469833374, "incorrect_loss_raw": 0.3719325661659241, "correct_loss_per_char": 0.425431489944458, "incorrect_loss_per_char": 0.09298314154148102, "correct_loss_per_token": 1.276294469833374, "incorrect_loss_per_token": 0.3719325661659241, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3719325661659241, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -0.3719325661659241, "logits_per_char": -0.09298314154148102, "num_chars": 4}, {"sum_logits": -1.276294469833374, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.276294469833374, "logits_per_char": -0.425431489944458, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 57, "native_id": 2725, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2044069916009903, "incorrect_loss_raw": 1.7922308444976807, "correct_loss_per_char": 0.051101747900247574, "incorrect_loss_per_char": 0.5974102814992269, "correct_loss_per_token": 0.2044069916009903, "incorrect_loss_per_token": 1.7922308444976807, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2044069916009903, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -0.2044069916009903, "logits_per_char": -0.051101747900247574, "num_chars": 4}, {"sum_logits": -1.7922308444976807, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.7922308444976807, "logits_per_char": -0.5974102814992269, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 58, "native_id": 79, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0291738510131836, "incorrect_loss_raw": 0.5132186412811279, "correct_loss_per_char": 0.34305795033772785, "incorrect_loss_per_char": 0.12830466032028198, "correct_loss_per_token": 1.0291738510131836, "incorrect_loss_per_token": 0.5132186412811279, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5132186412811279, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.5132186412811279, "logits_per_char": -0.12830466032028198, "num_chars": 4}, {"sum_logits": -1.0291738510131836, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -1.0291738510131836, "logits_per_char": -0.34305795033772785, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 59, "native_id": 2081, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27416229248046875, "incorrect_loss_raw": 1.583591103553772, "correct_loss_per_char": 0.06854057312011719, "incorrect_loss_per_char": 0.5278637011845907, "correct_loss_per_token": 0.27416229248046875, "incorrect_loss_per_token": 1.583591103553772, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27416229248046875, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.27416229248046875, "logits_per_char": -0.06854057312011719, "num_chars": 4}, {"sum_logits": -1.583591103553772, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -1.583591103553772, "logits_per_char": -0.5278637011845907, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 60, "native_id": 289, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0860650539398193, "incorrect_loss_raw": 0.4928826689720154, "correct_loss_per_char": 0.36202168464660645, "incorrect_loss_per_char": 0.12322066724300385, "correct_loss_per_token": 1.0860650539398193, "incorrect_loss_per_token": 0.4928826689720154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4928826689720154, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": true, "logits_per_token": -0.4928826689720154, "logits_per_char": -0.12322066724300385, "num_chars": 4}, {"sum_logits": -1.0860650539398193, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": false, "logits_per_token": -1.0860650539398193, "logits_per_char": -0.36202168464660645, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 61, "native_id": 23, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20514415204524994, "incorrect_loss_raw": 1.8326139450073242, "correct_loss_per_char": 0.051286038011312485, "incorrect_loss_per_char": 0.6108713150024414, "correct_loss_per_token": 0.20514415204524994, "incorrect_loss_per_token": 1.8326139450073242, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20514415204524994, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": true, "logits_per_token": -0.20514415204524994, "logits_per_char": -0.051286038011312485, "num_chars": 4}, {"sum_logits": -1.8326139450073242, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -1.8326139450073242, "logits_per_char": -0.6108713150024414, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 62, "native_id": 1366, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.02545428276062, "incorrect_loss_raw": 0.19090862572193146, "correct_loss_per_char": 0.6751514275868734, "incorrect_loss_per_char": 0.047727156430482864, "correct_loss_per_token": 2.02545428276062, "incorrect_loss_per_token": 0.19090862572193146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19090862572193146, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": true, "logits_per_token": -0.19090862572193146, "logits_per_char": -0.047727156430482864, "num_chars": 4}, {"sum_logits": -2.02545428276062, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": false, "logits_per_token": -2.02545428276062, "logits_per_char": -0.6751514275868734, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 63, "native_id": 588, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.929520845413208, "incorrect_loss_raw": 0.6263728737831116, "correct_loss_per_char": 0.30984028180440265, "incorrect_loss_per_char": 0.1565932184457779, "correct_loss_per_token": 0.929520845413208, "incorrect_loss_per_token": 0.6263728737831116, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6263728737831116, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": true, "logits_per_token": -0.6263728737831116, "logits_per_char": -0.1565932184457779, "num_chars": 4}, {"sum_logits": -0.929520845413208, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -0.929520845413208, "logits_per_char": -0.30984028180440265, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 64, "native_id": 2908, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.591020107269287, "incorrect_loss_raw": 0.3292371928691864, "correct_loss_per_char": 0.5303400357564291, "incorrect_loss_per_char": 0.0823092982172966, "correct_loss_per_token": 1.591020107269287, "incorrect_loss_per_token": 0.3292371928691864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3292371928691864, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.3292371928691864, "logits_per_char": -0.0823092982172966, "num_chars": 4}, {"sum_logits": -1.591020107269287, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.591020107269287, "logits_per_char": -0.5303400357564291, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 65, "native_id": 1936, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5488527417182922, "incorrect_loss_raw": 1.088613748550415, "correct_loss_per_char": 0.13721318542957306, "incorrect_loss_per_char": 0.362871249516805, "correct_loss_per_token": 0.5488527417182922, "incorrect_loss_per_token": 1.088613748550415, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5488527417182922, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.5488527417182922, "logits_per_char": -0.13721318542957306, "num_chars": 4}, {"sum_logits": -1.088613748550415, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -1.088613748550415, "logits_per_char": -0.362871249516805, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 66, "native_id": 2692, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.44319379329681396, "incorrect_loss_raw": 1.188639521598816, "correct_loss_per_char": 0.11079844832420349, "incorrect_loss_per_char": 0.396213173866272, "correct_loss_per_token": 0.44319379329681396, "incorrect_loss_per_token": 1.188639521598816, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.44319379329681396, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.44319379329681396, "logits_per_char": -0.11079844832420349, "num_chars": 4}, {"sum_logits": -1.188639521598816, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -1.188639521598816, "logits_per_char": -0.396213173866272, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 67, "native_id": 1545, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.391083687543869, "incorrect_loss_raw": 1.2734206914901733, "correct_loss_per_char": 0.09777092188596725, "incorrect_loss_per_char": 0.4244735638300578, "correct_loss_per_token": 0.391083687543869, "incorrect_loss_per_token": 1.2734206914901733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.391083687543869, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -0.391083687543869, "logits_per_char": -0.09777092188596725, "num_chars": 4}, {"sum_logits": -1.2734206914901733, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -1.2734206914901733, "logits_per_char": -0.4244735638300578, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 68, "native_id": 684, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9862450361251831, "incorrect_loss_raw": 0.5489174723625183, "correct_loss_per_char": 0.32874834537506104, "incorrect_loss_per_char": 0.13722936809062958, "correct_loss_per_token": 0.9862450361251831, "incorrect_loss_per_token": 0.5489174723625183, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5489174723625183, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.5489174723625183, "logits_per_char": -0.13722936809062958, "num_chars": 4}, {"sum_logits": -0.9862450361251831, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -0.9862450361251831, "logits_per_char": -0.32874834537506104, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 69, "native_id": 221, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2715668678283691, "incorrect_loss_raw": 0.36323150992393494, "correct_loss_per_char": 0.42385562260945636, "incorrect_loss_per_char": 0.09080787748098373, "correct_loss_per_token": 1.2715668678283691, "incorrect_loss_per_token": 0.36323150992393494, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.36323150992393494, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -0.36323150992393494, "logits_per_char": -0.09080787748098373, "num_chars": 4}, {"sum_logits": -1.2715668678283691, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -1.2715668678283691, "logits_per_char": -0.42385562260945636, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 70, "native_id": 312, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.54640793800354, "incorrect_loss_raw": 0.3030254542827606, "correct_loss_per_char": 0.5154693126678467, "incorrect_loss_per_char": 0.07575636357069016, "correct_loss_per_token": 1.54640793800354, "incorrect_loss_per_token": 0.3030254542827606, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3030254542827606, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.3030254542827606, "logits_per_char": -0.07575636357069016, "num_chars": 4}, {"sum_logits": -1.54640793800354, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -1.54640793800354, "logits_per_char": -0.5154693126678467, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 71, "native_id": 2406, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3521433472633362, "incorrect_loss_raw": 1.426668405532837, "correct_loss_per_char": 0.08803583681583405, "incorrect_loss_per_char": 0.4755561351776123, "correct_loss_per_token": 0.3521433472633362, "incorrect_loss_per_token": 1.426668405532837, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3521433472633362, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.3521433472633362, "logits_per_char": -0.08803583681583405, "num_chars": 4}, {"sum_logits": -1.426668405532837, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.426668405532837, "logits_per_char": -0.4755561351776123, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 72, "native_id": 2033, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.09467244148254395, "incorrect_loss_raw": 2.737905263900757, "correct_loss_per_char": 0.023668110370635986, "incorrect_loss_per_char": 0.912635087966919, "correct_loss_per_token": 0.09467244148254395, "incorrect_loss_per_token": 2.737905263900757, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.09467244148254395, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": true, "logits_per_token": -0.09467244148254395, "logits_per_char": -0.023668110370635986, "num_chars": 4}, {"sum_logits": -2.737905263900757, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": false, "logits_per_token": -2.737905263900757, "logits_per_char": -0.912635087966919, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 73, "native_id": 671, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3277161419391632, "incorrect_loss_raw": 1.4944672584533691, "correct_loss_per_char": 0.0819290354847908, "incorrect_loss_per_char": 0.49815575281778973, "correct_loss_per_token": 0.3277161419391632, "incorrect_loss_per_token": 1.4944672584533691, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3277161419391632, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": true, "logits_per_token": -0.3277161419391632, "logits_per_char": -0.0819290354847908, "num_chars": 4}, {"sum_logits": -1.4944672584533691, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.4944672584533691, "logits_per_char": -0.49815575281778973, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 74, "native_id": 308, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4201418161392212, "incorrect_loss_raw": 0.32740435004234314, "correct_loss_per_char": 0.4733806053797404, "incorrect_loss_per_char": 0.08185108751058578, "correct_loss_per_token": 1.4201418161392212, "incorrect_loss_per_token": 0.32740435004234314, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32740435004234314, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.32740435004234314, "logits_per_char": -0.08185108751058578, "num_chars": 4}, {"sum_logits": -1.4201418161392212, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.4201418161392212, "logits_per_char": -0.4733806053797404, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 75, "native_id": 2282, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5365564823150635, "incorrect_loss_raw": 0.32823482155799866, "correct_loss_per_char": 0.5121854941050211, "incorrect_loss_per_char": 0.08205870538949966, "correct_loss_per_token": 1.5365564823150635, "incorrect_loss_per_token": 0.32823482155799866, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32823482155799866, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": true, "logits_per_token": -0.32823482155799866, "logits_per_char": -0.08205870538949966, "num_chars": 4}, {"sum_logits": -1.5365564823150635, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": false, "logits_per_token": -1.5365564823150635, "logits_per_char": -0.5121854941050211, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 76, "native_id": 881, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2067742794752121, "incorrect_loss_raw": 1.913257360458374, "correct_loss_per_char": 0.051693569868803024, "incorrect_loss_per_char": 0.6377524534861246, "correct_loss_per_token": 0.2067742794752121, "incorrect_loss_per_token": 1.913257360458374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2067742794752121, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -0.2067742794752121, "logits_per_char": -0.051693569868803024, "num_chars": 4}, {"sum_logits": -1.913257360458374, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.913257360458374, "logits_per_char": -0.6377524534861246, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 77, "native_id": 590, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.37853550910949707, "incorrect_loss_raw": 1.2756315469741821, "correct_loss_per_char": 0.09463387727737427, "incorrect_loss_per_char": 0.42521051565806073, "correct_loss_per_token": 0.37853550910949707, "incorrect_loss_per_token": 1.2756315469741821, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.37853550910949707, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.37853550910949707, "logits_per_char": -0.09463387727737427, "num_chars": 4}, {"sum_logits": -1.2756315469741821, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.2756315469741821, "logits_per_char": -0.42521051565806073, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 78, "native_id": 111, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2007155567407608, "incorrect_loss_raw": 1.8603086471557617, "correct_loss_per_char": 0.0501788891851902, "incorrect_loss_per_char": 0.6201028823852539, "correct_loss_per_token": 0.2007155567407608, "incorrect_loss_per_token": 1.8603086471557617, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2007155567407608, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.2007155567407608, "logits_per_char": -0.0501788891851902, "num_chars": 4}, {"sum_logits": -1.8603086471557617, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.8603086471557617, "logits_per_char": -0.6201028823852539, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 79, "native_id": 1418, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24315501749515533, "incorrect_loss_raw": 1.8413907289505005, "correct_loss_per_char": 0.060788754373788834, "incorrect_loss_per_char": 0.6137969096501669, "correct_loss_per_token": 0.24315501749515533, "incorrect_loss_per_token": 1.8413907289505005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24315501749515533, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.24315501749515533, "logits_per_char": -0.060788754373788834, "num_chars": 4}, {"sum_logits": -1.8413907289505005, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -1.8413907289505005, "logits_per_char": -0.6137969096501669, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 80, "native_id": 3157, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.09638196974992752, "incorrect_loss_raw": 2.778376340866089, "correct_loss_per_char": 0.02409549243748188, "incorrect_loss_per_char": 0.9261254469553629, "correct_loss_per_token": 0.09638196974992752, "incorrect_loss_per_token": 2.778376340866089, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.09638196974992752, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.09638196974992752, "logits_per_char": -0.02409549243748188, "num_chars": 4}, {"sum_logits": -2.778376340866089, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -2.778376340866089, "logits_per_char": -0.9261254469553629, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 81, "native_id": 454, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3793770372867584, "incorrect_loss_raw": 1.3302946090698242, "correct_loss_per_char": 0.0948442593216896, "incorrect_loss_per_char": 0.4434315363566081, "correct_loss_per_token": 0.3793770372867584, "incorrect_loss_per_token": 1.3302946090698242, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3793770372867584, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.3793770372867584, "logits_per_char": -0.0948442593216896, "num_chars": 4}, {"sum_logits": -1.3302946090698242, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -1.3302946090698242, "logits_per_char": -0.4434315363566081, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 82, "native_id": 2169, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0709481239318848, "incorrect_loss_raw": 0.5149736404418945, "correct_loss_per_char": 0.3569827079772949, "incorrect_loss_per_char": 0.12874341011047363, "correct_loss_per_token": 1.0709481239318848, "incorrect_loss_per_token": 0.5149736404418945, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5149736404418945, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.5149736404418945, "logits_per_char": -0.12874341011047363, "num_chars": 4}, {"sum_logits": -1.0709481239318848, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.0709481239318848, "logits_per_char": -0.3569827079772949, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 83, "native_id": 578, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1904006004333496, "incorrect_loss_raw": 0.17238959670066833, "correct_loss_per_char": 0.7301335334777832, "incorrect_loss_per_char": 0.043097399175167084, "correct_loss_per_token": 2.1904006004333496, "incorrect_loss_per_token": 0.17238959670066833, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17238959670066833, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": true, "logits_per_token": -0.17238959670066833, "logits_per_char": -0.043097399175167084, "num_chars": 4}, {"sum_logits": -2.1904006004333496, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": false, "logits_per_token": -2.1904006004333496, "logits_per_char": -0.7301335334777832, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 84, "native_id": 2746, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21901114284992218, "incorrect_loss_raw": 1.7296234369277954, "correct_loss_per_char": 0.054752785712480545, "incorrect_loss_per_char": 0.5765411456425985, "correct_loss_per_token": 0.21901114284992218, "incorrect_loss_per_token": 1.7296234369277954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21901114284992218, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": true, "logits_per_token": -0.21901114284992218, "logits_per_char": -0.054752785712480545, "num_chars": 4}, {"sum_logits": -1.7296234369277954, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.7296234369277954, "logits_per_char": -0.5765411456425985, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 85, "native_id": 1250, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.369418740272522, "incorrect_loss_raw": 1.2637271881103516, "correct_loss_per_char": 0.0923546850681305, "incorrect_loss_per_char": 0.4212423960367839, "correct_loss_per_token": 0.369418740272522, "incorrect_loss_per_token": 1.2637271881103516, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.369418740272522, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.369418740272522, "logits_per_char": -0.0923546850681305, "num_chars": 4}, {"sum_logits": -1.2637271881103516, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.2637271881103516, "logits_per_char": -0.4212423960367839, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 86, "native_id": 1860, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5521727800369263, "incorrect_loss_raw": 1.09517502784729, "correct_loss_per_char": 0.13804319500923157, "incorrect_loss_per_char": 0.36505834261576336, "correct_loss_per_token": 0.5521727800369263, "incorrect_loss_per_token": 1.09517502784729, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5521727800369263, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.5521727800369263, "logits_per_char": -0.13804319500923157, "num_chars": 4}, {"sum_logits": -1.09517502784729, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.09517502784729, "logits_per_char": -0.36505834261576336, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 87, "native_id": 162, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18359841406345367, "incorrect_loss_raw": 2.0176022052764893, "correct_loss_per_char": 0.04589960351586342, "incorrect_loss_per_char": 0.6725340684254965, "correct_loss_per_token": 0.18359841406345367, "incorrect_loss_per_token": 2.0176022052764893, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18359841406345367, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": true, "logits_per_token": -0.18359841406345367, "logits_per_char": -0.04589960351586342, "num_chars": 4}, {"sum_logits": -2.0176022052764893, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": false, "logits_per_token": -2.0176022052764893, "logits_per_char": -0.6725340684254965, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 88, "native_id": 1704, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6461193561553955, "incorrect_loss_raw": 0.8947358131408691, "correct_loss_per_char": 0.16152983903884888, "incorrect_loss_per_char": 0.29824527104695636, "correct_loss_per_token": 0.6461193561553955, "incorrect_loss_per_token": 0.8947358131408691, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6461193561553955, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.6461193561553955, "logits_per_char": -0.16152983903884888, "num_chars": 4}, {"sum_logits": -0.8947358131408691, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -0.8947358131408691, "logits_per_char": -0.29824527104695636, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 89, "native_id": 1133, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1488397866487503, "incorrect_loss_raw": 2.1328766345977783, "correct_loss_per_char": 0.037209946662187576, "incorrect_loss_per_char": 0.7109588781992594, "correct_loss_per_token": 0.1488397866487503, "incorrect_loss_per_token": 2.1328766345977783, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1488397866487503, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -0.1488397866487503, "logits_per_char": -0.037209946662187576, "num_chars": 4}, {"sum_logits": -2.1328766345977783, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -2.1328766345977783, "logits_per_char": -0.7109588781992594, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 90, "native_id": 2713, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4828599989414215, "incorrect_loss_raw": 1.1023372411727905, "correct_loss_per_char": 0.12071499973535538, "incorrect_loss_per_char": 0.36744574705759686, "correct_loss_per_token": 0.4828599989414215, "incorrect_loss_per_token": 1.1023372411727905, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4828599989414215, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": true, "logits_per_token": -0.4828599989414215, "logits_per_char": -0.12071499973535538, "num_chars": 4}, {"sum_logits": -1.1023372411727905, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": false, "logits_per_token": -1.1023372411727905, "logits_per_char": -0.36744574705759686, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 91, "native_id": 164, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6526764631271362, "incorrect_loss_raw": 0.25082817673683167, "correct_loss_per_char": 0.550892154375712, "incorrect_loss_per_char": 0.06270704418420792, "correct_loss_per_token": 1.6526764631271362, "incorrect_loss_per_token": 0.25082817673683167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25082817673683167, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.25082817673683167, "logits_per_char": -0.06270704418420792, "num_chars": 4}, {"sum_logits": -1.6526764631271362, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.6526764631271362, "logits_per_char": -0.550892154375712, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 92, "native_id": 726, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.33381739258766174, "incorrect_loss_raw": 1.3935002088546753, "correct_loss_per_char": 0.08345434814691544, "incorrect_loss_per_char": 0.4645000696182251, "correct_loss_per_token": 0.33381739258766174, "incorrect_loss_per_token": 1.3935002088546753, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.33381739258766174, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": true, "logits_per_token": -0.33381739258766174, "logits_per_char": -0.08345434814691544, "num_chars": 4}, {"sum_logits": -1.3935002088546753, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": false, "logits_per_token": -1.3935002088546753, "logits_per_char": -0.4645000696182251, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 93, "native_id": 1112, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20461122691631317, "incorrect_loss_raw": 1.7704806327819824, "correct_loss_per_char": 0.05115280672907829, "incorrect_loss_per_char": 0.5901602109273275, "correct_loss_per_token": 0.20461122691631317, "incorrect_loss_per_token": 1.7704806327819824, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20461122691631317, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.20461122691631317, "logits_per_char": -0.05115280672907829, "num_chars": 4}, {"sum_logits": -1.7704806327819824, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.7704806327819824, "logits_per_char": -0.5901602109273275, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 94, "native_id": 633, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2600661516189575, "incorrect_loss_raw": 0.3954412043094635, "correct_loss_per_char": 0.4200220505396525, "incorrect_loss_per_char": 0.09886030107736588, "correct_loss_per_token": 1.2600661516189575, "incorrect_loss_per_token": 0.3954412043094635, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3954412043094635, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -0.3954412043094635, "logits_per_char": -0.09886030107736588, "num_chars": 4}, {"sum_logits": -1.2600661516189575, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -1.2600661516189575, "logits_per_char": -0.4200220505396525, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 95, "native_id": 1229, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4159623086452484, "incorrect_loss_raw": 1.2112932205200195, "correct_loss_per_char": 0.1039905771613121, "incorrect_loss_per_char": 0.40376440684000653, "correct_loss_per_token": 0.4159623086452484, "incorrect_loss_per_token": 1.2112932205200195, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4159623086452484, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": true, "logits_per_token": -0.4159623086452484, "logits_per_char": -0.1039905771613121, "num_chars": 4}, {"sum_logits": -1.2112932205200195, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -1.2112932205200195, "logits_per_char": -0.40376440684000653, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 96, "native_id": 3175, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22542832791805267, "incorrect_loss_raw": 1.788995623588562, "correct_loss_per_char": 0.05635708197951317, "incorrect_loss_per_char": 0.5963318745295206, "correct_loss_per_token": 0.22542832791805267, "incorrect_loss_per_token": 1.788995623588562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22542832791805267, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.22542832791805267, "logits_per_char": -0.05635708197951317, "num_chars": 4}, {"sum_logits": -1.788995623588562, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.788995623588562, "logits_per_char": -0.5963318745295206, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 97, "native_id": 1902, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1263298988342285, "incorrect_loss_raw": 0.46481794118881226, "correct_loss_per_char": 0.3754432996114095, "incorrect_loss_per_char": 0.11620448529720306, "correct_loss_per_token": 1.1263298988342285, "incorrect_loss_per_token": 0.46481794118881226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46481794118881226, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -0.46481794118881226, "logits_per_char": -0.11620448529720306, "num_chars": 4}, {"sum_logits": -1.1263298988342285, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.1263298988342285, "logits_per_char": -0.3754432996114095, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 98, "native_id": 168, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1962175369262695, "incorrect_loss_raw": 0.403886079788208, "correct_loss_per_char": 0.39873917897542316, "incorrect_loss_per_char": 0.100971519947052, "correct_loss_per_token": 1.1962175369262695, "incorrect_loss_per_token": 0.403886079788208, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.403886079788208, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.403886079788208, "logits_per_char": -0.100971519947052, "num_chars": 4}, {"sum_logits": -1.1962175369262695, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.1962175369262695, "logits_per_char": -0.39873917897542316, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 99, "native_id": 2306, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24675294756889343, "incorrect_loss_raw": 1.7364788055419922, "correct_loss_per_char": 0.06168823689222336, "incorrect_loss_per_char": 0.5788262685139974, "correct_loss_per_token": 0.24675294756889343, "incorrect_loss_per_token": 1.7364788055419922, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24675294756889343, "num_tokens": 1, "num_tokens_all": 835, "is_greedy": true, "logits_per_token": -0.24675294756889343, "logits_per_char": -0.06168823689222336, "num_chars": 4}, {"sum_logits": -1.7364788055419922, "num_tokens": 1, "num_tokens_all": 835, "is_greedy": false, "logits_per_token": -1.7364788055419922, "logits_per_char": -0.5788262685139974, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 100, "native_id": 1581, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29249027371406555, "incorrect_loss_raw": 1.5404173135757446, "correct_loss_per_char": 0.07312256842851639, "incorrect_loss_per_char": 0.5134724378585815, "correct_loss_per_token": 0.29249027371406555, "incorrect_loss_per_token": 1.5404173135757446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29249027371406555, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.29249027371406555, "logits_per_char": -0.07312256842851639, "num_chars": 4}, {"sum_logits": -1.5404173135757446, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.5404173135757446, "logits_per_char": -0.5134724378585815, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 101, "native_id": 3130, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1874682903289795, "incorrect_loss_raw": 0.4192968010902405, "correct_loss_per_char": 0.39582276344299316, "incorrect_loss_per_char": 0.10482420027256012, "correct_loss_per_token": 1.1874682903289795, "incorrect_loss_per_token": 0.4192968010902405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4192968010902405, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.4192968010902405, "logits_per_char": -0.10482420027256012, "num_chars": 4}, {"sum_logits": -1.1874682903289795, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.1874682903289795, "logits_per_char": -0.39582276344299316, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 102, "native_id": 1431, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3553008437156677, "incorrect_loss_raw": 1.4123117923736572, "correct_loss_per_char": 0.08882521092891693, "incorrect_loss_per_char": 0.47077059745788574, "correct_loss_per_token": 0.3553008437156677, "incorrect_loss_per_token": 1.4123117923736572, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3553008437156677, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": true, "logits_per_token": -0.3553008437156677, "logits_per_char": -0.08882521092891693, "num_chars": 4}, {"sum_logits": -1.4123117923736572, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -1.4123117923736572, "logits_per_char": -0.47077059745788574, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 103, "native_id": 2031, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.16323338449001312, "incorrect_loss_raw": 2.0951907634735107, "correct_loss_per_char": 0.04080834612250328, "incorrect_loss_per_char": 0.6983969211578369, "correct_loss_per_token": 0.16323338449001312, "incorrect_loss_per_token": 2.0951907634735107, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16323338449001312, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -0.16323338449001312, "logits_per_char": -0.04080834612250328, "num_chars": 4}, {"sum_logits": -2.0951907634735107, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -2.0951907634735107, "logits_per_char": -0.6983969211578369, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 104, "native_id": 1399, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.307879090309143, "incorrect_loss_raw": 0.4053937494754791, "correct_loss_per_char": 0.43595969676971436, "incorrect_loss_per_char": 0.10134843736886978, "correct_loss_per_token": 1.307879090309143, "incorrect_loss_per_token": 0.4053937494754791, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4053937494754791, "num_tokens": 1, "num_tokens_all": 875, "is_greedy": true, "logits_per_token": -0.4053937494754791, "logits_per_char": -0.10134843736886978, "num_chars": 4}, {"sum_logits": -1.307879090309143, "num_tokens": 1, "num_tokens_all": 875, "is_greedy": false, "logits_per_token": -1.307879090309143, "logits_per_char": -0.43595969676971436, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 105, "native_id": 2387, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7645000219345093, "incorrect_loss_raw": 0.23074029386043549, "correct_loss_per_char": 0.5881666739781698, "incorrect_loss_per_char": 0.05768507346510887, "correct_loss_per_token": 1.7645000219345093, "incorrect_loss_per_token": 0.23074029386043549, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23074029386043549, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.23074029386043549, "logits_per_char": -0.05768507346510887, "num_chars": 4}, {"sum_logits": -1.7645000219345093, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.7645000219345093, "logits_per_char": -0.5881666739781698, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 106, "native_id": 1917, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17284530401229858, "incorrect_loss_raw": 2.226663827896118, "correct_loss_per_char": 0.043211326003074646, "incorrect_loss_per_char": 0.7422212759653727, "correct_loss_per_token": 0.17284530401229858, "incorrect_loss_per_token": 2.226663827896118, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17284530401229858, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": true, "logits_per_token": -0.17284530401229858, "logits_per_char": -0.043211326003074646, "num_chars": 4}, {"sum_logits": -2.226663827896118, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -2.226663827896118, "logits_per_char": -0.7422212759653727, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 107, "native_id": 1949, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2321350574493408, "incorrect_loss_raw": 0.4272080957889557, "correct_loss_per_char": 0.41071168581644696, "incorrect_loss_per_char": 0.10680202394723892, "correct_loss_per_token": 1.2321350574493408, "incorrect_loss_per_token": 0.4272080957889557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4272080957889557, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.4272080957889557, "logits_per_char": -0.10680202394723892, "num_chars": 4}, {"sum_logits": -1.2321350574493408, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.2321350574493408, "logits_per_char": -0.41071168581644696, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 108, "native_id": 185, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1996544450521469, "incorrect_loss_raw": 1.9036283493041992, "correct_loss_per_char": 0.04991361126303673, "incorrect_loss_per_char": 0.6345427831013998, "correct_loss_per_token": 0.1996544450521469, "incorrect_loss_per_token": 1.9036283493041992, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1996544450521469, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.1996544450521469, "logits_per_char": -0.04991361126303673, "num_chars": 4}, {"sum_logits": -1.9036283493041992, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.9036283493041992, "logits_per_char": -0.6345427831013998, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 109, "native_id": 1928, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17682577669620514, "incorrect_loss_raw": 2.11171293258667, "correct_loss_per_char": 0.044206444174051285, "incorrect_loss_per_char": 0.7039043108622233, "correct_loss_per_token": 0.17682577669620514, "incorrect_loss_per_token": 2.11171293258667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17682577669620514, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.17682577669620514, "logits_per_char": -0.044206444174051285, "num_chars": 4}, {"sum_logits": -2.11171293258667, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -2.11171293258667, "logits_per_char": -0.7039043108622233, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 110, "native_id": 2436, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5952368378639221, "incorrect_loss_raw": 0.8933083415031433, "correct_loss_per_char": 0.14880920946598053, "incorrect_loss_per_char": 0.2977694471677144, "correct_loss_per_token": 0.5952368378639221, "incorrect_loss_per_token": 0.8933083415031433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5952368378639221, "num_tokens": 1, "num_tokens_all": 837, "is_greedy": true, "logits_per_token": -0.5952368378639221, "logits_per_char": -0.14880920946598053, "num_chars": 4}, {"sum_logits": -0.8933083415031433, "num_tokens": 1, "num_tokens_all": 837, "is_greedy": false, "logits_per_token": -0.8933083415031433, "logits_per_char": -0.2977694471677144, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 111, "native_id": 696, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.44392746686935425, "incorrect_loss_raw": 1.1821813583374023, "correct_loss_per_char": 0.11098186671733856, "incorrect_loss_per_char": 0.3940604527791341, "correct_loss_per_token": 0.44392746686935425, "incorrect_loss_per_token": 1.1821813583374023, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.44392746686935425, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.44392746686935425, "logits_per_char": -0.11098186671733856, "num_chars": 4}, {"sum_logits": -1.1821813583374023, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.1821813583374023, "logits_per_char": -0.3940604527791341, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 112, "native_id": 1800, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5379742383956909, "incorrect_loss_raw": 1.0432451963424683, "correct_loss_per_char": 0.13449355959892273, "incorrect_loss_per_char": 0.34774839878082275, "correct_loss_per_token": 0.5379742383956909, "incorrect_loss_per_token": 1.0432451963424683, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5379742383956909, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": true, "logits_per_token": -0.5379742383956909, "logits_per_char": -0.13449355959892273, "num_chars": 4}, {"sum_logits": -1.0432451963424683, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": false, "logits_per_token": -1.0432451963424683, "logits_per_char": -0.34774839878082275, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 113, "native_id": 3004, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9061881303787231, "incorrect_loss_raw": 0.6755712032318115, "correct_loss_per_char": 0.30206271012624103, "incorrect_loss_per_char": 0.16889280080795288, "correct_loss_per_token": 0.9061881303787231, "incorrect_loss_per_token": 0.6755712032318115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6755712032318115, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": true, "logits_per_token": -0.6755712032318115, "logits_per_char": -0.16889280080795288, "num_chars": 4}, {"sum_logits": -0.9061881303787231, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": false, "logits_per_token": -0.9061881303787231, "logits_per_char": -0.30206271012624103, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 114, "native_id": 2126, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27117547392845154, "incorrect_loss_raw": 1.6588891744613647, "correct_loss_per_char": 0.06779386848211288, "incorrect_loss_per_char": 0.5529630581537882, "correct_loss_per_token": 0.27117547392845154, "incorrect_loss_per_token": 1.6588891744613647, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27117547392845154, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": true, "logits_per_token": -0.27117547392845154, "logits_per_char": -0.06779386848211288, "num_chars": 4}, {"sum_logits": -1.6588891744613647, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": false, "logits_per_token": -1.6588891744613647, "logits_per_char": -0.5529630581537882, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 115, "native_id": 1793, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2144855409860611, "incorrect_loss_raw": 1.8961889743804932, "correct_loss_per_char": 0.053621385246515274, "incorrect_loss_per_char": 0.6320629914601644, "correct_loss_per_token": 0.2144855409860611, "incorrect_loss_per_token": 1.8961889743804932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2144855409860611, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.2144855409860611, "logits_per_char": -0.053621385246515274, "num_chars": 4}, {"sum_logits": -1.8961889743804932, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.8961889743804932, "logits_per_char": -0.6320629914601644, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 116, "native_id": 1211, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2769269943237305, "incorrect_loss_raw": 0.4380030035972595, "correct_loss_per_char": 0.42564233144124347, "incorrect_loss_per_char": 0.10950075089931488, "correct_loss_per_token": 1.2769269943237305, "incorrect_loss_per_token": 0.4380030035972595, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4380030035972595, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.4380030035972595, "logits_per_char": -0.10950075089931488, "num_chars": 4}, {"sum_logits": -1.2769269943237305, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.2769269943237305, "logits_per_char": -0.42564233144124347, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 117, "native_id": 1126, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1325011253356934, "incorrect_loss_raw": 0.483429878950119, "correct_loss_per_char": 0.37750037511189777, "incorrect_loss_per_char": 0.12085746973752975, "correct_loss_per_token": 1.1325011253356934, "incorrect_loss_per_token": 0.483429878950119, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.483429878950119, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.483429878950119, "logits_per_char": -0.12085746973752975, "num_chars": 4}, {"sum_logits": -1.1325011253356934, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.1325011253356934, "logits_per_char": -0.37750037511189777, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 118, "native_id": 507, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9146512746810913, "incorrect_loss_raw": 0.617286741733551, "correct_loss_per_char": 0.30488375822703045, "incorrect_loss_per_char": 0.15432168543338776, "correct_loss_per_token": 0.9146512746810913, "incorrect_loss_per_token": 0.617286741733551, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.617286741733551, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -0.617286741733551, "logits_per_char": -0.15432168543338776, "num_chars": 4}, {"sum_logits": -0.9146512746810913, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -0.9146512746810913, "logits_per_char": -0.30488375822703045, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 119, "native_id": 760, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5566046237945557, "incorrect_loss_raw": 1.1799325942993164, "correct_loss_per_char": 0.13915115594863892, "incorrect_loss_per_char": 0.3933108647664388, "correct_loss_per_token": 0.5566046237945557, "incorrect_loss_per_token": 1.1799325942993164, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5566046237945557, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": true, "logits_per_token": -0.5566046237945557, "logits_per_char": -0.13915115594863892, "num_chars": 4}, {"sum_logits": -1.1799325942993164, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.1799325942993164, "logits_per_char": -0.3933108647664388, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 120, "native_id": 1705, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.811285138130188, "incorrect_loss_raw": 0.6952874660491943, "correct_loss_per_char": 0.202821284532547, "incorrect_loss_per_char": 0.2317624886830648, "correct_loss_per_token": 0.811285138130188, "incorrect_loss_per_token": 0.6952874660491943, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.811285138130188, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -0.811285138130188, "logits_per_char": -0.202821284532547, "num_chars": 4}, {"sum_logits": -0.6952874660491943, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -0.6952874660491943, "logits_per_char": -0.2317624886830648, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 121, "native_id": 1786, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3344864249229431, "incorrect_loss_raw": 1.4874236583709717, "correct_loss_per_char": 0.08362160623073578, "incorrect_loss_per_char": 0.4958078861236572, "correct_loss_per_token": 0.3344864249229431, "incorrect_loss_per_token": 1.4874236583709717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3344864249229431, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": true, "logits_per_token": -0.3344864249229431, "logits_per_char": -0.08362160623073578, "num_chars": 4}, {"sum_logits": -1.4874236583709717, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": false, "logits_per_token": -1.4874236583709717, "logits_per_char": -0.4958078861236572, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 122, "native_id": 489, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0740766525268555, "incorrect_loss_raw": 0.4759710729122162, "correct_loss_per_char": 0.35802555084228516, "incorrect_loss_per_char": 0.11899276822805405, "correct_loss_per_token": 1.0740766525268555, "incorrect_loss_per_token": 0.4759710729122162, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4759710729122162, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.4759710729122162, "logits_per_char": -0.11899276822805405, "num_chars": 4}, {"sum_logits": -1.0740766525268555, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.0740766525268555, "logits_per_char": -0.35802555084228516, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 123, "native_id": 2170, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5066723823547363, "incorrect_loss_raw": 0.32411178946495056, "correct_loss_per_char": 0.5022241274515787, "incorrect_loss_per_char": 0.08102794736623764, "correct_loss_per_token": 1.5066723823547363, "incorrect_loss_per_token": 0.32411178946495056, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32411178946495056, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.32411178946495056, "logits_per_char": -0.08102794736623764, "num_chars": 4}, {"sum_logits": -1.5066723823547363, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.5066723823547363, "logits_per_char": -0.5022241274515787, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 124, "native_id": 422, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7460163831710815, "incorrect_loss_raw": 0.7638234496116638, "correct_loss_per_char": 0.24867212772369385, "incorrect_loss_per_char": 0.19095586240291595, "correct_loss_per_token": 0.7460163831710815, "incorrect_loss_per_token": 0.7638234496116638, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7638234496116638, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -0.7638234496116638, "logits_per_char": -0.19095586240291595, "num_chars": 4}, {"sum_logits": -0.7460163831710815, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -0.7460163831710815, "logits_per_char": -0.24867212772369385, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 125, "native_id": 1987, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.36710575222969055, "incorrect_loss_raw": 1.424883246421814, "correct_loss_per_char": 0.09177643805742264, "incorrect_loss_per_char": 0.4749610821406047, "correct_loss_per_token": 0.36710575222969055, "incorrect_loss_per_token": 1.424883246421814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.36710575222969055, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.36710575222969055, "logits_per_char": -0.09177643805742264, "num_chars": 4}, {"sum_logits": -1.424883246421814, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.424883246421814, "logits_per_char": -0.4749610821406047, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 126, "native_id": 1543, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3902393579483032, "incorrect_loss_raw": 0.3417406380176544, "correct_loss_per_char": 0.4634131193161011, "incorrect_loss_per_char": 0.0854351595044136, "correct_loss_per_token": 1.3902393579483032, "incorrect_loss_per_token": 0.3417406380176544, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3417406380176544, "num_tokens": 1, "num_tokens_all": 1012, "is_greedy": true, "logits_per_token": -0.3417406380176544, "logits_per_char": -0.0854351595044136, "num_chars": 4}, {"sum_logits": -1.3902393579483032, "num_tokens": 1, "num_tokens_all": 1012, "is_greedy": false, "logits_per_token": -1.3902393579483032, "logits_per_char": -0.4634131193161011, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 127, "native_id": 2688, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9109987020492554, "incorrect_loss_raw": 0.1824399083852768, "correct_loss_per_char": 0.6369995673497518, "incorrect_loss_per_char": 0.0456099770963192, "correct_loss_per_token": 1.9109987020492554, "incorrect_loss_per_token": 0.1824399083852768, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1824399083852768, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.1824399083852768, "logits_per_char": -0.0456099770963192, "num_chars": 4}, {"sum_logits": -1.9109987020492554, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.9109987020492554, "logits_per_char": -0.6369995673497518, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 128, "native_id": 1046, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6688045859336853, "incorrect_loss_raw": 0.8402540683746338, "correct_loss_per_char": 0.16720114648342133, "incorrect_loss_per_char": 0.28008468945821124, "correct_loss_per_token": 0.6688045859336853, "incorrect_loss_per_token": 0.8402540683746338, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6688045859336853, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": true, "logits_per_token": -0.6688045859336853, "logits_per_char": -0.16720114648342133, "num_chars": 4}, {"sum_logits": -0.8402540683746338, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": false, "logits_per_token": -0.8402540683746338, "logits_per_char": -0.28008468945821124, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 129, "native_id": 2625, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.49173203110694885, "incorrect_loss_raw": 1.0828450918197632, "correct_loss_per_char": 0.16391067703564963, "incorrect_loss_per_char": 0.2707112729549408, "correct_loss_per_token": 0.49173203110694885, "incorrect_loss_per_token": 1.0828450918197632, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0828450918197632, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -1.0828450918197632, "logits_per_char": -0.2707112729549408, "num_chars": 4}, {"sum_logits": -0.49173203110694885, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": true, "logits_per_token": -0.49173203110694885, "logits_per_char": -0.16391067703564963, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 130, "native_id": 784, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7028936743736267, "incorrect_loss_raw": 0.7578234672546387, "correct_loss_per_char": 0.23429789145787558, "incorrect_loss_per_char": 0.18945586681365967, "correct_loss_per_token": 0.7028936743736267, "incorrect_loss_per_token": 0.7578234672546387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7578234672546387, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": false, "logits_per_token": -0.7578234672546387, "logits_per_char": -0.18945586681365967, "num_chars": 4}, {"sum_logits": -0.7028936743736267, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": true, "logits_per_token": -0.7028936743736267, "logits_per_char": -0.23429789145787558, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 131, "native_id": 1414, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4667505621910095, "incorrect_loss_raw": 1.1253284215927124, "correct_loss_per_char": 0.11668764054775238, "incorrect_loss_per_char": 0.3751094738642375, "correct_loss_per_token": 0.4667505621910095, "incorrect_loss_per_token": 1.1253284215927124, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4667505621910095, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.4667505621910095, "logits_per_char": -0.11668764054775238, "num_chars": 4}, {"sum_logits": -1.1253284215927124, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -1.1253284215927124, "logits_per_char": -0.3751094738642375, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 132, "native_id": 443, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19480228424072266, "incorrect_loss_raw": 1.9451639652252197, "correct_loss_per_char": 0.048700571060180664, "incorrect_loss_per_char": 0.6483879884084066, "correct_loss_per_token": 0.19480228424072266, "incorrect_loss_per_token": 1.9451639652252197, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19480228424072266, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.19480228424072266, "logits_per_char": -0.048700571060180664, "num_chars": 4}, {"sum_logits": -1.9451639652252197, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -1.9451639652252197, "logits_per_char": -0.6483879884084066, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 133, "native_id": 2878, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0758192539215088, "incorrect_loss_raw": 0.5066338777542114, "correct_loss_per_char": 0.2689548134803772, "incorrect_loss_per_char": 0.1688779592514038, "correct_loss_per_token": 1.0758192539215088, "incorrect_loss_per_token": 0.5066338777542114, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0758192539215088, "num_tokens": 1, "num_tokens_all": 1202, "is_greedy": false, "logits_per_token": -1.0758192539215088, "logits_per_char": -0.2689548134803772, "num_chars": 4}, {"sum_logits": -0.5066338777542114, "num_tokens": 1, "num_tokens_all": 1202, "is_greedy": true, "logits_per_token": -0.5066338777542114, "logits_per_char": -0.1688779592514038, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 134, "native_id": 2867, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.403616189956665, "incorrect_loss_raw": 0.34348586201667786, "correct_loss_per_char": 0.46787206331888836, "incorrect_loss_per_char": 0.08587146550416946, "correct_loss_per_token": 1.403616189956665, "incorrect_loss_per_token": 0.34348586201667786, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34348586201667786, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": true, "logits_per_token": -0.34348586201667786, "logits_per_char": -0.08587146550416946, "num_chars": 4}, {"sum_logits": -1.403616189956665, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": false, "logits_per_token": -1.403616189956665, "logits_per_char": -0.46787206331888836, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 135, "native_id": 643, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5244249105453491, "incorrect_loss_raw": 1.1340358257293701, "correct_loss_per_char": 0.13110622763633728, "incorrect_loss_per_char": 0.37801194190979004, "correct_loss_per_token": 0.5244249105453491, "incorrect_loss_per_token": 1.1340358257293701, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5244249105453491, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.5244249105453491, "logits_per_char": -0.13110622763633728, "num_chars": 4}, {"sum_logits": -1.1340358257293701, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -1.1340358257293701, "logits_per_char": -0.37801194190979004, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 136, "native_id": 2377, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.14726576209068298, "incorrect_loss_raw": 2.241745710372925, "correct_loss_per_char": 0.036816440522670746, "incorrect_loss_per_char": 0.7472485701243082, "correct_loss_per_token": 0.14726576209068298, "incorrect_loss_per_token": 2.241745710372925, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.14726576209068298, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.14726576209068298, "logits_per_char": -0.036816440522670746, "num_chars": 4}, {"sum_logits": -2.241745710372925, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -2.241745710372925, "logits_per_char": -0.7472485701243082, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 137, "native_id": 1103, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.151376485824585, "incorrect_loss_raw": 0.4996231198310852, "correct_loss_per_char": 0.3837921619415283, "incorrect_loss_per_char": 0.1249057799577713, "correct_loss_per_token": 1.151376485824585, "incorrect_loss_per_token": 0.4996231198310852, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4996231198310852, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.4996231198310852, "logits_per_char": -0.1249057799577713, "num_chars": 4}, {"sum_logits": -1.151376485824585, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.151376485824585, "logits_per_char": -0.3837921619415283, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 138, "native_id": 634, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1462384462356567, "incorrect_loss_raw": 0.4652518033981323, "correct_loss_per_char": 0.38207948207855225, "incorrect_loss_per_char": 0.11631295084953308, "correct_loss_per_token": 1.1462384462356567, "incorrect_loss_per_token": 0.4652518033981323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4652518033981323, "num_tokens": 1, "num_tokens_all": 1026, "is_greedy": true, "logits_per_token": -0.4652518033981323, "logits_per_char": -0.11631295084953308, "num_chars": 4}, {"sum_logits": -1.1462384462356567, "num_tokens": 1, "num_tokens_all": 1026, "is_greedy": false, "logits_per_token": -1.1462384462356567, "logits_per_char": -0.38207948207855225, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 139, "native_id": 2949, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28969627618789673, "incorrect_loss_raw": 1.5353355407714844, "correct_loss_per_char": 0.07242406904697418, "incorrect_loss_per_char": 0.5117785135904948, "correct_loss_per_token": 0.28969627618789673, "incorrect_loss_per_token": 1.5353355407714844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28969627618789673, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -0.28969627618789673, "logits_per_char": -0.07242406904697418, "num_chars": 4}, {"sum_logits": -1.5353355407714844, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.5353355407714844, "logits_per_char": -0.5117785135904948, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 140, "native_id": 1325, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8147688508033752, "incorrect_loss_raw": 0.6600760221481323, "correct_loss_per_char": 0.27158961693445843, "incorrect_loss_per_char": 0.16501900553703308, "correct_loss_per_token": 0.8147688508033752, "incorrect_loss_per_token": 0.6600760221481323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6600760221481323, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.6600760221481323, "logits_per_char": -0.16501900553703308, "num_chars": 4}, {"sum_logits": -0.8147688508033752, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -0.8147688508033752, "logits_per_char": -0.27158961693445843, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 141, "native_id": 1829, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.35401660203933716, "incorrect_loss_raw": 1.344321846961975, "correct_loss_per_char": 0.08850415050983429, "incorrect_loss_per_char": 0.4481072823206584, "correct_loss_per_token": 0.35401660203933716, "incorrect_loss_per_token": 1.344321846961975, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.35401660203933716, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.35401660203933716, "logits_per_char": -0.08850415050983429, "num_chars": 4}, {"sum_logits": -1.344321846961975, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.344321846961975, "logits_per_char": -0.4481072823206584, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 142, "native_id": 2951, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30736058950424194, "incorrect_loss_raw": 1.5480015277862549, "correct_loss_per_char": 0.07684014737606049, "incorrect_loss_per_char": 0.516000509262085, "correct_loss_per_token": 0.30736058950424194, "incorrect_loss_per_token": 1.5480015277862549, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30736058950424194, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -0.30736058950424194, "logits_per_char": -0.07684014737606049, "num_chars": 4}, {"sum_logits": -1.5480015277862549, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -1.5480015277862549, "logits_per_char": -0.516000509262085, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 143, "native_id": 3209, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.16189490258693695, "incorrect_loss_raw": 2.239610195159912, "correct_loss_per_char": 0.04047372564673424, "incorrect_loss_per_char": 0.7465367317199707, "correct_loss_per_token": 0.16189490258693695, "incorrect_loss_per_token": 2.239610195159912, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16189490258693695, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.16189490258693695, "logits_per_char": -0.04047372564673424, "num_chars": 4}, {"sum_logits": -2.239610195159912, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -2.239610195159912, "logits_per_char": -0.7465367317199707, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 144, "native_id": 321, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2278348058462143, "incorrect_loss_raw": 1.7662423849105835, "correct_loss_per_char": 0.056958701461553574, "incorrect_loss_per_char": 0.5887474616368612, "correct_loss_per_token": 0.2278348058462143, "incorrect_loss_per_token": 1.7662423849105835, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2278348058462143, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.2278348058462143, "logits_per_char": -0.056958701461553574, "num_chars": 4}, {"sum_logits": -1.7662423849105835, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -1.7662423849105835, "logits_per_char": -0.5887474616368612, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 145, "native_id": 1618, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0175905227661133, "incorrect_loss_raw": 0.5362159609794617, "correct_loss_per_char": 0.3391968409220378, "incorrect_loss_per_char": 0.13405399024486542, "correct_loss_per_token": 1.0175905227661133, "incorrect_loss_per_token": 0.5362159609794617, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5362159609794617, "num_tokens": 1, "num_tokens_all": 875, "is_greedy": true, "logits_per_token": -0.5362159609794617, "logits_per_char": -0.13405399024486542, "num_chars": 4}, {"sum_logits": -1.0175905227661133, "num_tokens": 1, "num_tokens_all": 875, "is_greedy": false, "logits_per_token": -1.0175905227661133, "logits_per_char": -0.3391968409220378, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 146, "native_id": 877, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38359779119491577, "incorrect_loss_raw": 1.247079610824585, "correct_loss_per_char": 0.09589944779872894, "incorrect_loss_per_char": 0.415693203608195, "correct_loss_per_token": 0.38359779119491577, "incorrect_loss_per_token": 1.247079610824585, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.38359779119491577, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.38359779119491577, "logits_per_char": -0.09589944779872894, "num_chars": 4}, {"sum_logits": -1.247079610824585, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.247079610824585, "logits_per_char": -0.415693203608195, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 147, "native_id": 195, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.41101112961769104, "incorrect_loss_raw": 1.1811823844909668, "correct_loss_per_char": 0.10275278240442276, "incorrect_loss_per_char": 0.39372746149698895, "correct_loss_per_token": 0.41101112961769104, "incorrect_loss_per_token": 1.1811823844909668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.41101112961769104, "num_tokens": 1, "num_tokens_all": 1270, "is_greedy": true, "logits_per_token": -0.41101112961769104, "logits_per_char": -0.10275278240442276, "num_chars": 4}, {"sum_logits": -1.1811823844909668, "num_tokens": 1, "num_tokens_all": 1270, "is_greedy": false, "logits_per_token": -1.1811823844909668, "logits_per_char": -0.39372746149698895, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 148, "native_id": 1172, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.296902060508728, "incorrect_loss_raw": 0.4172477722167969, "correct_loss_per_char": 0.4323006868362427, "incorrect_loss_per_char": 0.10431194305419922, "correct_loss_per_token": 1.296902060508728, "incorrect_loss_per_token": 0.4172477722167969, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4172477722167969, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.4172477722167969, "logits_per_char": -0.10431194305419922, "num_chars": 4}, {"sum_logits": -1.296902060508728, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -1.296902060508728, "logits_per_char": -0.4323006868362427, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 149, "native_id": 155, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8683995604515076, "incorrect_loss_raw": 0.6882686018943787, "correct_loss_per_char": 0.2170998901128769, "incorrect_loss_per_char": 0.22942286729812622, "correct_loss_per_token": 0.8683995604515076, "incorrect_loss_per_token": 0.6882686018943787, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8683995604515076, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": false, "logits_per_token": -0.8683995604515076, "logits_per_char": -0.2170998901128769, "num_chars": 4}, {"sum_logits": -0.6882686018943787, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": true, "logits_per_token": -0.6882686018943787, "logits_per_char": -0.22942286729812622, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 150, "native_id": 898, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22696970403194427, "incorrect_loss_raw": 1.8481580018997192, "correct_loss_per_char": 0.05674242600798607, "incorrect_loss_per_char": 0.6160526672999064, "correct_loss_per_token": 0.22696970403194427, "incorrect_loss_per_token": 1.8481580018997192, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22696970403194427, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.22696970403194427, "logits_per_char": -0.05674242600798607, "num_chars": 4}, {"sum_logits": -1.8481580018997192, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.8481580018997192, "logits_per_char": -0.6160526672999064, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 151, "native_id": 2075, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5664156675338745, "incorrect_loss_raw": 0.256670206785202, "correct_loss_per_char": 0.5221385558446249, "incorrect_loss_per_char": 0.0641675516963005, "correct_loss_per_token": 1.5664156675338745, "incorrect_loss_per_token": 0.256670206785202, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.256670206785202, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.256670206785202, "logits_per_char": -0.0641675516963005, "num_chars": 4}, {"sum_logits": -1.5664156675338745, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.5664156675338745, "logits_per_char": -0.5221385558446249, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 152, "native_id": 359, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4497263431549072, "incorrect_loss_raw": 0.339224636554718, "correct_loss_per_char": 0.48324211438496906, "incorrect_loss_per_char": 0.0848061591386795, "correct_loss_per_token": 1.4497263431549072, "incorrect_loss_per_token": 0.339224636554718, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.339224636554718, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.339224636554718, "logits_per_char": -0.0848061591386795, "num_chars": 4}, {"sum_logits": -1.4497263431549072, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.4497263431549072, "logits_per_char": -0.48324211438496906, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 153, "native_id": 2864, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.39003682136535645, "incorrect_loss_raw": 1.2916796207427979, "correct_loss_per_char": 0.09750920534133911, "incorrect_loss_per_char": 0.4305598735809326, "correct_loss_per_token": 0.39003682136535645, "incorrect_loss_per_token": 1.2916796207427979, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.39003682136535645, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.39003682136535645, "logits_per_char": -0.09750920534133911, "num_chars": 4}, {"sum_logits": -1.2916796207427979, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.2916796207427979, "logits_per_char": -0.4305598735809326, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 154, "native_id": 1298, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2471916377544403, "incorrect_loss_raw": 1.6414436101913452, "correct_loss_per_char": 0.06179790943861008, "incorrect_loss_per_char": 0.5471478700637817, "correct_loss_per_token": 0.2471916377544403, "incorrect_loss_per_token": 1.6414436101913452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2471916377544403, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.2471916377544403, "logits_per_char": -0.06179790943861008, "num_chars": 4}, {"sum_logits": -1.6414436101913452, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.6414436101913452, "logits_per_char": -0.5471478700637817, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 155, "native_id": 1251, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23484385013580322, "incorrect_loss_raw": 1.8594396114349365, "correct_loss_per_char": 0.058710962533950806, "incorrect_loss_per_char": 0.6198132038116455, "correct_loss_per_token": 0.23484385013580322, "incorrect_loss_per_token": 1.8594396114349365, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23484385013580322, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.23484385013580322, "logits_per_char": -0.058710962533950806, "num_chars": 4}, {"sum_logits": -1.8594396114349365, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -1.8594396114349365, "logits_per_char": -0.6198132038116455, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 156, "native_id": 1887, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19672846794128418, "incorrect_loss_raw": 2.1279067993164062, "correct_loss_per_char": 0.049182116985321045, "incorrect_loss_per_char": 0.7093022664388021, "correct_loss_per_token": 0.19672846794128418, "incorrect_loss_per_token": 2.1279067993164062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19672846794128418, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.19672846794128418, "logits_per_char": -0.049182116985321045, "num_chars": 4}, {"sum_logits": -2.1279067993164062, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -2.1279067993164062, "logits_per_char": -0.7093022664388021, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 157, "native_id": 271, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1932939291000366, "incorrect_loss_raw": 0.431530624628067, "correct_loss_per_char": 0.3977646430333455, "incorrect_loss_per_char": 0.10788265615701675, "correct_loss_per_token": 1.1932939291000366, "incorrect_loss_per_token": 0.431530624628067, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.431530624628067, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.431530624628067, "logits_per_char": -0.10788265615701675, "num_chars": 4}, {"sum_logits": -1.1932939291000366, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -1.1932939291000366, "logits_per_char": -0.3977646430333455, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 158, "native_id": 2396, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20651483535766602, "incorrect_loss_raw": 1.9215552806854248, "correct_loss_per_char": 0.051628708839416504, "incorrect_loss_per_char": 0.6405184268951416, "correct_loss_per_token": 0.20651483535766602, "incorrect_loss_per_token": 1.9215552806854248, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20651483535766602, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.20651483535766602, "logits_per_char": -0.051628708839416504, "num_chars": 4}, {"sum_logits": -1.9215552806854248, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -1.9215552806854248, "logits_per_char": -0.6405184268951416, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 159, "native_id": 1054, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5001388788223267, "incorrect_loss_raw": 1.0350538492202759, "correct_loss_per_char": 0.12503471970558167, "incorrect_loss_per_char": 0.345017949740092, "correct_loss_per_token": 0.5001388788223267, "incorrect_loss_per_token": 1.0350538492202759, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5001388788223267, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": true, "logits_per_token": -0.5001388788223267, "logits_per_char": -0.12503471970558167, "num_chars": 4}, {"sum_logits": -1.0350538492202759, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -1.0350538492202759, "logits_per_char": -0.345017949740092, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 160, "native_id": 299, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1589617133140564, "incorrect_loss_raw": 2.0985488891601562, "correct_loss_per_char": 0.0397404283285141, "incorrect_loss_per_char": 0.6995162963867188, "correct_loss_per_token": 0.1589617133140564, "incorrect_loss_per_token": 2.0985488891601562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1589617133140564, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": true, "logits_per_token": -0.1589617133140564, "logits_per_char": -0.0397404283285141, "num_chars": 4}, {"sum_logits": -2.0985488891601562, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": false, "logits_per_token": -2.0985488891601562, "logits_per_char": -0.6995162963867188, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 161, "native_id": 2821, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2603999376296997, "incorrect_loss_raw": 1.5670992136001587, "correct_loss_per_char": 0.06509998440742493, "incorrect_loss_per_char": 0.5223664045333862, "correct_loss_per_token": 0.2603999376296997, "incorrect_loss_per_token": 1.5670992136001587, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2603999376296997, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -0.2603999376296997, "logits_per_char": -0.06509998440742493, "num_chars": 4}, {"sum_logits": -1.5670992136001587, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.5670992136001587, "logits_per_char": -0.5223664045333862, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 162, "native_id": 1746, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23805758357048035, "incorrect_loss_raw": 1.8860902786254883, "correct_loss_per_char": 0.05951439589262009, "incorrect_loss_per_char": 0.6286967595418295, "correct_loss_per_token": 0.23805758357048035, "incorrect_loss_per_token": 1.8860902786254883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23805758357048035, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.23805758357048035, "logits_per_char": -0.05951439589262009, "num_chars": 4}, {"sum_logits": -1.8860902786254883, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.8860902786254883, "logits_per_char": -0.6286967595418295, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 163, "native_id": 826, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.15680094063282013, "incorrect_loss_raw": 2.2245593070983887, "correct_loss_per_char": 0.03920023515820503, "incorrect_loss_per_char": 0.7415197690327963, "correct_loss_per_token": 0.15680094063282013, "incorrect_loss_per_token": 2.2245593070983887, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.15680094063282013, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.15680094063282013, "logits_per_char": -0.03920023515820503, "num_chars": 4}, {"sum_logits": -2.2245593070983887, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -2.2245593070983887, "logits_per_char": -0.7415197690327963, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 164, "native_id": 414, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8368138074874878, "incorrect_loss_raw": 0.6369546055793762, "correct_loss_per_char": 0.2789379358291626, "incorrect_loss_per_char": 0.15923865139484406, "correct_loss_per_token": 0.8368138074874878, "incorrect_loss_per_token": 0.6369546055793762, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6369546055793762, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.6369546055793762, "logits_per_char": -0.15923865139484406, "num_chars": 4}, {"sum_logits": -0.8368138074874878, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -0.8368138074874878, "logits_per_char": -0.2789379358291626, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 165, "native_id": 1624, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5018479824066162, "incorrect_loss_raw": 1.0277957916259766, "correct_loss_per_char": 0.12546199560165405, "incorrect_loss_per_char": 0.3425985972086589, "correct_loss_per_token": 0.5018479824066162, "incorrect_loss_per_token": 1.0277957916259766, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5018479824066162, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.5018479824066162, "logits_per_char": -0.12546199560165405, "num_chars": 4}, {"sum_logits": -1.0277957916259766, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.0277957916259766, "logits_per_char": -0.3425985972086589, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 166, "native_id": 797, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9070876836776733, "incorrect_loss_raw": 0.6088957190513611, "correct_loss_per_char": 0.3023625612258911, "incorrect_loss_per_char": 0.15222392976284027, "correct_loss_per_token": 0.9070876836776733, "incorrect_loss_per_token": 0.6088957190513611, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6088957190513611, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.6088957190513611, "logits_per_char": -0.15222392976284027, "num_chars": 4}, {"sum_logits": -0.9070876836776733, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -0.9070876836776733, "logits_per_char": -0.3023625612258911, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 167, "native_id": 2887, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18483176827430725, "incorrect_loss_raw": 2.076383590698242, "correct_loss_per_char": 0.04620794206857681, "incorrect_loss_per_char": 0.6921278635660807, "correct_loss_per_token": 0.18483176827430725, "incorrect_loss_per_token": 2.076383590698242, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18483176827430725, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.18483176827430725, "logits_per_char": -0.04620794206857681, "num_chars": 4}, {"sum_logits": -2.076383590698242, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -2.076383590698242, "logits_per_char": -0.6921278635660807, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 168, "native_id": 1882, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4533159732818604, "incorrect_loss_raw": 0.3207291066646576, "correct_loss_per_char": 0.4844386577606201, "incorrect_loss_per_char": 0.0801822766661644, "correct_loss_per_token": 1.4533159732818604, "incorrect_loss_per_token": 0.3207291066646576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3207291066646576, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": true, "logits_per_token": -0.3207291066646576, "logits_per_char": -0.0801822766661644, "num_chars": 4}, {"sum_logits": -1.4533159732818604, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": false, "logits_per_token": -1.4533159732818604, "logits_per_char": -0.4844386577606201, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 169, "native_id": 2050, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.148688554763794, "incorrect_loss_raw": 0.45594415068626404, "correct_loss_per_char": 0.38289618492126465, "incorrect_loss_per_char": 0.11398603767156601, "correct_loss_per_token": 1.148688554763794, "incorrect_loss_per_token": 0.45594415068626404, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.45594415068626404, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": true, "logits_per_token": -0.45594415068626404, "logits_per_char": -0.11398603767156601, "num_chars": 4}, {"sum_logits": -1.148688554763794, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": false, "logits_per_token": -1.148688554763794, "logits_per_char": -0.38289618492126465, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 170, "native_id": 967, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3899138867855072, "incorrect_loss_raw": 1.4266629219055176, "correct_loss_per_char": 0.0974784716963768, "incorrect_loss_per_char": 0.4755543073018392, "correct_loss_per_token": 0.3899138867855072, "incorrect_loss_per_token": 1.4266629219055176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3899138867855072, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": true, "logits_per_token": -0.3899138867855072, "logits_per_char": -0.0974784716963768, "num_chars": 4}, {"sum_logits": -1.4266629219055176, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": false, "logits_per_token": -1.4266629219055176, "logits_per_char": -0.4755543073018392, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 171, "native_id": 1479, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.10541388392448425, "incorrect_loss_raw": 2.7645976543426514, "correct_loss_per_char": 0.026353470981121063, "incorrect_loss_per_char": 0.9215325514475504, "correct_loss_per_token": 0.10541388392448425, "incorrect_loss_per_token": 2.7645976543426514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.10541388392448425, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": true, "logits_per_token": -0.10541388392448425, "logits_per_char": -0.026353470981121063, "num_chars": 4}, {"sum_logits": -2.7645976543426514, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -2.7645976543426514, "logits_per_char": -0.9215325514475504, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 172, "native_id": 840, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6546761989593506, "incorrect_loss_raw": 0.8117265701293945, "correct_loss_per_char": 0.21822539965311685, "incorrect_loss_per_char": 0.20293164253234863, "correct_loss_per_token": 0.6546761989593506, "incorrect_loss_per_token": 0.8117265701293945, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8117265701293945, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -0.8117265701293945, "logits_per_char": -0.20293164253234863, "num_chars": 4}, {"sum_logits": -0.6546761989593506, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.6546761989593506, "logits_per_char": -0.21822539965311685, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 173, "native_id": 3228, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1524043083190918, "incorrect_loss_raw": 2.140326499938965, "correct_loss_per_char": 0.03810107707977295, "incorrect_loss_per_char": 0.7134421666463217, "correct_loss_per_token": 0.1524043083190918, "incorrect_loss_per_token": 2.140326499938965, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1524043083190918, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.1524043083190918, "logits_per_char": -0.03810107707977295, "num_chars": 4}, {"sum_logits": -2.140326499938965, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -2.140326499938965, "logits_per_char": -0.7134421666463217, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 174, "native_id": 2877, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2197532206773758, "incorrect_loss_raw": 1.7441937923431396, "correct_loss_per_char": 0.05493830516934395, "incorrect_loss_per_char": 0.5813979307810465, "correct_loss_per_token": 0.2197532206773758, "incorrect_loss_per_token": 1.7441937923431396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2197532206773758, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.2197532206773758, "logits_per_char": -0.05493830516934395, "num_chars": 4}, {"sum_logits": -1.7441937923431396, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.7441937923431396, "logits_per_char": -0.5813979307810465, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 175, "native_id": 1725, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0169084072113037, "incorrect_loss_raw": 0.1864660531282425, "correct_loss_per_char": 0.6723028024037679, "incorrect_loss_per_char": 0.04661651328206062, "correct_loss_per_token": 2.0169084072113037, "incorrect_loss_per_token": 0.1864660531282425, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1864660531282425, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.1864660531282425, "logits_per_char": -0.04661651328206062, "num_chars": 4}, {"sum_logits": -2.0169084072113037, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -2.0169084072113037, "logits_per_char": -0.6723028024037679, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 176, "native_id": 715, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4108642041683197, "incorrect_loss_raw": 1.350534439086914, "correct_loss_per_char": 0.10271605104207993, "incorrect_loss_per_char": 0.4501781463623047, "correct_loss_per_token": 0.4108642041683197, "incorrect_loss_per_token": 1.350534439086914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4108642041683197, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.4108642041683197, "logits_per_char": -0.10271605104207993, "num_chars": 4}, {"sum_logits": -1.350534439086914, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.350534439086914, "logits_per_char": -0.4501781463623047, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 177, "native_id": 2394, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6609622836112976, "incorrect_loss_raw": 0.8485350608825684, "correct_loss_per_char": 0.1652405709028244, "incorrect_loss_per_char": 0.28284502029418945, "correct_loss_per_token": 0.6609622836112976, "incorrect_loss_per_token": 0.8485350608825684, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6609622836112976, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.6609622836112976, "logits_per_char": -0.1652405709028244, "num_chars": 4}, {"sum_logits": -0.8485350608825684, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -0.8485350608825684, "logits_per_char": -0.28284502029418945, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 178, "native_id": 832, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.47064992785453796, "incorrect_loss_raw": 1.1746716499328613, "correct_loss_per_char": 0.15688330928484598, "incorrect_loss_per_char": 0.29366791248321533, "correct_loss_per_token": 0.47064992785453796, "incorrect_loss_per_token": 1.1746716499328613, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1746716499328613, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.1746716499328613, "logits_per_char": -0.29366791248321533, "num_chars": 4}, {"sum_logits": -0.47064992785453796, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -0.47064992785453796, "logits_per_char": -0.15688330928484598, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 179, "native_id": 1236, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3330498933792114, "incorrect_loss_raw": 0.3703475892543793, "correct_loss_per_char": 0.4443499644597371, "incorrect_loss_per_char": 0.09258689731359482, "correct_loss_per_token": 1.3330498933792114, "incorrect_loss_per_token": 0.3703475892543793, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3703475892543793, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.3703475892543793, "logits_per_char": -0.09258689731359482, "num_chars": 4}, {"sum_logits": -1.3330498933792114, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.3330498933792114, "logits_per_char": -0.4443499644597371, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 180, "native_id": 247, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5977982878684998, "incorrect_loss_raw": 0.9544247388839722, "correct_loss_per_char": 0.14944957196712494, "incorrect_loss_per_char": 0.3181415796279907, "correct_loss_per_token": 0.5977982878684998, "incorrect_loss_per_token": 0.9544247388839722, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5977982878684998, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.5977982878684998, "logits_per_char": -0.14944957196712494, "num_chars": 4}, {"sum_logits": -0.9544247388839722, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -0.9544247388839722, "logits_per_char": -0.3181415796279907, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 181, "native_id": 1443, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17676673829555511, "incorrect_loss_raw": 2.022977352142334, "correct_loss_per_char": 0.04419168457388878, "incorrect_loss_per_char": 0.6743257840474447, "correct_loss_per_token": 0.17676673829555511, "incorrect_loss_per_token": 2.022977352142334, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17676673829555511, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.17676673829555511, "logits_per_char": -0.04419168457388878, "num_chars": 4}, {"sum_logits": -2.022977352142334, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -2.022977352142334, "logits_per_char": -0.6743257840474447, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 182, "native_id": 2188, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30103710293769836, "incorrect_loss_raw": 1.615457534790039, "correct_loss_per_char": 0.07525927573442459, "incorrect_loss_per_char": 0.5384858449300131, "correct_loss_per_token": 0.30103710293769836, "incorrect_loss_per_token": 1.615457534790039, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30103710293769836, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": true, "logits_per_token": -0.30103710293769836, "logits_per_char": -0.07525927573442459, "num_chars": 4}, {"sum_logits": -1.615457534790039, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -1.615457534790039, "logits_per_char": -0.5384858449300131, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 183, "native_id": 626, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5456048250198364, "incorrect_loss_raw": 1.223517656326294, "correct_loss_per_char": 0.1364012062549591, "incorrect_loss_per_char": 0.40783921877543133, "correct_loss_per_token": 0.5456048250198364, "incorrect_loss_per_token": 1.223517656326294, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5456048250198364, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": true, "logits_per_token": -0.5456048250198364, "logits_per_char": -0.1364012062549591, "num_chars": 4}, {"sum_logits": -1.223517656326294, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": false, "logits_per_token": -1.223517656326294, "logits_per_char": -0.40783921877543133, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 184, "native_id": 2046, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19491304457187653, "incorrect_loss_raw": 2.168513298034668, "correct_loss_per_char": 0.04872826114296913, "incorrect_loss_per_char": 0.722837766011556, "correct_loss_per_token": 0.19491304457187653, "incorrect_loss_per_token": 2.168513298034668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19491304457187653, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.19491304457187653, "logits_per_char": -0.04872826114296913, "num_chars": 4}, {"sum_logits": -2.168513298034668, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -2.168513298034668, "logits_per_char": -0.722837766011556, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 185, "native_id": 2248, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.10843789577484131, "incorrect_loss_raw": 2.582789897918701, "correct_loss_per_char": 0.027109473943710327, "incorrect_loss_per_char": 0.8609299659729004, "correct_loss_per_token": 0.10843789577484131, "incorrect_loss_per_token": 2.582789897918701, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.10843789577484131, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.10843789577484131, "logits_per_char": -0.027109473943710327, "num_chars": 4}, {"sum_logits": -2.582789897918701, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -2.582789897918701, "logits_per_char": -0.8609299659729004, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 186, "native_id": 1935, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38491275906562805, "incorrect_loss_raw": 1.3541815280914307, "correct_loss_per_char": 0.09622818976640701, "incorrect_loss_per_char": 0.45139384269714355, "correct_loss_per_token": 0.38491275906562805, "incorrect_loss_per_token": 1.3541815280914307, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.38491275906562805, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.38491275906562805, "logits_per_char": -0.09622818976640701, "num_chars": 4}, {"sum_logits": -1.3541815280914307, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.3541815280914307, "logits_per_char": -0.45139384269714355, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 187, "native_id": 1367, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.13299524784088135, "incorrect_loss_raw": 2.286731719970703, "correct_loss_per_char": 0.03324881196022034, "incorrect_loss_per_char": 0.762243906656901, "correct_loss_per_token": 0.13299524784088135, "incorrect_loss_per_token": 2.286731719970703, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.13299524784088135, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": true, "logits_per_token": -0.13299524784088135, "logits_per_char": -0.03324881196022034, "num_chars": 4}, {"sum_logits": -2.286731719970703, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": false, "logits_per_token": -2.286731719970703, "logits_per_char": -0.762243906656901, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 188, "native_id": 568, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8460174798965454, "incorrect_loss_raw": 0.6278822422027588, "correct_loss_per_char": 0.2820058266321818, "incorrect_loss_per_char": 0.1569705605506897, "correct_loss_per_token": 0.8460174798965454, "incorrect_loss_per_token": 0.6278822422027588, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6278822422027588, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": true, "logits_per_token": -0.6278822422027588, "logits_per_char": -0.1569705605506897, "num_chars": 4}, {"sum_logits": -0.8460174798965454, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -0.8460174798965454, "logits_per_char": -0.2820058266321818, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 189, "native_id": 536, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2746193408966064, "incorrect_loss_raw": 0.41622406244277954, "correct_loss_per_char": 0.42487311363220215, "incorrect_loss_per_char": 0.10405601561069489, "correct_loss_per_token": 1.2746193408966064, "incorrect_loss_per_token": 0.41622406244277954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.41622406244277954, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.41622406244277954, "logits_per_char": -0.10405601561069489, "num_chars": 4}, {"sum_logits": -1.2746193408966064, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.2746193408966064, "logits_per_char": -0.42487311363220215, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 190, "native_id": 196, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3383551836013794, "incorrect_loss_raw": 0.4220036268234253, "correct_loss_per_char": 0.44611839453379315, "incorrect_loss_per_char": 0.10550090670585632, "correct_loss_per_token": 1.3383551836013794, "incorrect_loss_per_token": 0.4220036268234253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4220036268234253, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.4220036268234253, "logits_per_char": -0.10550090670585632, "num_chars": 4}, {"sum_logits": -1.3383551836013794, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.3383551836013794, "logits_per_char": -0.44611839453379315, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 191, "native_id": 2557, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4451082944869995, "incorrect_loss_raw": 1.1418207883834839, "correct_loss_per_char": 0.11127707362174988, "incorrect_loss_per_char": 0.3806069294611613, "correct_loss_per_token": 0.4451082944869995, "incorrect_loss_per_token": 1.1418207883834839, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4451082944869995, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.4451082944869995, "logits_per_char": -0.11127707362174988, "num_chars": 4}, {"sum_logits": -1.1418207883834839, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.1418207883834839, "logits_per_char": -0.3806069294611613, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 192, "native_id": 676, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6381628513336182, "incorrect_loss_raw": 0.26203587651252747, "correct_loss_per_char": 0.5460542837778727, "incorrect_loss_per_char": 0.06550896912813187, "correct_loss_per_token": 1.6381628513336182, "incorrect_loss_per_token": 0.26203587651252747, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26203587651252747, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -0.26203587651252747, "logits_per_char": -0.06550896912813187, "num_chars": 4}, {"sum_logits": -1.6381628513336182, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.6381628513336182, "logits_per_char": -0.5460542837778727, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 193, "native_id": 593, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2866270542144775, "incorrect_loss_raw": 0.3661457896232605, "correct_loss_per_char": 0.4288756847381592, "incorrect_loss_per_char": 0.09153644740581512, "correct_loss_per_token": 1.2866270542144775, "incorrect_loss_per_token": 0.3661457896232605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3661457896232605, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.3661457896232605, "logits_per_char": -0.09153644740581512, "num_chars": 4}, {"sum_logits": -1.2866270542144775, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.2866270542144775, "logits_per_char": -0.4288756847381592, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 194, "native_id": 2236, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8011682033538818, "incorrect_loss_raw": 0.21037603914737701, "correct_loss_per_char": 0.6003894011179606, "incorrect_loss_per_char": 0.052594009786844254, "correct_loss_per_token": 1.8011682033538818, "incorrect_loss_per_token": 0.21037603914737701, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21037603914737701, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -0.21037603914737701, "logits_per_char": -0.052594009786844254, "num_chars": 4}, {"sum_logits": -1.8011682033538818, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.8011682033538818, "logits_per_char": -0.6003894011179606, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 195, "native_id": 285, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2994641065597534, "incorrect_loss_raw": 1.6550556421279907, "correct_loss_per_char": 0.07486602663993835, "incorrect_loss_per_char": 0.5516852140426636, "correct_loss_per_token": 0.2994641065597534, "incorrect_loss_per_token": 1.6550556421279907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2994641065597534, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": true, "logits_per_token": -0.2994641065597534, "logits_per_char": -0.07486602663993835, "num_chars": 4}, {"sum_logits": -1.6550556421279907, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": false, "logits_per_token": -1.6550556421279907, "logits_per_char": -0.5516852140426636, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 196, "native_id": 2923, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2817387580871582, "incorrect_loss_raw": 1.537035584449768, "correct_loss_per_char": 0.07043468952178955, "incorrect_loss_per_char": 0.5123451948165894, "correct_loss_per_token": 0.2817387580871582, "incorrect_loss_per_token": 1.537035584449768, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2817387580871582, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.2817387580871582, "logits_per_char": -0.07043468952178955, "num_chars": 4}, {"sum_logits": -1.537035584449768, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.537035584449768, "logits_per_char": -0.5123451948165894, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 197, "native_id": 1332, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3727126717567444, "incorrect_loss_raw": 1.3650952577590942, "correct_loss_per_char": 0.0931781679391861, "incorrect_loss_per_char": 0.45503175258636475, "correct_loss_per_token": 0.3727126717567444, "incorrect_loss_per_token": 1.3650952577590942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3727126717567444, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": true, "logits_per_token": -0.3727126717567444, "logits_per_char": -0.0931781679391861, "num_chars": 4}, {"sum_logits": -1.3650952577590942, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": false, "logits_per_token": -1.3650952577590942, "logits_per_char": -0.45503175258636475, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 198, "native_id": 700, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.625762939453125, "incorrect_loss_raw": 0.8606458306312561, "correct_loss_per_char": 0.15644073486328125, "incorrect_loss_per_char": 0.286881943543752, "correct_loss_per_token": 0.625762939453125, "incorrect_loss_per_token": 0.8606458306312561, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.625762939453125, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.625762939453125, "logits_per_char": -0.15644073486328125, "num_chars": 4}, {"sum_logits": -0.8606458306312561, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -0.8606458306312561, "logits_per_char": -0.286881943543752, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 199, "native_id": 6, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46488985419273376, "incorrect_loss_raw": 1.0962884426116943, "correct_loss_per_char": 0.11622246354818344, "incorrect_loss_per_char": 0.36542948087056476, "correct_loss_per_token": 0.46488985419273376, "incorrect_loss_per_token": 1.0962884426116943, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46488985419273376, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.46488985419273376, "logits_per_char": -0.11622246354818344, "num_chars": 4}, {"sum_logits": -1.0962884426116943, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.0962884426116943, "logits_per_char": -0.36542948087056476, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 200, "native_id": 2737, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4935264587402344, "incorrect_loss_raw": 0.30963367223739624, "correct_loss_per_char": 0.49784215291341144, "incorrect_loss_per_char": 0.07740841805934906, "correct_loss_per_token": 1.4935264587402344, "incorrect_loss_per_token": 0.30963367223739624, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30963367223739624, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -0.30963367223739624, "logits_per_char": -0.07740841805934906, "num_chars": 4}, {"sum_logits": -1.4935264587402344, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -1.4935264587402344, "logits_per_char": -0.49784215291341144, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 201, "native_id": 2763, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.14678701758384705, "incorrect_loss_raw": 2.1425552368164062, "correct_loss_per_char": 0.03669675439596176, "incorrect_loss_per_char": 0.7141850789388021, "correct_loss_per_token": 0.14678701758384705, "incorrect_loss_per_token": 2.1425552368164062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.14678701758384705, "num_tokens": 1, "num_tokens_all": 1012, "is_greedy": true, "logits_per_token": -0.14678701758384705, "logits_per_char": -0.03669675439596176, "num_chars": 4}, {"sum_logits": -2.1425552368164062, "num_tokens": 1, "num_tokens_all": 1012, "is_greedy": false, "logits_per_token": -2.1425552368164062, "logits_per_char": -0.7141850789388021, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 202, "native_id": 249, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26766788959503174, "incorrect_loss_raw": 1.617118000984192, "correct_loss_per_char": 0.06691697239875793, "incorrect_loss_per_char": 0.5390393336613973, "correct_loss_per_token": 0.26766788959503174, "incorrect_loss_per_token": 1.617118000984192, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26766788959503174, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.26766788959503174, "logits_per_char": -0.06691697239875793, "num_chars": 4}, {"sum_logits": -1.617118000984192, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.617118000984192, "logits_per_char": -0.5390393336613973, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 203, "native_id": 2614, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2555209994316101, "incorrect_loss_raw": 1.5892386436462402, "correct_loss_per_char": 0.06388024985790253, "incorrect_loss_per_char": 0.5297462145487467, "correct_loss_per_token": 0.2555209994316101, "incorrect_loss_per_token": 1.5892386436462402, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2555209994316101, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.2555209994316101, "logits_per_char": -0.06388024985790253, "num_chars": 4}, {"sum_logits": -1.5892386436462402, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -1.5892386436462402, "logits_per_char": -0.5297462145487467, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 204, "native_id": 358, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26188305020332336, "incorrect_loss_raw": 1.6630125045776367, "correct_loss_per_char": 0.06547076255083084, "incorrect_loss_per_char": 0.5543375015258789, "correct_loss_per_token": 0.26188305020332336, "incorrect_loss_per_token": 1.6630125045776367, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26188305020332336, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -0.26188305020332336, "logits_per_char": -0.06547076255083084, "num_chars": 4}, {"sum_logits": -1.6630125045776367, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.6630125045776367, "logits_per_char": -0.5543375015258789, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 205, "native_id": 607, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.09773047268390656, "incorrect_loss_raw": 2.766216993331909, "correct_loss_per_char": 0.02443261817097664, "incorrect_loss_per_char": 0.9220723311106364, "correct_loss_per_token": 0.09773047268390656, "incorrect_loss_per_token": 2.766216993331909, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.09773047268390656, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.09773047268390656, "logits_per_char": -0.02443261817097664, "num_chars": 4}, {"sum_logits": -2.766216993331909, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -2.766216993331909, "logits_per_char": -0.9220723311106364, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 206, "native_id": 888, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32281169295310974, "incorrect_loss_raw": 1.458453893661499, "correct_loss_per_char": 0.08070292323827744, "incorrect_loss_per_char": 0.4861512978871663, "correct_loss_per_token": 0.32281169295310974, "incorrect_loss_per_token": 1.458453893661499, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32281169295310974, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -0.32281169295310974, "logits_per_char": -0.08070292323827744, "num_chars": 4}, {"sum_logits": -1.458453893661499, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.458453893661499, "logits_per_char": -0.4861512978871663, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 207, "native_id": 163, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29654690623283386, "incorrect_loss_raw": 1.4978886842727661, "correct_loss_per_char": 0.07413672655820847, "incorrect_loss_per_char": 0.49929622809092206, "correct_loss_per_token": 0.29654690623283386, "incorrect_loss_per_token": 1.4978886842727661, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29654690623283386, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.29654690623283386, "logits_per_char": -0.07413672655820847, "num_chars": 4}, {"sum_logits": -1.4978886842727661, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -1.4978886842727661, "logits_per_char": -0.49929622809092206, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 208, "native_id": 1772, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5146589279174805, "incorrect_loss_raw": 1.0323295593261719, "correct_loss_per_char": 0.12866473197937012, "incorrect_loss_per_char": 0.34410985310872394, "correct_loss_per_token": 0.5146589279174805, "incorrect_loss_per_token": 1.0323295593261719, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5146589279174805, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": true, "logits_per_token": -0.5146589279174805, "logits_per_char": -0.12866473197937012, "num_chars": 4}, {"sum_logits": -1.0323295593261719, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.0323295593261719, "logits_per_char": -0.34410985310872394, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 209, "native_id": 1603, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9171408414840698, "incorrect_loss_raw": 0.6079727411270142, "correct_loss_per_char": 0.30571361382802326, "incorrect_loss_per_char": 0.15199318528175354, "correct_loss_per_token": 0.9171408414840698, "incorrect_loss_per_token": 0.6079727411270142, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6079727411270142, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.6079727411270142, "logits_per_char": -0.15199318528175354, "num_chars": 4}, {"sum_logits": -0.9171408414840698, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -0.9171408414840698, "logits_per_char": -0.30571361382802326, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 210, "native_id": 3017, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8632546663284302, "incorrect_loss_raw": 0.75841224193573, "correct_loss_per_char": 0.28775155544281006, "incorrect_loss_per_char": 0.1896030604839325, "correct_loss_per_token": 0.8632546663284302, "incorrect_loss_per_token": 0.75841224193573, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.75841224193573, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.75841224193573, "logits_per_char": -0.1896030604839325, "num_chars": 4}, {"sum_logits": -0.8632546663284302, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -0.8632546663284302, "logits_per_char": -0.28775155544281006, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 211, "native_id": 1328, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6626052856445312, "incorrect_loss_raw": 0.2681617736816406, "correct_loss_per_char": 0.5542017618815104, "incorrect_loss_per_char": 0.06704044342041016, "correct_loss_per_token": 1.6626052856445312, "incorrect_loss_per_token": 0.2681617736816406, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2681617736816406, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.2681617736816406, "logits_per_char": -0.06704044342041016, "num_chars": 4}, {"sum_logits": -1.6626052856445312, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.6626052856445312, "logits_per_char": -0.5542017618815104, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 212, "native_id": 848, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4060783386230469, "incorrect_loss_raw": 0.344931960105896, "correct_loss_per_char": 0.4686927795410156, "incorrect_loss_per_char": 0.086232990026474, "correct_loss_per_token": 1.4060783386230469, "incorrect_loss_per_token": 0.344931960105896, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.344931960105896, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.344931960105896, "logits_per_char": -0.086232990026474, "num_chars": 4}, {"sum_logits": -1.4060783386230469, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -1.4060783386230469, "logits_per_char": -0.4686927795410156, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 213, "native_id": 3068, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1795527935028076, "incorrect_loss_raw": 0.42666593194007874, "correct_loss_per_char": 0.39318426450093585, "incorrect_loss_per_char": 0.10666648298501968, "correct_loss_per_token": 1.1795527935028076, "incorrect_loss_per_token": 0.42666593194007874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.42666593194007874, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.42666593194007874, "logits_per_char": -0.10666648298501968, "num_chars": 4}, {"sum_logits": -1.1795527935028076, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -1.1795527935028076, "logits_per_char": -0.39318426450093585, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 214, "native_id": 1561, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.162041187286377, "incorrect_loss_raw": 2.224552869796753, "correct_loss_per_char": 0.29051029682159424, "incorrect_loss_per_char": 0.7415176232655843, "correct_loss_per_token": 1.162041187286377, "incorrect_loss_per_token": 2.224552869796753, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.162041187286377, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -1.162041187286377, "logits_per_char": -0.29051029682159424, "num_chars": 4}, {"sum_logits": -2.224552869796753, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -2.224552869796753, "logits_per_char": -0.7415176232655843, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 215, "native_id": 1147, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6847670078277588, "incorrect_loss_raw": 0.2533378005027771, "correct_loss_per_char": 0.5615890026092529, "incorrect_loss_per_char": 0.06333445012569427, "correct_loss_per_token": 1.6847670078277588, "incorrect_loss_per_token": 0.2533378005027771, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2533378005027771, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.2533378005027771, "logits_per_char": -0.06333445012569427, "num_chars": 4}, {"sum_logits": -1.6847670078277588, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -1.6847670078277588, "logits_per_char": -0.5615890026092529, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 216, "native_id": 2201, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1498546004295349, "incorrect_loss_raw": 2.1185195446014404, "correct_loss_per_char": 0.03746365010738373, "incorrect_loss_per_char": 0.7061731815338135, "correct_loss_per_token": 0.1498546004295349, "incorrect_loss_per_token": 2.1185195446014404, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1498546004295349, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": true, "logits_per_token": -0.1498546004295349, "logits_per_char": -0.03746365010738373, "num_chars": 4}, {"sum_logits": -2.1185195446014404, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": false, "logits_per_token": -2.1185195446014404, "logits_per_char": -0.7061731815338135, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 217, "native_id": 2588, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.49236100912094116, "incorrect_loss_raw": 1.0842022895812988, "correct_loss_per_char": 0.12309025228023529, "incorrect_loss_per_char": 0.3614007631937663, "correct_loss_per_token": 0.49236100912094116, "incorrect_loss_per_token": 1.0842022895812988, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.49236100912094116, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": true, "logits_per_token": -0.49236100912094116, "logits_per_char": -0.12309025228023529, "num_chars": 4}, {"sum_logits": -1.0842022895812988, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -1.0842022895812988, "logits_per_char": -0.3614007631937663, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 218, "native_id": 1247, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4896419644355774, "incorrect_loss_raw": 1.1284713745117188, "correct_loss_per_char": 0.12241049110889435, "incorrect_loss_per_char": 0.37615712483723956, "correct_loss_per_token": 0.4896419644355774, "incorrect_loss_per_token": 1.1284713745117188, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4896419644355774, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.4896419644355774, "logits_per_char": -0.12241049110889435, "num_chars": 4}, {"sum_logits": -1.1284713745117188, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.1284713745117188, "logits_per_char": -0.37615712483723956, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 219, "native_id": 1728, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5433684587478638, "incorrect_loss_raw": 1.024738073348999, "correct_loss_per_char": 0.13584211468696594, "incorrect_loss_per_char": 0.3415793577829997, "correct_loss_per_token": 0.5433684587478638, "incorrect_loss_per_token": 1.024738073348999, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5433684587478638, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": true, "logits_per_token": -0.5433684587478638, "logits_per_char": -0.13584211468696594, "num_chars": 4}, {"sum_logits": -1.024738073348999, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": false, "logits_per_token": -1.024738073348999, "logits_per_char": -0.3415793577829997, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 220, "native_id": 1306, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2541179955005646, "incorrect_loss_raw": 1.6835962533950806, "correct_loss_per_char": 0.06352949887514114, "incorrect_loss_per_char": 0.5611987511316935, "correct_loss_per_token": 0.2541179955005646, "incorrect_loss_per_token": 1.6835962533950806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2541179955005646, "num_tokens": 1, "num_tokens_all": 835, "is_greedy": true, "logits_per_token": -0.2541179955005646, "logits_per_char": -0.06352949887514114, "num_chars": 4}, {"sum_logits": -1.6835962533950806, "num_tokens": 1, "num_tokens_all": 835, "is_greedy": false, "logits_per_token": -1.6835962533950806, "logits_per_char": -0.5611987511316935, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 221, "native_id": 2806, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6103454828262329, "incorrect_loss_raw": 1.0351226329803467, "correct_loss_per_char": 0.15258637070655823, "incorrect_loss_per_char": 0.34504087766011554, "correct_loss_per_token": 0.6103454828262329, "incorrect_loss_per_token": 1.0351226329803467, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6103454828262329, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.6103454828262329, "logits_per_char": -0.15258637070655823, "num_chars": 4}, {"sum_logits": -1.0351226329803467, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.0351226329803467, "logits_per_char": -0.34504087766011554, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 222, "native_id": 2366, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5678563117980957, "incorrect_loss_raw": 0.9637438654899597, "correct_loss_per_char": 0.14196407794952393, "incorrect_loss_per_char": 0.3212479551633199, "correct_loss_per_token": 0.5678563117980957, "incorrect_loss_per_token": 0.9637438654899597, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5678563117980957, "num_tokens": 1, "num_tokens_all": 1207, "is_greedy": true, "logits_per_token": -0.5678563117980957, "logits_per_char": -0.14196407794952393, "num_chars": 4}, {"sum_logits": -0.9637438654899597, "num_tokens": 1, "num_tokens_all": 1207, "is_greedy": false, "logits_per_token": -0.9637438654899597, "logits_per_char": -0.3212479551633199, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 223, "native_id": 620, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5876128673553467, "incorrect_loss_raw": 0.2777184844017029, "correct_loss_per_char": 0.5292042891184489, "incorrect_loss_per_char": 0.06942962110042572, "correct_loss_per_token": 1.5876128673553467, "incorrect_loss_per_token": 0.2777184844017029, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2777184844017029, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.2777184844017029, "logits_per_char": -0.06942962110042572, "num_chars": 4}, {"sum_logits": -1.5876128673553467, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.5876128673553467, "logits_per_char": -0.5292042891184489, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 224, "native_id": 2181, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27094757556915283, "incorrect_loss_raw": 1.587291955947876, "correct_loss_per_char": 0.06773689389228821, "incorrect_loss_per_char": 0.529097318649292, "correct_loss_per_token": 0.27094757556915283, "incorrect_loss_per_token": 1.587291955947876, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27094757556915283, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.27094757556915283, "logits_per_char": -0.06773689389228821, "num_chars": 4}, {"sum_logits": -1.587291955947876, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -1.587291955947876, "logits_per_char": -0.529097318649292, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 225, "native_id": 380, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.268716037273407, "incorrect_loss_raw": 1.5603723526000977, "correct_loss_per_char": 0.06717900931835175, "incorrect_loss_per_char": 0.5201241175333658, "correct_loss_per_token": 0.268716037273407, "incorrect_loss_per_token": 1.5603723526000977, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.268716037273407, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -0.268716037273407, "logits_per_char": -0.06717900931835175, "num_chars": 4}, {"sum_logits": -1.5603723526000977, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.5603723526000977, "logits_per_char": -0.5201241175333658, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 226, "native_id": 1066, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26921066641807556, "incorrect_loss_raw": 1.6368743181228638, "correct_loss_per_char": 0.06730266660451889, "incorrect_loss_per_char": 0.5456247727076212, "correct_loss_per_token": 0.26921066641807556, "incorrect_loss_per_token": 1.6368743181228638, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26921066641807556, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.26921066641807556, "logits_per_char": -0.06730266660451889, "num_chars": 4}, {"sum_logits": -1.6368743181228638, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.6368743181228638, "logits_per_char": -0.5456247727076212, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 227, "native_id": 1138, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17449606955051422, "incorrect_loss_raw": 2.0567514896392822, "correct_loss_per_char": 0.043624017387628555, "incorrect_loss_per_char": 0.6855838298797607, "correct_loss_per_token": 0.17449606955051422, "incorrect_loss_per_token": 2.0567514896392822, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17449606955051422, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.17449606955051422, "logits_per_char": -0.043624017387628555, "num_chars": 4}, {"sum_logits": -2.0567514896392822, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -2.0567514896392822, "logits_per_char": -0.6855838298797607, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 228, "native_id": 1680, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.36966636776924133, "incorrect_loss_raw": 1.3108843564987183, "correct_loss_per_char": 0.09241659194231033, "incorrect_loss_per_char": 0.43696145216623944, "correct_loss_per_token": 0.36966636776924133, "incorrect_loss_per_token": 1.3108843564987183, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.36966636776924133, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": true, "logits_per_token": -0.36966636776924133, "logits_per_char": -0.09241659194231033, "num_chars": 4}, {"sum_logits": -1.3108843564987183, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": false, "logits_per_token": -1.3108843564987183, "logits_per_char": -0.43696145216623944, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 229, "native_id": 1638, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9027587175369263, "incorrect_loss_raw": 0.1835954338312149, "correct_loss_per_char": 0.6342529058456421, "incorrect_loss_per_char": 0.045898858457803726, "correct_loss_per_token": 1.9027587175369263, "incorrect_loss_per_token": 0.1835954338312149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1835954338312149, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.1835954338312149, "logits_per_char": -0.045898858457803726, "num_chars": 4}, {"sum_logits": -1.9027587175369263, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.9027587175369263, "logits_per_char": -0.6342529058456421, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 230, "native_id": 2314, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22999686002731323, "incorrect_loss_raw": 1.7896151542663574, "correct_loss_per_char": 0.05749921500682831, "incorrect_loss_per_char": 0.5965383847554525, "correct_loss_per_token": 0.22999686002731323, "incorrect_loss_per_token": 1.7896151542663574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22999686002731323, "num_tokens": 1, "num_tokens_all": 841, "is_greedy": true, "logits_per_token": -0.22999686002731323, "logits_per_char": -0.05749921500682831, "num_chars": 4}, {"sum_logits": -1.7896151542663574, "num_tokens": 1, "num_tokens_all": 841, "is_greedy": false, "logits_per_token": -1.7896151542663574, "logits_per_char": -0.5965383847554525, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 231, "native_id": 3180, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2166584730148315, "incorrect_loss_raw": 0.3907054364681244, "correct_loss_per_char": 0.40555282433827716, "incorrect_loss_per_char": 0.0976763591170311, "correct_loss_per_token": 1.2166584730148315, "incorrect_loss_per_token": 0.3907054364681244, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3907054364681244, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": true, "logits_per_token": -0.3907054364681244, "logits_per_char": -0.0976763591170311, "num_chars": 4}, {"sum_logits": -1.2166584730148315, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": false, "logits_per_token": -1.2166584730148315, "logits_per_char": -0.40555282433827716, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 232, "native_id": 2153, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9226071238517761, "incorrect_loss_raw": 0.5985851883888245, "correct_loss_per_char": 0.23065178096294403, "incorrect_loss_per_char": 0.19952839612960815, "correct_loss_per_token": 0.9226071238517761, "incorrect_loss_per_token": 0.5985851883888245, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9226071238517761, "num_tokens": 1, "num_tokens_all": 1202, "is_greedy": false, "logits_per_token": -0.9226071238517761, "logits_per_char": -0.23065178096294403, "num_chars": 4}, {"sum_logits": -0.5985851883888245, "num_tokens": 1, "num_tokens_all": 1202, "is_greedy": true, "logits_per_token": -0.5985851883888245, "logits_per_char": -0.19952839612960815, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 233, "native_id": 465, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2852705717086792, "incorrect_loss_raw": 1.529261589050293, "correct_loss_per_char": 0.0713176429271698, "incorrect_loss_per_char": 0.5097538630167643, "correct_loss_per_token": 0.2852705717086792, "incorrect_loss_per_token": 1.529261589050293, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2852705717086792, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.2852705717086792, "logits_per_char": -0.0713176429271698, "num_chars": 4}, {"sum_logits": -1.529261589050293, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.529261589050293, "logits_per_char": -0.5097538630167643, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 234, "native_id": 2873, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3246527910232544, "incorrect_loss_raw": 1.6285336017608643, "correct_loss_per_char": 0.0811631977558136, "incorrect_loss_per_char": 0.5428445339202881, "correct_loss_per_token": 0.3246527910232544, "incorrect_loss_per_token": 1.6285336017608643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3246527910232544, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.3246527910232544, "logits_per_char": -0.0811631977558136, "num_chars": 4}, {"sum_logits": -1.6285336017608643, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -1.6285336017608643, "logits_per_char": -0.5428445339202881, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 235, "native_id": 1537, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5987659692764282, "incorrect_loss_raw": 0.2518719732761383, "correct_loss_per_char": 0.5329219897588094, "incorrect_loss_per_char": 0.06296799331903458, "correct_loss_per_token": 1.5987659692764282, "incorrect_loss_per_token": 0.2518719732761383, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2518719732761383, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -0.2518719732761383, "logits_per_char": -0.06296799331903458, "num_chars": 4}, {"sum_logits": -1.5987659692764282, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.5987659692764282, "logits_per_char": -0.5329219897588094, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 236, "native_id": 1123, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0353515148162842, "incorrect_loss_raw": 0.5741777420043945, "correct_loss_per_char": 0.25883787870407104, "incorrect_loss_per_char": 0.1913925806681315, "correct_loss_per_token": 1.0353515148162842, "incorrect_loss_per_token": 0.5741777420043945, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0353515148162842, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": false, "logits_per_token": -1.0353515148162842, "logits_per_char": -0.25883787870407104, "num_chars": 4}, {"sum_logits": -0.5741777420043945, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": true, "logits_per_token": -0.5741777420043945, "logits_per_char": -0.1913925806681315, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 237, "native_id": 876, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8727636337280273, "incorrect_loss_raw": 0.22619646787643433, "correct_loss_per_char": 0.6242545445760092, "incorrect_loss_per_char": 0.05654911696910858, "correct_loss_per_token": 1.8727636337280273, "incorrect_loss_per_token": 0.22619646787643433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22619646787643433, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.22619646787643433, "logits_per_char": -0.05654911696910858, "num_chars": 4}, {"sum_logits": -1.8727636337280273, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -1.8727636337280273, "logits_per_char": -0.6242545445760092, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 238, "native_id": 1218, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9459313154220581, "incorrect_loss_raw": 0.6311831474304199, "correct_loss_per_char": 0.31531043847401935, "incorrect_loss_per_char": 0.15779578685760498, "correct_loss_per_token": 0.9459313154220581, "incorrect_loss_per_token": 0.6311831474304199, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6311831474304199, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -0.6311831474304199, "logits_per_char": -0.15779578685760498, "num_chars": 4}, {"sum_logits": -0.9459313154220581, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -0.9459313154220581, "logits_per_char": -0.31531043847401935, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 239, "native_id": 2933, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.35685601830482483, "incorrect_loss_raw": 1.434106469154358, "correct_loss_per_char": 0.08921400457620621, "incorrect_loss_per_char": 0.4780354897181193, "correct_loss_per_token": 0.35685601830482483, "incorrect_loss_per_token": 1.434106469154358, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.35685601830482483, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": true, "logits_per_token": -0.35685601830482483, "logits_per_char": -0.08921400457620621, "num_chars": 4}, {"sum_logits": -1.434106469154358, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": false, "logits_per_token": -1.434106469154358, "logits_per_char": -0.4780354897181193, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 240, "native_id": 3198, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7229665517807007, "incorrect_loss_raw": 0.7625288963317871, "correct_loss_per_char": 0.18074163794517517, "incorrect_loss_per_char": 0.2541762987772624, "correct_loss_per_token": 0.7229665517807007, "incorrect_loss_per_token": 0.7625288963317871, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7229665517807007, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.7229665517807007, "logits_per_char": -0.18074163794517517, "num_chars": 4}, {"sum_logits": -0.7625288963317871, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -0.7625288963317871, "logits_per_char": -0.2541762987772624, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 241, "native_id": 1631, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32103657722473145, "incorrect_loss_raw": 1.4508458375930786, "correct_loss_per_char": 0.08025914430618286, "incorrect_loss_per_char": 0.48361527919769287, "correct_loss_per_token": 0.32103657722473145, "incorrect_loss_per_token": 1.4508458375930786, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32103657722473145, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.32103657722473145, "logits_per_char": -0.08025914430618286, "num_chars": 4}, {"sum_logits": -1.4508458375930786, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -1.4508458375930786, "logits_per_char": -0.48361527919769287, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 242, "native_id": 215, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.42499279975891113, "incorrect_loss_raw": 1.2277735471725464, "correct_loss_per_char": 0.10624819993972778, "incorrect_loss_per_char": 0.40925784905751544, "correct_loss_per_token": 0.42499279975891113, "incorrect_loss_per_token": 1.2277735471725464, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.42499279975891113, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.42499279975891113, "logits_per_char": -0.10624819993972778, "num_chars": 4}, {"sum_logits": -1.2277735471725464, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -1.2277735471725464, "logits_per_char": -0.40925784905751544, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 243, "native_id": 3167, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2908365726470947, "incorrect_loss_raw": 1.688738465309143, "correct_loss_per_char": 0.07270914316177368, "incorrect_loss_per_char": 0.5629128217697144, "correct_loss_per_token": 0.2908365726470947, "incorrect_loss_per_token": 1.688738465309143, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2908365726470947, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.2908365726470947, "logits_per_char": -0.07270914316177368, "num_chars": 4}, {"sum_logits": -1.688738465309143, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.688738465309143, "logits_per_char": -0.5629128217697144, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 244, "native_id": 1804, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.54593825340271, "incorrect_loss_raw": 0.305929571390152, "correct_loss_per_char": 0.5153127511342367, "incorrect_loss_per_char": 0.076482392847538, "correct_loss_per_token": 1.54593825340271, "incorrect_loss_per_token": 0.305929571390152, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.305929571390152, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.305929571390152, "logits_per_char": -0.076482392847538, "num_chars": 4}, {"sum_logits": -1.54593825340271, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.54593825340271, "logits_per_char": -0.5153127511342367, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 245, "native_id": 952, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6807987689971924, "incorrect_loss_raw": 0.2335735708475113, "correct_loss_per_char": 0.5602662563323975, "incorrect_loss_per_char": 0.05839339271187782, "correct_loss_per_token": 1.6807987689971924, "incorrect_loss_per_token": 0.2335735708475113, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2335735708475113, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.2335735708475113, "logits_per_char": -0.05839339271187782, "num_chars": 4}, {"sum_logits": -1.6807987689971924, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -1.6807987689971924, "logits_per_char": -0.5602662563323975, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 246, "native_id": 2009, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21993300318717957, "incorrect_loss_raw": 2.0077595710754395, "correct_loss_per_char": 0.05498325079679489, "incorrect_loss_per_char": 0.6692531903584799, "correct_loss_per_token": 0.21993300318717957, "incorrect_loss_per_token": 2.0077595710754395, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21993300318717957, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": true, "logits_per_token": -0.21993300318717957, "logits_per_char": -0.05498325079679489, "num_chars": 4}, {"sum_logits": -2.0077595710754395, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": false, "logits_per_token": -2.0077595710754395, "logits_per_char": -0.6692531903584799, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 247, "native_id": 677, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9215147495269775, "incorrect_loss_raw": 0.2960973381996155, "correct_loss_per_char": 0.6405049165089926, "incorrect_loss_per_char": 0.07402433454990387, "correct_loss_per_token": 1.9215147495269775, "incorrect_loss_per_token": 0.2960973381996155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2960973381996155, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.2960973381996155, "logits_per_char": -0.07402433454990387, "num_chars": 4}, {"sum_logits": -1.9215147495269775, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -1.9215147495269775, "logits_per_char": -0.6405049165089926, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 248, "native_id": 1558, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2945671081542969, "incorrect_loss_raw": 0.35686370730400085, "correct_loss_per_char": 0.4315223693847656, "incorrect_loss_per_char": 0.08921592682600021, "correct_loss_per_token": 1.2945671081542969, "incorrect_loss_per_token": 0.35686370730400085, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.35686370730400085, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.35686370730400085, "logits_per_char": -0.08921592682600021, "num_chars": 4}, {"sum_logits": -1.2945671081542969, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.2945671081542969, "logits_per_char": -0.4315223693847656, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 249, "native_id": 1261, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2515973448753357, "incorrect_loss_raw": 1.74668288230896, "correct_loss_per_char": 0.06289933621883392, "incorrect_loss_per_char": 0.58222762743632, "correct_loss_per_token": 0.2515973448753357, "incorrect_loss_per_token": 1.74668288230896, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2515973448753357, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.2515973448753357, "logits_per_char": -0.06289933621883392, "num_chars": 4}, {"sum_logits": -1.74668288230896, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.74668288230896, "logits_per_char": -0.58222762743632, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 250, "native_id": 10, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.48457932472229004, "incorrect_loss_raw": 1.1318789720535278, "correct_loss_per_char": 0.12114483118057251, "incorrect_loss_per_char": 0.3772929906845093, "correct_loss_per_token": 0.48457932472229004, "incorrect_loss_per_token": 1.1318789720535278, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.48457932472229004, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.48457932472229004, "logits_per_char": -0.12114483118057251, "num_chars": 4}, {"sum_logits": -1.1318789720535278, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.1318789720535278, "logits_per_char": -0.3772929906845093, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 251, "native_id": 300, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.369841456413269, "incorrect_loss_raw": 0.33796003460884094, "correct_loss_per_char": 0.45661381880442303, "incorrect_loss_per_char": 0.08449000865221024, "correct_loss_per_token": 1.369841456413269, "incorrect_loss_per_token": 0.33796003460884094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.33796003460884094, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.33796003460884094, "logits_per_char": -0.08449000865221024, "num_chars": 4}, {"sum_logits": -1.369841456413269, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -1.369841456413269, "logits_per_char": -0.45661381880442303, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 252, "native_id": 1966, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3124845027923584, "incorrect_loss_raw": 1.45213782787323, "correct_loss_per_char": 0.0781211256980896, "incorrect_loss_per_char": 0.48404594262441, "correct_loss_per_token": 0.3124845027923584, "incorrect_loss_per_token": 1.45213782787323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3124845027923584, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.3124845027923584, "logits_per_char": -0.0781211256980896, "num_chars": 4}, {"sum_logits": -1.45213782787323, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.45213782787323, "logits_per_char": -0.48404594262441, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 253, "native_id": 1617, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.034898281097412, "incorrect_loss_raw": 0.5079395771026611, "correct_loss_per_char": 0.3449660936991374, "incorrect_loss_per_char": 0.12698489427566528, "correct_loss_per_token": 1.034898281097412, "incorrect_loss_per_token": 0.5079395771026611, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5079395771026611, "num_tokens": 1, "num_tokens_all": 838, "is_greedy": true, "logits_per_token": -0.5079395771026611, "logits_per_char": -0.12698489427566528, "num_chars": 4}, {"sum_logits": -1.034898281097412, "num_tokens": 1, "num_tokens_all": 838, "is_greedy": false, "logits_per_token": -1.034898281097412, "logits_per_char": -0.3449660936991374, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 254, "native_id": 1222, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2795528173446655, "incorrect_loss_raw": 1.5821354389190674, "correct_loss_per_char": 0.06988820433616638, "incorrect_loss_per_char": 0.5273784796396891, "correct_loss_per_token": 0.2795528173446655, "incorrect_loss_per_token": 1.5821354389190674, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2795528173446655, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.2795528173446655, "logits_per_char": -0.06988820433616638, "num_chars": 4}, {"sum_logits": -1.5821354389190674, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.5821354389190674, "logits_per_char": -0.5273784796396891, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 255, "native_id": 1756, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38959622383117676, "incorrect_loss_raw": 1.258399248123169, "correct_loss_per_char": 0.09739905595779419, "incorrect_loss_per_char": 0.41946641604105633, "correct_loss_per_token": 0.38959622383117676, "incorrect_loss_per_token": 1.258399248123169, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.38959622383117676, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -0.38959622383117676, "logits_per_char": -0.09739905595779419, "num_chars": 4}, {"sum_logits": -1.258399248123169, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.258399248123169, "logits_per_char": -0.41946641604105633, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 256, "native_id": 2796, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19795416295528412, "incorrect_loss_raw": 1.8511420488357544, "correct_loss_per_char": 0.04948854073882103, "incorrect_loss_per_char": 0.6170473496119181, "correct_loss_per_token": 0.19795416295528412, "incorrect_loss_per_token": 1.8511420488357544, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19795416295528412, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.19795416295528412, "logits_per_char": -0.04948854073882103, "num_chars": 4}, {"sum_logits": -1.8511420488357544, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.8511420488357544, "logits_per_char": -0.6170473496119181, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 257, "native_id": 1964, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25192561745643616, "incorrect_loss_raw": 1.6554194688796997, "correct_loss_per_char": 0.06298140436410904, "incorrect_loss_per_char": 0.5518064896265665, "correct_loss_per_token": 0.25192561745643616, "incorrect_loss_per_token": 1.6554194688796997, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25192561745643616, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": true, "logits_per_token": -0.25192561745643616, "logits_per_char": -0.06298140436410904, "num_chars": 4}, {"sum_logits": -1.6554194688796997, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -1.6554194688796997, "logits_per_char": -0.5518064896265665, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 258, "native_id": 3150, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3737137019634247, "incorrect_loss_raw": 1.2555689811706543, "correct_loss_per_char": 0.09342842549085617, "incorrect_loss_per_char": 0.41852299372355145, "correct_loss_per_token": 0.3737137019634247, "incorrect_loss_per_token": 1.2555689811706543, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3737137019634247, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": true, "logits_per_token": -0.3737137019634247, "logits_per_char": -0.09342842549085617, "num_chars": 4}, {"sum_logits": -1.2555689811706543, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -1.2555689811706543, "logits_per_char": -0.41852299372355145, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 259, "native_id": 1640, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7969388961791992, "incorrect_loss_raw": 0.6574651002883911, "correct_loss_per_char": 0.1992347240447998, "incorrect_loss_per_char": 0.2191550334294637, "correct_loss_per_token": 0.7969388961791992, "incorrect_loss_per_token": 0.6574651002883911, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7969388961791992, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -0.7969388961791992, "logits_per_char": -0.1992347240447998, "num_chars": 4}, {"sum_logits": -0.6574651002883911, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.6574651002883911, "logits_per_char": -0.2191550334294637, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 260, "native_id": 2573, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1922457367181778, "incorrect_loss_raw": 1.908561110496521, "correct_loss_per_char": 0.04806143417954445, "incorrect_loss_per_char": 0.6361870368321737, "correct_loss_per_token": 0.1922457367181778, "incorrect_loss_per_token": 1.908561110496521, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1922457367181778, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.1922457367181778, "logits_per_char": -0.04806143417954445, "num_chars": 4}, {"sum_logits": -1.908561110496521, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.908561110496521, "logits_per_char": -0.6361870368321737, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 261, "native_id": 1957, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.040999174118042, "incorrect_loss_raw": 0.5221157073974609, "correct_loss_per_char": 0.346999724706014, "incorrect_loss_per_char": 0.13052892684936523, "correct_loss_per_token": 1.040999174118042, "incorrect_loss_per_token": 0.5221157073974609, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5221157073974609, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": true, "logits_per_token": -0.5221157073974609, "logits_per_char": -0.13052892684936523, "num_chars": 4}, {"sum_logits": -1.040999174118042, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": false, "logits_per_token": -1.040999174118042, "logits_per_char": -0.346999724706014, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 262, "native_id": 3134, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22640351951122284, "incorrect_loss_raw": 1.9273486137390137, "correct_loss_per_char": 0.05660087987780571, "incorrect_loss_per_char": 0.6424495379130045, "correct_loss_per_token": 0.22640351951122284, "incorrect_loss_per_token": 1.9273486137390137, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22640351951122284, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": true, "logits_per_token": -0.22640351951122284, "logits_per_char": -0.05660087987780571, "num_chars": 4}, {"sum_logits": -1.9273486137390137, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": false, "logits_per_token": -1.9273486137390137, "logits_per_char": -0.6424495379130045, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 263, "native_id": 1152, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3565331697463989, "incorrect_loss_raw": 1.4256573915481567, "correct_loss_per_char": 0.08913329243659973, "incorrect_loss_per_char": 0.47521913051605225, "correct_loss_per_token": 0.3565331697463989, "incorrect_loss_per_token": 1.4256573915481567, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3565331697463989, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.3565331697463989, "logits_per_char": -0.08913329243659973, "num_chars": 4}, {"sum_logits": -1.4256573915481567, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -1.4256573915481567, "logits_per_char": -0.47521913051605225, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 264, "native_id": 2422, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2720300853252411, "incorrect_loss_raw": 1.5949664115905762, "correct_loss_per_char": 0.06800752133131027, "incorrect_loss_per_char": 0.531655470530192, "correct_loss_per_token": 0.2720300853252411, "incorrect_loss_per_token": 1.5949664115905762, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2720300853252411, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.2720300853252411, "logits_per_char": -0.06800752133131027, "num_chars": 4}, {"sum_logits": -1.5949664115905762, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -1.5949664115905762, "logits_per_char": -0.531655470530192, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 265, "native_id": 1513, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32074883580207825, "incorrect_loss_raw": 1.4973276853561401, "correct_loss_per_char": 0.08018720895051956, "incorrect_loss_per_char": 0.4991092284520467, "correct_loss_per_token": 0.32074883580207825, "incorrect_loss_per_token": 1.4973276853561401, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32074883580207825, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.32074883580207825, "logits_per_char": -0.08018720895051956, "num_chars": 4}, {"sum_logits": -1.4973276853561401, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.4973276853561401, "logits_per_char": -0.4991092284520467, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 266, "native_id": 2683, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4947333037853241, "incorrect_loss_raw": 1.1476277112960815, "correct_loss_per_char": 0.12368332594633102, "incorrect_loss_per_char": 0.38254257043202716, "correct_loss_per_token": 0.4947333037853241, "incorrect_loss_per_token": 1.1476277112960815, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4947333037853241, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": true, "logits_per_token": -0.4947333037853241, "logits_per_char": -0.12368332594633102, "num_chars": 4}, {"sum_logits": -1.1476277112960815, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": false, "logits_per_token": -1.1476277112960815, "logits_per_char": -0.38254257043202716, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 267, "native_id": 2459, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46018725633621216, "incorrect_loss_raw": 1.1933131217956543, "correct_loss_per_char": 0.11504681408405304, "incorrect_loss_per_char": 0.39777104059855145, "correct_loss_per_token": 0.46018725633621216, "incorrect_loss_per_token": 1.1933131217956543, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46018725633621216, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.46018725633621216, "logits_per_char": -0.11504681408405304, "num_chars": 4}, {"sum_logits": -1.1933131217956543, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.1933131217956543, "logits_per_char": -0.39777104059855145, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 268, "native_id": 1419, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2760988473892212, "incorrect_loss_raw": 1.539175271987915, "correct_loss_per_char": 0.0690247118473053, "incorrect_loss_per_char": 0.5130584239959717, "correct_loss_per_token": 0.2760988473892212, "incorrect_loss_per_token": 1.539175271987915, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2760988473892212, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.2760988473892212, "logits_per_char": -0.0690247118473053, "num_chars": 4}, {"sum_logits": -1.539175271987915, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -1.539175271987915, "logits_per_char": -0.5130584239959717, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 269, "native_id": 844, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4967520236968994, "incorrect_loss_raw": 1.0636646747589111, "correct_loss_per_char": 0.12418800592422485, "incorrect_loss_per_char": 0.3545548915863037, "correct_loss_per_token": 0.4967520236968994, "incorrect_loss_per_token": 1.0636646747589111, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4967520236968994, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": true, "logits_per_token": -0.4967520236968994, "logits_per_char": -0.12418800592422485, "num_chars": 4}, {"sum_logits": -1.0636646747589111, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -1.0636646747589111, "logits_per_char": -0.3545548915863037, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 270, "native_id": 692, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1616169512271881, "incorrect_loss_raw": 2.0943238735198975, "correct_loss_per_char": 0.04040423780679703, "incorrect_loss_per_char": 0.6981079578399658, "correct_loss_per_token": 0.1616169512271881, "incorrect_loss_per_token": 2.0943238735198975, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1616169512271881, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.1616169512271881, "logits_per_char": -0.04040423780679703, "num_chars": 4}, {"sum_logits": -2.0943238735198975, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -2.0943238735198975, "logits_per_char": -0.6981079578399658, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 271, "native_id": 2125, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2381376326084137, "incorrect_loss_raw": 1.6652716398239136, "correct_loss_per_char": 0.059534408152103424, "incorrect_loss_per_char": 0.5550905466079712, "correct_loss_per_token": 0.2381376326084137, "incorrect_loss_per_token": 1.6652716398239136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2381376326084137, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.2381376326084137, "logits_per_char": -0.059534408152103424, "num_chars": 4}, {"sum_logits": -1.6652716398239136, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.6652716398239136, "logits_per_char": -0.5550905466079712, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 272, "native_id": 2326, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4435546398162842, "incorrect_loss_raw": 1.1528606414794922, "correct_loss_per_char": 0.11088865995407104, "incorrect_loss_per_char": 0.38428688049316406, "correct_loss_per_token": 0.4435546398162842, "incorrect_loss_per_token": 1.1528606414794922, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4435546398162842, "num_tokens": 1, "num_tokens_all": 1002, "is_greedy": true, "logits_per_token": -0.4435546398162842, "logits_per_char": -0.11088865995407104, "num_chars": 4}, {"sum_logits": -1.1528606414794922, "num_tokens": 1, "num_tokens_all": 1002, "is_greedy": false, "logits_per_token": -1.1528606414794922, "logits_per_char": -0.38428688049316406, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 273, "native_id": 1873, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.13333407044410706, "incorrect_loss_raw": 2.212148666381836, "correct_loss_per_char": 0.033333517611026764, "incorrect_loss_per_char": 0.7373828887939453, "correct_loss_per_token": 0.13333407044410706, "incorrect_loss_per_token": 2.212148666381836, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.13333407044410706, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.13333407044410706, "logits_per_char": -0.033333517611026764, "num_chars": 4}, {"sum_logits": -2.212148666381836, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -2.212148666381836, "logits_per_char": -0.7373828887939453, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 274, "native_id": 3069, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.12113381922245026, "incorrect_loss_raw": 2.518444538116455, "correct_loss_per_char": 0.030283454805612564, "incorrect_loss_per_char": 0.839481512705485, "correct_loss_per_token": 0.12113381922245026, "incorrect_loss_per_token": 2.518444538116455, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.12113381922245026, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.12113381922245026, "logits_per_char": -0.030283454805612564, "num_chars": 4}, {"sum_logits": -2.518444538116455, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -2.518444538116455, "logits_per_char": -0.839481512705485, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 275, "native_id": 1943, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22317560017108917, "incorrect_loss_raw": 1.78364098072052, "correct_loss_per_char": 0.05579390004277229, "incorrect_loss_per_char": 0.5945469935735067, "correct_loss_per_token": 0.22317560017108917, "incorrect_loss_per_token": 1.78364098072052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22317560017108917, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -0.22317560017108917, "logits_per_char": -0.05579390004277229, "num_chars": 4}, {"sum_logits": -1.78364098072052, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -1.78364098072052, "logits_per_char": -0.5945469935735067, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 276, "native_id": 2702, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8786371350288391, "incorrect_loss_raw": 0.6320585608482361, "correct_loss_per_char": 0.29287904500961304, "incorrect_loss_per_char": 0.15801464021205902, "correct_loss_per_token": 0.8786371350288391, "incorrect_loss_per_token": 0.6320585608482361, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6320585608482361, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.6320585608482361, "logits_per_char": -0.15801464021205902, "num_chars": 4}, {"sum_logits": -0.8786371350288391, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -0.8786371350288391, "logits_per_char": -0.29287904500961304, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 277, "native_id": 115, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2961285412311554, "incorrect_loss_raw": 1.5986460447311401, "correct_loss_per_char": 0.07403213530778885, "incorrect_loss_per_char": 0.53288201491038, "correct_loss_per_token": 0.2961285412311554, "incorrect_loss_per_token": 1.5986460447311401, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2961285412311554, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.2961285412311554, "logits_per_char": -0.07403213530778885, "num_chars": 4}, {"sum_logits": -1.5986460447311401, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.5986460447311401, "logits_per_char": -0.53288201491038, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 278, "native_id": 2971, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.119567632675171, "incorrect_loss_raw": 0.4697698950767517, "correct_loss_per_char": 0.37318921089172363, "incorrect_loss_per_char": 0.11744247376918793, "correct_loss_per_token": 1.119567632675171, "incorrect_loss_per_token": 0.4697698950767517, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4697698950767517, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -0.4697698950767517, "logits_per_char": -0.11744247376918793, "num_chars": 4}, {"sum_logits": -1.119567632675171, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.119567632675171, "logits_per_char": -0.37318921089172363, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 279, "native_id": 1916, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.169555425643921, "incorrect_loss_raw": 0.4608432650566101, "correct_loss_per_char": 0.38985180854797363, "incorrect_loss_per_char": 0.11521081626415253, "correct_loss_per_token": 1.169555425643921, "incorrect_loss_per_token": 0.4608432650566101, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4608432650566101, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.4608432650566101, "logits_per_char": -0.11521081626415253, "num_chars": 4}, {"sum_logits": -1.169555425643921, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.169555425643921, "logits_per_char": -0.38985180854797363, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 280, "native_id": 2706, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23027291893959045, "incorrect_loss_raw": 1.7052526473999023, "correct_loss_per_char": 0.057568229734897614, "incorrect_loss_per_char": 0.5684175491333008, "correct_loss_per_token": 0.23027291893959045, "incorrect_loss_per_token": 1.7052526473999023, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23027291893959045, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.23027291893959045, "logits_per_char": -0.057568229734897614, "num_chars": 4}, {"sum_logits": -1.7052526473999023, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -1.7052526473999023, "logits_per_char": -0.5684175491333008, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 281, "native_id": 424, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.10979011654853821, "incorrect_loss_raw": 2.4815545082092285, "correct_loss_per_char": 0.027447529137134552, "incorrect_loss_per_char": 0.8271848360697428, "correct_loss_per_token": 0.10979011654853821, "incorrect_loss_per_token": 2.4815545082092285, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.10979011654853821, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.10979011654853821, "logits_per_char": -0.027447529137134552, "num_chars": 4}, {"sum_logits": -2.4815545082092285, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -2.4815545082092285, "logits_per_char": -0.8271848360697428, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 282, "native_id": 110, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2812153398990631, "incorrect_loss_raw": 1.523291826248169, "correct_loss_per_char": 0.07030383497476578, "incorrect_loss_per_char": 0.507763942082723, "correct_loss_per_token": 0.2812153398990631, "incorrect_loss_per_token": 1.523291826248169, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2812153398990631, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": true, "logits_per_token": -0.2812153398990631, "logits_per_char": -0.07030383497476578, "num_chars": 4}, {"sum_logits": -1.523291826248169, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -1.523291826248169, "logits_per_char": -0.507763942082723, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 283, "native_id": 1501, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19859269261360168, "incorrect_loss_raw": 1.861402153968811, "correct_loss_per_char": 0.04964817315340042, "incorrect_loss_per_char": 0.6204673846562704, "correct_loss_per_token": 0.19859269261360168, "incorrect_loss_per_token": 1.861402153968811, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19859269261360168, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.19859269261360168, "logits_per_char": -0.04964817315340042, "num_chars": 4}, {"sum_logits": -1.861402153968811, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.861402153968811, "logits_per_char": -0.6204673846562704, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 284, "native_id": 1948, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8151283264160156, "incorrect_loss_raw": 0.64824378490448, "correct_loss_per_char": 0.2717094421386719, "incorrect_loss_per_char": 0.16206094622612, "correct_loss_per_token": 0.8151283264160156, "incorrect_loss_per_token": 0.64824378490448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.64824378490448, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": true, "logits_per_token": -0.64824378490448, "logits_per_char": -0.16206094622612, "num_chars": 4}, {"sum_logits": -0.8151283264160156, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -0.8151283264160156, "logits_per_char": -0.2717094421386719, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 285, "native_id": 267, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46109190583229065, "incorrect_loss_raw": 1.1539466381072998, "correct_loss_per_char": 0.11527297645807266, "incorrect_loss_per_char": 0.3846488793690999, "correct_loss_per_token": 0.46109190583229065, "incorrect_loss_per_token": 1.1539466381072998, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46109190583229065, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.46109190583229065, "logits_per_char": -0.11527297645807266, "num_chars": 4}, {"sum_logits": -1.1539466381072998, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.1539466381072998, "logits_per_char": -0.3846488793690999, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 286, "native_id": 573, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5825603008270264, "incorrect_loss_raw": 0.9627951383590698, "correct_loss_per_char": 0.1456400752067566, "incorrect_loss_per_char": 0.3209317127863566, "correct_loss_per_token": 0.5825603008270264, "incorrect_loss_per_token": 0.9627951383590698, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5825603008270264, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": true, "logits_per_token": -0.5825603008270264, "logits_per_char": -0.1456400752067566, "num_chars": 4}, {"sum_logits": -0.9627951383590698, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": false, "logits_per_token": -0.9627951383590698, "logits_per_char": -0.3209317127863566, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 287, "native_id": 2408, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20621711015701294, "incorrect_loss_raw": 1.923079490661621, "correct_loss_per_char": 0.051554277539253235, "incorrect_loss_per_char": 0.641026496887207, "correct_loss_per_token": 0.20621711015701294, "incorrect_loss_per_token": 1.923079490661621, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20621711015701294, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.20621711015701294, "logits_per_char": -0.051554277539253235, "num_chars": 4}, {"sum_logits": -1.923079490661621, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.923079490661621, "logits_per_char": -0.641026496887207, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 288, "native_id": 1358, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6367993354797363, "incorrect_loss_raw": 0.2758767008781433, "correct_loss_per_char": 0.5455997784932455, "incorrect_loss_per_char": 0.06896917521953583, "correct_loss_per_token": 1.6367993354797363, "incorrect_loss_per_token": 0.2758767008781433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2758767008781433, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.2758767008781433, "logits_per_char": -0.06896917521953583, "num_chars": 4}, {"sum_logits": -1.6367993354797363, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.6367993354797363, "logits_per_char": -0.5455997784932455, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 289, "native_id": 1429, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6797918081283569, "incorrect_loss_raw": 0.8939486145973206, "correct_loss_per_char": 0.16994795203208923, "incorrect_loss_per_char": 0.2979828715324402, "correct_loss_per_token": 0.6797918081283569, "incorrect_loss_per_token": 0.8939486145973206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6797918081283569, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": true, "logits_per_token": -0.6797918081283569, "logits_per_char": -0.16994795203208923, "num_chars": 4}, {"sum_logits": -0.8939486145973206, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": false, "logits_per_token": -0.8939486145973206, "logits_per_char": -0.2979828715324402, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 290, "native_id": 1186, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5866134762763977, "incorrect_loss_raw": 0.9379738569259644, "correct_loss_per_char": 0.14665336906909943, "incorrect_loss_per_char": 0.3126579523086548, "correct_loss_per_token": 0.5866134762763977, "incorrect_loss_per_token": 0.9379738569259644, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5866134762763977, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.5866134762763977, "logits_per_char": -0.14665336906909943, "num_chars": 4}, {"sum_logits": -0.9379738569259644, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -0.9379738569259644, "logits_per_char": -0.3126579523086548, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 291, "native_id": 1223, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.47352543473243713, "incorrect_loss_raw": 1.180158019065857, "correct_loss_per_char": 0.11838135868310928, "incorrect_loss_per_char": 0.39338600635528564, "correct_loss_per_token": 0.47352543473243713, "incorrect_loss_per_token": 1.180158019065857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.47352543473243713, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.47352543473243713, "logits_per_char": -0.11838135868310928, "num_chars": 4}, {"sum_logits": -1.180158019065857, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.180158019065857, "logits_per_char": -0.39338600635528564, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 292, "native_id": 2791, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.288069486618042, "incorrect_loss_raw": 0.40105968713760376, "correct_loss_per_char": 0.42935649553934735, "incorrect_loss_per_char": 0.10026492178440094, "correct_loss_per_token": 1.288069486618042, "incorrect_loss_per_token": 0.40105968713760376, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.40105968713760376, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.40105968713760376, "logits_per_char": -0.10026492178440094, "num_chars": 4}, {"sum_logits": -1.288069486618042, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -1.288069486618042, "logits_per_char": -0.42935649553934735, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 293, "native_id": 2810, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32063794136047363, "incorrect_loss_raw": 1.4237034320831299, "correct_loss_per_char": 0.08015948534011841, "incorrect_loss_per_char": 0.47456781069437665, "correct_loss_per_token": 0.32063794136047363, "incorrect_loss_per_token": 1.4237034320831299, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32063794136047363, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.32063794136047363, "logits_per_char": -0.08015948534011841, "num_chars": 4}, {"sum_logits": -1.4237034320831299, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.4237034320831299, "logits_per_char": -0.47456781069437665, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 294, "native_id": 2388, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3642042279243469, "incorrect_loss_raw": 1.5657484531402588, "correct_loss_per_char": 0.09105105698108673, "incorrect_loss_per_char": 0.5219161510467529, "correct_loss_per_token": 0.3642042279243469, "incorrect_loss_per_token": 1.5657484531402588, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3642042279243469, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -0.3642042279243469, "logits_per_char": -0.09105105698108673, "num_chars": 4}, {"sum_logits": -1.5657484531402588, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -1.5657484531402588, "logits_per_char": -0.5219161510467529, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 295, "native_id": 1354, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5054270029067993, "incorrect_loss_raw": 1.010810375213623, "correct_loss_per_char": 0.12635675072669983, "incorrect_loss_per_char": 0.33693679173787433, "correct_loss_per_token": 0.5054270029067993, "incorrect_loss_per_token": 1.010810375213623, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5054270029067993, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.5054270029067993, "logits_per_char": -0.12635675072669983, "num_chars": 4}, {"sum_logits": -1.010810375213623, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.010810375213623, "logits_per_char": -0.33693679173787433, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 296, "native_id": 2305, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30478712916374207, "incorrect_loss_raw": 1.4837614297866821, "correct_loss_per_char": 0.07619678229093552, "incorrect_loss_per_char": 0.49458714326222736, "correct_loss_per_token": 0.30478712916374207, "incorrect_loss_per_token": 1.4837614297866821, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30478712916374207, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.30478712916374207, "logits_per_char": -0.07619678229093552, "num_chars": 4}, {"sum_logits": -1.4837614297866821, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.4837614297866821, "logits_per_char": -0.49458714326222736, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 297, "native_id": 1203, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3792409598827362, "incorrect_loss_raw": 1.3121280670166016, "correct_loss_per_char": 0.09481023997068405, "incorrect_loss_per_char": 0.4373760223388672, "correct_loss_per_token": 0.3792409598827362, "incorrect_loss_per_token": 1.3121280670166016, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3792409598827362, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": true, "logits_per_token": -0.3792409598827362, "logits_per_char": -0.09481023997068405, "num_chars": 4}, {"sum_logits": -1.3121280670166016, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.3121280670166016, "logits_per_char": -0.4373760223388672, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 298, "native_id": 2304, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.74942547082901, "incorrect_loss_raw": 0.7011825442314148, "correct_loss_per_char": 0.1873563677072525, "incorrect_loss_per_char": 0.23372751474380493, "correct_loss_per_token": 0.74942547082901, "incorrect_loss_per_token": 0.7011825442314148, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.74942547082901, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -0.74942547082901, "logits_per_char": -0.1873563677072525, "num_chars": 4}, {"sum_logits": -0.7011825442314148, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.7011825442314148, "logits_per_char": -0.23372751474380493, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 299, "native_id": 796, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30894291400909424, "incorrect_loss_raw": 1.4522231817245483, "correct_loss_per_char": 0.07723572850227356, "incorrect_loss_per_char": 0.4840743939081828, "correct_loss_per_token": 0.30894291400909424, "incorrect_loss_per_token": 1.4522231817245483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30894291400909424, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": true, "logits_per_token": -0.30894291400909424, "logits_per_char": -0.07723572850227356, "num_chars": 4}, {"sum_logits": -1.4522231817245483, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -1.4522231817245483, "logits_per_char": -0.4840743939081828, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 300, "native_id": 2085, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.48697149753570557, "incorrect_loss_raw": 1.068164587020874, "correct_loss_per_char": 0.16232383251190186, "incorrect_loss_per_char": 0.2670411467552185, "correct_loss_per_token": 0.48697149753570557, "incorrect_loss_per_token": 1.068164587020874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.068164587020874, "num_tokens": 1, "num_tokens_all": 843, "is_greedy": false, "logits_per_token": -1.068164587020874, "logits_per_char": -0.2670411467552185, "num_chars": 4}, {"sum_logits": -0.48697149753570557, "num_tokens": 1, "num_tokens_all": 843, "is_greedy": true, "logits_per_token": -0.48697149753570557, "logits_per_char": -0.16232383251190186, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 301, "native_id": 1142, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3430343270301819, "incorrect_loss_raw": 1.3678205013275146, "correct_loss_per_char": 0.08575858175754547, "incorrect_loss_per_char": 0.45594016710917157, "correct_loss_per_token": 0.3430343270301819, "incorrect_loss_per_token": 1.3678205013275146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3430343270301819, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.3430343270301819, "logits_per_char": -0.08575858175754547, "num_chars": 4}, {"sum_logits": -1.3678205013275146, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.3678205013275146, "logits_per_char": -0.45594016710917157, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 302, "native_id": 296, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3519936203956604, "incorrect_loss_raw": 1.3316926956176758, "correct_loss_per_char": 0.0879984050989151, "incorrect_loss_per_char": 0.4438975652058919, "correct_loss_per_token": 0.3519936203956604, "incorrect_loss_per_token": 1.3316926956176758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3519936203956604, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.3519936203956604, "logits_per_char": -0.0879984050989151, "num_chars": 4}, {"sum_logits": -1.3316926956176758, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.3316926956176758, "logits_per_char": -0.4438975652058919, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 303, "native_id": 2187, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2756795287132263, "incorrect_loss_raw": 1.5413446426391602, "correct_loss_per_char": 0.06891988217830658, "incorrect_loss_per_char": 0.5137815475463867, "correct_loss_per_token": 0.2756795287132263, "incorrect_loss_per_token": 1.5413446426391602, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2756795287132263, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.2756795287132263, "logits_per_char": -0.06891988217830658, "num_chars": 4}, {"sum_logits": -1.5413446426391602, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.5413446426391602, "logits_per_char": -0.5137815475463867, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 304, "native_id": 2840, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5784875750541687, "incorrect_loss_raw": 0.9444687366485596, "correct_loss_per_char": 0.14462189376354218, "incorrect_loss_per_char": 0.3148229122161865, "correct_loss_per_token": 0.5784875750541687, "incorrect_loss_per_token": 0.9444687366485596, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5784875750541687, "num_tokens": 1, "num_tokens_all": 1025, "is_greedy": true, "logits_per_token": -0.5784875750541687, "logits_per_char": -0.14462189376354218, "num_chars": 4}, {"sum_logits": -0.9444687366485596, "num_tokens": 1, "num_tokens_all": 1025, "is_greedy": false, "logits_per_token": -0.9444687366485596, "logits_per_char": -0.3148229122161865, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 305, "native_id": 2466, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2877846360206604, "incorrect_loss_raw": 1.8277881145477295, "correct_loss_per_char": 0.0719461590051651, "incorrect_loss_per_char": 0.6092627048492432, "correct_loss_per_token": 0.2877846360206604, "incorrect_loss_per_token": 1.8277881145477295, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2877846360206604, "num_tokens": 1, "num_tokens_all": 838, "is_greedy": true, "logits_per_token": -0.2877846360206604, "logits_per_char": -0.0719461590051651, "num_chars": 4}, {"sum_logits": -1.8277881145477295, "num_tokens": 1, "num_tokens_all": 838, "is_greedy": false, "logits_per_token": -1.8277881145477295, "logits_per_char": -0.6092627048492432, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 306, "native_id": 835, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34221893548965454, "incorrect_loss_raw": 1.4727964401245117, "correct_loss_per_char": 0.08555473387241364, "incorrect_loss_per_char": 0.4909321467081706, "correct_loss_per_token": 0.34221893548965454, "incorrect_loss_per_token": 1.4727964401245117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34221893548965454, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.34221893548965454, "logits_per_char": -0.08555473387241364, "num_chars": 4}, {"sum_logits": -1.4727964401245117, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.4727964401245117, "logits_per_char": -0.4909321467081706, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 307, "native_id": 1391, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6114965677261353, "incorrect_loss_raw": 0.2582055628299713, "correct_loss_per_char": 0.5371655225753784, "incorrect_loss_per_char": 0.06455139070749283, "correct_loss_per_token": 1.6114965677261353, "incorrect_loss_per_token": 0.2582055628299713, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2582055628299713, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.2582055628299713, "logits_per_char": -0.06455139070749283, "num_chars": 4}, {"sum_logits": -1.6114965677261353, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.6114965677261353, "logits_per_char": -0.5371655225753784, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 308, "native_id": 2090, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3562636375427246, "incorrect_loss_raw": 0.34913256764411926, "correct_loss_per_char": 0.4520878791809082, "incorrect_loss_per_char": 0.08728314191102982, "correct_loss_per_token": 1.3562636375427246, "incorrect_loss_per_token": 0.34913256764411926, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34913256764411926, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -0.34913256764411926, "logits_per_char": -0.08728314191102982, "num_chars": 4}, {"sum_logits": -1.3562636375427246, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -1.3562636375427246, "logits_per_char": -0.4520878791809082, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 309, "native_id": 1369, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8337033987045288, "incorrect_loss_raw": 0.6721146106719971, "correct_loss_per_char": 0.2084258496761322, "incorrect_loss_per_char": 0.22403820355733237, "correct_loss_per_token": 0.8337033987045288, "incorrect_loss_per_token": 0.6721146106719971, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8337033987045288, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -0.8337033987045288, "logits_per_char": -0.2084258496761322, "num_chars": 4}, {"sum_logits": -0.6721146106719971, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": true, "logits_per_token": -0.6721146106719971, "logits_per_char": -0.22403820355733237, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 310, "native_id": 1315, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19409309327602386, "incorrect_loss_raw": 1.928188681602478, "correct_loss_per_char": 0.048523273319005966, "incorrect_loss_per_char": 0.6427295605341593, "correct_loss_per_token": 0.19409309327602386, "incorrect_loss_per_token": 1.928188681602478, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19409309327602386, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.19409309327602386, "logits_per_char": -0.048523273319005966, "num_chars": 4}, {"sum_logits": -1.928188681602478, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.928188681602478, "logits_per_char": -0.6427295605341593, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 311, "native_id": 1876, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2431325912475586, "incorrect_loss_raw": 0.40546566247940063, "correct_loss_per_char": 0.41437753041585285, "incorrect_loss_per_char": 0.10136641561985016, "correct_loss_per_token": 1.2431325912475586, "incorrect_loss_per_token": 0.40546566247940063, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.40546566247940063, "num_tokens": 1, "num_tokens_all": 847, "is_greedy": true, "logits_per_token": -0.40546566247940063, "logits_per_char": -0.10136641561985016, "num_chars": 4}, {"sum_logits": -1.2431325912475586, "num_tokens": 1, "num_tokens_all": 847, "is_greedy": false, "logits_per_token": -1.2431325912475586, "logits_per_char": -0.41437753041585285, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 312, "native_id": 1095, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18437495827674866, "incorrect_loss_raw": 2.0236005783081055, "correct_loss_per_char": 0.046093739569187164, "incorrect_loss_per_char": 0.6745335261027018, "correct_loss_per_token": 0.18437495827674866, "incorrect_loss_per_token": 2.0236005783081055, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18437495827674866, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.18437495827674866, "logits_per_char": -0.046093739569187164, "num_chars": 4}, {"sum_logits": -2.0236005783081055, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -2.0236005783081055, "logits_per_char": -0.6745335261027018, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 313, "native_id": 347, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38627615571022034, "incorrect_loss_raw": 1.5041675567626953, "correct_loss_per_char": 0.09656903892755508, "incorrect_loss_per_char": 0.5013891855875651, "correct_loss_per_token": 0.38627615571022034, "incorrect_loss_per_token": 1.5041675567626953, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.38627615571022034, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": true, "logits_per_token": -0.38627615571022034, "logits_per_char": -0.09656903892755508, "num_chars": 4}, {"sum_logits": -1.5041675567626953, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": false, "logits_per_token": -1.5041675567626953, "logits_per_char": -0.5013891855875651, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 314, "native_id": 2159, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6281907558441162, "incorrect_loss_raw": 1.2987022399902344, "correct_loss_per_char": 0.15704768896102905, "incorrect_loss_per_char": 0.43290074666341144, "correct_loss_per_token": 0.6281907558441162, "incorrect_loss_per_token": 1.2987022399902344, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6281907558441162, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": true, "logits_per_token": -0.6281907558441162, "logits_per_char": -0.15704768896102905, "num_chars": 4}, {"sum_logits": -1.2987022399902344, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.2987022399902344, "logits_per_char": -0.43290074666341144, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 315, "native_id": 2413, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.37560033798217773, "incorrect_loss_raw": 1.2859854698181152, "correct_loss_per_char": 0.09390008449554443, "incorrect_loss_per_char": 0.4286618232727051, "correct_loss_per_token": 0.37560033798217773, "incorrect_loss_per_token": 1.2859854698181152, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.37560033798217773, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -0.37560033798217773, "logits_per_char": -0.09390008449554443, "num_chars": 4}, {"sum_logits": -1.2859854698181152, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -1.2859854698181152, "logits_per_char": -0.4286618232727051, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 316, "native_id": 2386, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24815525114536285, "incorrect_loss_raw": 1.6818517446517944, "correct_loss_per_char": 0.062038812786340714, "incorrect_loss_per_char": 0.5606172482172648, "correct_loss_per_token": 0.24815525114536285, "incorrect_loss_per_token": 1.6818517446517944, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24815525114536285, "num_tokens": 1, "num_tokens_all": 838, "is_greedy": true, "logits_per_token": -0.24815525114536285, "logits_per_char": -0.062038812786340714, "num_chars": 4}, {"sum_logits": -1.6818517446517944, "num_tokens": 1, "num_tokens_all": 838, "is_greedy": false, "logits_per_token": -1.6818517446517944, "logits_per_char": -0.5606172482172648, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 317, "native_id": 2245, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6771256923675537, "incorrect_loss_raw": 0.2658306956291199, "correct_loss_per_char": 0.5590418974558512, "incorrect_loss_per_char": 0.06645767390727997, "correct_loss_per_token": 1.6771256923675537, "incorrect_loss_per_token": 0.2658306956291199, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2658306956291199, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.2658306956291199, "logits_per_char": -0.06645767390727997, "num_chars": 4}, {"sum_logits": -1.6771256923675537, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.6771256923675537, "logits_per_char": -0.5590418974558512, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 318, "native_id": 3147, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1311398148536682, "incorrect_loss_raw": 2.3697962760925293, "correct_loss_per_char": 0.03278495371341705, "incorrect_loss_per_char": 0.7899320920308431, "correct_loss_per_token": 0.1311398148536682, "incorrect_loss_per_token": 2.3697962760925293, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1311398148536682, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.1311398148536682, "logits_per_char": -0.03278495371341705, "num_chars": 4}, {"sum_logits": -2.3697962760925293, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -2.3697962760925293, "logits_per_char": -0.7899320920308431, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 319, "native_id": 1004, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28360623121261597, "incorrect_loss_raw": 1.5396983623504639, "correct_loss_per_char": 0.07090155780315399, "incorrect_loss_per_char": 0.5132327874501547, "correct_loss_per_token": 0.28360623121261597, "incorrect_loss_per_token": 1.5396983623504639, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28360623121261597, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.28360623121261597, "logits_per_char": -0.07090155780315399, "num_chars": 4}, {"sum_logits": -1.5396983623504639, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.5396983623504639, "logits_per_char": -0.5132327874501547, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 320, "native_id": 1053, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1360896825790405, "incorrect_loss_raw": 0.4436396360397339, "correct_loss_per_char": 0.3786965608596802, "incorrect_loss_per_char": 0.11090990900993347, "correct_loss_per_token": 1.1360896825790405, "incorrect_loss_per_token": 0.4436396360397339, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4436396360397339, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.4436396360397339, "logits_per_char": -0.11090990900993347, "num_chars": 4}, {"sum_logits": -1.1360896825790405, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.1360896825790405, "logits_per_char": -0.3786965608596802, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 321, "native_id": 1523, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.28711998462677, "incorrect_loss_raw": 0.3925788104534149, "correct_loss_per_char": 0.42903999487559, "incorrect_loss_per_char": 0.09814470261335373, "correct_loss_per_token": 1.28711998462677, "incorrect_loss_per_token": 0.3925788104534149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3925788104534149, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.3925788104534149, "logits_per_char": -0.09814470261335373, "num_chars": 4}, {"sum_logits": -1.28711998462677, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -1.28711998462677, "logits_per_char": -0.42903999487559, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 322, "native_id": 561, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3811921179294586, "incorrect_loss_raw": 1.394118309020996, "correct_loss_per_char": 0.09529802948236465, "incorrect_loss_per_char": 0.4647061030069987, "correct_loss_per_token": 0.3811921179294586, "incorrect_loss_per_token": 1.394118309020996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3811921179294586, "num_tokens": 1, "num_tokens_all": 1430, "is_greedy": true, "logits_per_token": -0.3811921179294586, "logits_per_char": -0.09529802948236465, "num_chars": 4}, {"sum_logits": -1.394118309020996, "num_tokens": 1, "num_tokens_all": 1430, "is_greedy": false, "logits_per_token": -1.394118309020996, "logits_per_char": -0.4647061030069987, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 323, "native_id": 116, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38723084330558777, "incorrect_loss_raw": 1.414072036743164, "correct_loss_per_char": 0.09680771082639694, "incorrect_loss_per_char": 0.4713573455810547, "correct_loss_per_token": 0.38723084330558777, "incorrect_loss_per_token": 1.414072036743164, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.38723084330558777, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.38723084330558777, "logits_per_char": -0.09680771082639694, "num_chars": 4}, {"sum_logits": -1.414072036743164, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -1.414072036743164, "logits_per_char": -0.4713573455810547, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 324, "native_id": 1616, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20783212780952454, "incorrect_loss_raw": 1.8140904903411865, "correct_loss_per_char": 0.051958031952381134, "incorrect_loss_per_char": 0.6046968301137289, "correct_loss_per_token": 0.20783212780952454, "incorrect_loss_per_token": 1.8140904903411865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20783212780952454, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.20783212780952454, "logits_per_char": -0.051958031952381134, "num_chars": 4}, {"sum_logits": -1.8140904903411865, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.8140904903411865, "logits_per_char": -0.6046968301137289, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 325, "native_id": 153, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.887595772743225, "incorrect_loss_raw": 0.21280713379383087, "correct_loss_per_char": 0.6291985909144083, "incorrect_loss_per_char": 0.05320178344845772, "correct_loss_per_token": 1.887595772743225, "incorrect_loss_per_token": 0.21280713379383087, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21280713379383087, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.21280713379383087, "logits_per_char": -0.05320178344845772, "num_chars": 4}, {"sum_logits": -1.887595772743225, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -1.887595772743225, "logits_per_char": -0.6291985909144083, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 326, "native_id": 2722, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.45798951387405396, "incorrect_loss_raw": 1.1066792011260986, "correct_loss_per_char": 0.11449737846851349, "incorrect_loss_per_char": 0.3688930670420329, "correct_loss_per_token": 0.45798951387405396, "incorrect_loss_per_token": 1.1066792011260986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.45798951387405396, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.45798951387405396, "logits_per_char": -0.11449737846851349, "num_chars": 4}, {"sum_logits": -1.1066792011260986, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.1066792011260986, "logits_per_char": -0.3688930670420329, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 327, "native_id": 180, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32076770067214966, "incorrect_loss_raw": 1.400118112564087, "correct_loss_per_char": 0.08019192516803741, "incorrect_loss_per_char": 0.4667060375213623, "correct_loss_per_token": 0.32076770067214966, "incorrect_loss_per_token": 1.400118112564087, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32076770067214966, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": true, "logits_per_token": -0.32076770067214966, "logits_per_char": -0.08019192516803741, "num_chars": 4}, {"sum_logits": -1.400118112564087, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": false, "logits_per_token": -1.400118112564087, "logits_per_char": -0.4667060375213623, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 328, "native_id": 854, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2634260952472687, "incorrect_loss_raw": 1.5975780487060547, "correct_loss_per_char": 0.06585652381181717, "incorrect_loss_per_char": 0.5325260162353516, "correct_loss_per_token": 0.2634260952472687, "incorrect_loss_per_token": 1.5975780487060547, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2634260952472687, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": true, "logits_per_token": -0.2634260952472687, "logits_per_char": -0.06585652381181717, "num_chars": 4}, {"sum_logits": -1.5975780487060547, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": false, "logits_per_token": -1.5975780487060547, "logits_per_char": -0.5325260162353516, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 329, "native_id": 2730, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22126714885234833, "incorrect_loss_raw": 1.763210654258728, "correct_loss_per_char": 0.05531678721308708, "incorrect_loss_per_char": 0.5877368847529093, "correct_loss_per_token": 0.22126714885234833, "incorrect_loss_per_token": 1.763210654258728, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22126714885234833, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.22126714885234833, "logits_per_char": -0.05531678721308708, "num_chars": 4}, {"sum_logits": -1.763210654258728, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.763210654258728, "logits_per_char": -0.5877368847529093, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 330, "native_id": 3131, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2721731960773468, "incorrect_loss_raw": 1.6927188634872437, "correct_loss_per_char": 0.0680432990193367, "incorrect_loss_per_char": 0.5642396211624146, "correct_loss_per_token": 0.2721731960773468, "incorrect_loss_per_token": 1.6927188634872437, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2721731960773468, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.2721731960773468, "logits_per_char": -0.0680432990193367, "num_chars": 4}, {"sum_logits": -1.6927188634872437, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -1.6927188634872437, "logits_per_char": -0.5642396211624146, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 331, "native_id": 1282, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20143292844295502, "incorrect_loss_raw": 1.8365081548690796, "correct_loss_per_char": 0.050358232110738754, "incorrect_loss_per_char": 0.6121693849563599, "correct_loss_per_token": 0.20143292844295502, "incorrect_loss_per_token": 1.8365081548690796, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20143292844295502, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": true, "logits_per_token": -0.20143292844295502, "logits_per_char": -0.050358232110738754, "num_chars": 4}, {"sum_logits": -1.8365081548690796, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": false, "logits_per_token": -1.8365081548690796, "logits_per_char": -0.6121693849563599, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 332, "native_id": 2112, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4718694686889648, "incorrect_loss_raw": 0.3036839962005615, "correct_loss_per_char": 0.49062315622965497, "incorrect_loss_per_char": 0.07592099905014038, "correct_loss_per_token": 1.4718694686889648, "incorrect_loss_per_token": 0.3036839962005615, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3036839962005615, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -0.3036839962005615, "logits_per_char": -0.07592099905014038, "num_chars": 4}, {"sum_logits": -1.4718694686889648, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.4718694686889648, "logits_per_char": -0.49062315622965497, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 333, "native_id": 3219, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.37688055634498596, "incorrect_loss_raw": 1.3371421098709106, "correct_loss_per_char": 0.09422013908624649, "incorrect_loss_per_char": 0.4457140366236369, "correct_loss_per_token": 0.37688055634498596, "incorrect_loss_per_token": 1.3371421098709106, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.37688055634498596, "num_tokens": 1, "num_tokens_all": 1192, "is_greedy": true, "logits_per_token": -0.37688055634498596, "logits_per_char": -0.09422013908624649, "num_chars": 4}, {"sum_logits": -1.3371421098709106, "num_tokens": 1, "num_tokens_all": 1192, "is_greedy": false, "logits_per_token": -1.3371421098709106, "logits_per_char": -0.4457140366236369, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 334, "native_id": 1779, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25723621249198914, "incorrect_loss_raw": 1.6451444625854492, "correct_loss_per_char": 0.06430905312299728, "incorrect_loss_per_char": 0.548381487528483, "correct_loss_per_token": 0.25723621249198914, "incorrect_loss_per_token": 1.6451444625854492, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25723621249198914, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.25723621249198914, "logits_per_char": -0.06430905312299728, "num_chars": 4}, {"sum_logits": -1.6451444625854492, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.6451444625854492, "logits_per_char": -0.548381487528483, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 335, "native_id": 2110, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27349117398262024, "incorrect_loss_raw": 1.6807347536087036, "correct_loss_per_char": 0.06837279349565506, "incorrect_loss_per_char": 0.5602449178695679, "correct_loss_per_token": 0.27349117398262024, "incorrect_loss_per_token": 1.6807347536087036, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27349117398262024, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": true, "logits_per_token": -0.27349117398262024, "logits_per_char": -0.06837279349565506, "num_chars": 4}, {"sum_logits": -1.6807347536087036, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": false, "logits_per_token": -1.6807347536087036, "logits_per_char": -0.5602449178695679, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 336, "native_id": 282, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.609832525253296, "incorrect_loss_raw": 0.29460909962654114, "correct_loss_per_char": 0.5366108417510986, "incorrect_loss_per_char": 0.07365227490663528, "correct_loss_per_token": 1.609832525253296, "incorrect_loss_per_token": 0.29460909962654114, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29460909962654114, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": true, "logits_per_token": -0.29460909962654114, "logits_per_char": -0.07365227490663528, "num_chars": 4}, {"sum_logits": -1.609832525253296, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": false, "logits_per_token": -1.609832525253296, "logits_per_char": -0.5366108417510986, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 337, "native_id": 1249, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5804117918014526, "incorrect_loss_raw": 0.9616339206695557, "correct_loss_per_char": 0.14510294795036316, "incorrect_loss_per_char": 0.32054464022318524, "correct_loss_per_token": 0.5804117918014526, "incorrect_loss_per_token": 0.9616339206695557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5804117918014526, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.5804117918014526, "logits_per_char": -0.14510294795036316, "num_chars": 4}, {"sum_logits": -0.9616339206695557, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -0.9616339206695557, "logits_per_char": -0.32054464022318524, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 338, "native_id": 1070, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19855506718158722, "incorrect_loss_raw": 1.9785374402999878, "correct_loss_per_char": 0.049638766795396805, "incorrect_loss_per_char": 0.659512480099996, "correct_loss_per_token": 0.19855506718158722, "incorrect_loss_per_token": 1.9785374402999878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19855506718158722, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.19855506718158722, "logits_per_char": -0.049638766795396805, "num_chars": 4}, {"sum_logits": -1.9785374402999878, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -1.9785374402999878, "logits_per_char": -0.659512480099996, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 339, "native_id": 2859, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.14848142862319946, "incorrect_loss_raw": 2.2983293533325195, "correct_loss_per_char": 0.037120357155799866, "incorrect_loss_per_char": 0.7661097844441732, "correct_loss_per_token": 0.14848142862319946, "incorrect_loss_per_token": 2.2983293533325195, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.14848142862319946, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.14848142862319946, "logits_per_char": -0.037120357155799866, "num_chars": 4}, {"sum_logits": -2.2983293533325195, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -2.2983293533325195, "logits_per_char": -0.7661097844441732, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 340, "native_id": 1988, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5909079313278198, "incorrect_loss_raw": 0.9340674877166748, "correct_loss_per_char": 0.14772698283195496, "incorrect_loss_per_char": 0.3113558292388916, "correct_loss_per_token": 0.5909079313278198, "incorrect_loss_per_token": 0.9340674877166748, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5909079313278198, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.5909079313278198, "logits_per_char": -0.14772698283195496, "num_chars": 4}, {"sum_logits": -0.9340674877166748, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -0.9340674877166748, "logits_per_char": -0.3113558292388916, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 341, "native_id": 2374, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1307082176208496, "incorrect_loss_raw": 2.363327741622925, "correct_loss_per_char": 0.0326770544052124, "incorrect_loss_per_char": 0.7877759138743082, "correct_loss_per_token": 0.1307082176208496, "incorrect_loss_per_token": 2.363327741622925, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1307082176208496, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.1307082176208496, "logits_per_char": -0.0326770544052124, "num_chars": 4}, {"sum_logits": -2.363327741622925, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -2.363327741622925, "logits_per_char": -0.7877759138743082, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 342, "native_id": 899, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1169822216033936, "incorrect_loss_raw": 0.14769448339939117, "correct_loss_per_char": 0.7056607405344645, "incorrect_loss_per_char": 0.036923620849847794, "correct_loss_per_token": 2.1169822216033936, "incorrect_loss_per_token": 0.14769448339939117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.14769448339939117, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.14769448339939117, "logits_per_char": -0.036923620849847794, "num_chars": 4}, {"sum_logits": -2.1169822216033936, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -2.1169822216033936, "logits_per_char": -0.7056607405344645, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 343, "native_id": 1424, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23705731332302094, "incorrect_loss_raw": 1.6788519620895386, "correct_loss_per_char": 0.059264328330755234, "incorrect_loss_per_char": 0.5596173206965128, "correct_loss_per_token": 0.23705731332302094, "incorrect_loss_per_token": 1.6788519620895386, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23705731332302094, "num_tokens": 1, "num_tokens_all": 1140, "is_greedy": true, "logits_per_token": -0.23705731332302094, "logits_per_char": -0.059264328330755234, "num_chars": 4}, {"sum_logits": -1.6788519620895386, "num_tokens": 1, "num_tokens_all": 1140, "is_greedy": false, "logits_per_token": -1.6788519620895386, "logits_per_char": -0.5596173206965128, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 344, "native_id": 2065, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.586599588394165, "incorrect_loss_raw": 0.934352457523346, "correct_loss_per_char": 0.14664989709854126, "incorrect_loss_per_char": 0.31145081917444867, "correct_loss_per_token": 0.586599588394165, "incorrect_loss_per_token": 0.934352457523346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.586599588394165, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": true, "logits_per_token": -0.586599588394165, "logits_per_char": -0.14664989709854126, "num_chars": 4}, {"sum_logits": -0.934352457523346, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": false, "logits_per_token": -0.934352457523346, "logits_per_char": -0.31145081917444867, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 345, "native_id": 339, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.945064902305603, "incorrect_loss_raw": 0.5735630393028259, "correct_loss_per_char": 0.3150216341018677, "incorrect_loss_per_char": 0.14339075982570648, "correct_loss_per_token": 0.945064902305603, "incorrect_loss_per_token": 0.5735630393028259, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5735630393028259, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.5735630393028259, "logits_per_char": -0.14339075982570648, "num_chars": 4}, {"sum_logits": -0.945064902305603, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -0.945064902305603, "logits_per_char": -0.3150216341018677, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 346, "native_id": 2675, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2953450679779053, "incorrect_loss_raw": 1.573573350906372, "correct_loss_per_char": 0.07383626699447632, "incorrect_loss_per_char": 0.524524450302124, "correct_loss_per_token": 0.2953450679779053, "incorrect_loss_per_token": 1.573573350906372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2953450679779053, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.2953450679779053, "logits_per_char": -0.07383626699447632, "num_chars": 4}, {"sum_logits": -1.573573350906372, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.573573350906372, "logits_per_char": -0.524524450302124, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 347, "native_id": 74, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.13792191445827484, "incorrect_loss_raw": 2.301421880722046, "correct_loss_per_char": 0.03448047861456871, "incorrect_loss_per_char": 0.7671406269073486, "correct_loss_per_token": 0.13792191445827484, "incorrect_loss_per_token": 2.301421880722046, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.13792191445827484, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.13792191445827484, "logits_per_char": -0.03448047861456871, "num_chars": 4}, {"sum_logits": -2.301421880722046, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -2.301421880722046, "logits_per_char": -0.7671406269073486, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 348, "native_id": 3013, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6053667068481445, "incorrect_loss_raw": 0.2680150866508484, "correct_loss_per_char": 0.5351222356160482, "incorrect_loss_per_char": 0.0670037716627121, "correct_loss_per_token": 1.6053667068481445, "incorrect_loss_per_token": 0.2680150866508484, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2680150866508484, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.2680150866508484, "logits_per_char": -0.0670037716627121, "num_chars": 4}, {"sum_logits": -1.6053667068481445, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -1.6053667068481445, "logits_per_char": -0.5351222356160482, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 349, "native_id": 3111, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7182661294937134, "incorrect_loss_raw": 0.25781533122062683, "correct_loss_per_char": 0.5727553764979044, "incorrect_loss_per_char": 0.06445383280515671, "correct_loss_per_token": 1.7182661294937134, "incorrect_loss_per_token": 0.25781533122062683, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25781533122062683, "num_tokens": 1, "num_tokens_all": 1140, "is_greedy": true, "logits_per_token": -0.25781533122062683, "logits_per_char": -0.06445383280515671, "num_chars": 4}, {"sum_logits": -1.7182661294937134, "num_tokens": 1, "num_tokens_all": 1140, "is_greedy": false, "logits_per_token": -1.7182661294937134, "logits_per_char": -0.5727553764979044, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 350, "native_id": 1356, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.429748296737671, "incorrect_loss_raw": 0.3176504969596863, "correct_loss_per_char": 0.47658276557922363, "incorrect_loss_per_char": 0.07941262423992157, "correct_loss_per_token": 1.429748296737671, "incorrect_loss_per_token": 0.3176504969596863, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3176504969596863, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.3176504969596863, "logits_per_char": -0.07941262423992157, "num_chars": 4}, {"sum_logits": -1.429748296737671, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.429748296737671, "logits_per_char": -0.47658276557922363, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 351, "native_id": 2310, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6640493869781494, "incorrect_loss_raw": 0.3015107214450836, "correct_loss_per_char": 0.5546831289927164, "incorrect_loss_per_char": 0.0753776803612709, "correct_loss_per_token": 1.6640493869781494, "incorrect_loss_per_token": 0.3015107214450836, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3015107214450836, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.3015107214450836, "logits_per_char": -0.0753776803612709, "num_chars": 4}, {"sum_logits": -1.6640493869781494, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -1.6640493869781494, "logits_per_char": -0.5546831289927164, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 352, "native_id": 940, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1987542361021042, "incorrect_loss_raw": 1.9630250930786133, "correct_loss_per_char": 0.04968855902552605, "incorrect_loss_per_char": 0.6543416976928711, "correct_loss_per_token": 0.1987542361021042, "incorrect_loss_per_token": 1.9630250930786133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1987542361021042, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.1987542361021042, "logits_per_char": -0.04968855902552605, "num_chars": 4}, {"sum_logits": -1.9630250930786133, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.9630250930786133, "logits_per_char": -0.6543416976928711, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 353, "native_id": 665, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1642613410949707, "incorrect_loss_raw": 0.4547513723373413, "correct_loss_per_char": 0.38808711369832355, "incorrect_loss_per_char": 0.11368784308433533, "correct_loss_per_token": 1.1642613410949707, "incorrect_loss_per_token": 0.4547513723373413, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4547513723373413, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.4547513723373413, "logits_per_char": -0.11368784308433533, "num_chars": 4}, {"sum_logits": -1.1642613410949707, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.1642613410949707, "logits_per_char": -0.38808711369832355, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 354, "native_id": 3008, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32412731647491455, "incorrect_loss_raw": 1.442376732826233, "correct_loss_per_char": 0.08103182911872864, "incorrect_loss_per_char": 0.48079224427541095, "correct_loss_per_token": 0.32412731647491455, "incorrect_loss_per_token": 1.442376732826233, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32412731647491455, "num_tokens": 1, "num_tokens_all": 858, "is_greedy": true, "logits_per_token": -0.32412731647491455, "logits_per_char": -0.08103182911872864, "num_chars": 4}, {"sum_logits": -1.442376732826233, "num_tokens": 1, "num_tokens_all": 858, "is_greedy": false, "logits_per_token": -1.442376732826233, "logits_per_char": -0.48079224427541095, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 355, "native_id": 2045, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0281733274459839, "incorrect_loss_raw": 0.5365083813667297, "correct_loss_per_char": 0.34272444248199463, "incorrect_loss_per_char": 0.13412709534168243, "correct_loss_per_token": 1.0281733274459839, "incorrect_loss_per_token": 0.5365083813667297, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5365083813667297, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": true, "logits_per_token": -0.5365083813667297, "logits_per_char": -0.13412709534168243, "num_chars": 4}, {"sum_logits": -1.0281733274459839, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": false, "logits_per_token": -1.0281733274459839, "logits_per_char": -0.34272444248199463, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 356, "native_id": 2805, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20385664701461792, "incorrect_loss_raw": 2.2800400257110596, "correct_loss_per_char": 0.05096416175365448, "incorrect_loss_per_char": 0.7600133419036865, "correct_loss_per_token": 0.20385664701461792, "incorrect_loss_per_token": 2.2800400257110596, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20385664701461792, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": true, "logits_per_token": -0.20385664701461792, "logits_per_char": -0.05096416175365448, "num_chars": 4}, {"sum_logits": -2.2800400257110596, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": false, "logits_per_token": -2.2800400257110596, "logits_per_char": -0.7600133419036865, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 357, "native_id": 2767, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20253926515579224, "incorrect_loss_raw": 1.9452824592590332, "correct_loss_per_char": 0.05063481628894806, "incorrect_loss_per_char": 0.6484274864196777, "correct_loss_per_token": 0.20253926515579224, "incorrect_loss_per_token": 1.9452824592590332, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20253926515579224, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.20253926515579224, "logits_per_char": -0.05063481628894806, "num_chars": 4}, {"sum_logits": -1.9452824592590332, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -1.9452824592590332, "logits_per_char": -0.6484274864196777, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 358, "native_id": 2983, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1607288122177124, "incorrect_loss_raw": 0.40989914536476135, "correct_loss_per_char": 0.3869096040725708, "incorrect_loss_per_char": 0.10247478634119034, "correct_loss_per_token": 1.1607288122177124, "incorrect_loss_per_token": 0.40989914536476135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.40989914536476135, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.40989914536476135, "logits_per_char": -0.10247478634119034, "num_chars": 4}, {"sum_logits": -1.1607288122177124, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.1607288122177124, "logits_per_char": -0.3869096040725708, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 359, "native_id": 2180, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2616783678531647, "incorrect_loss_raw": 1.6795589923858643, "correct_loss_per_char": 0.06541959196329117, "incorrect_loss_per_char": 0.5598529974619547, "correct_loss_per_token": 0.2616783678531647, "incorrect_loss_per_token": 1.6795589923858643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2616783678531647, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -0.2616783678531647, "logits_per_char": -0.06541959196329117, "num_chars": 4}, {"sum_logits": -1.6795589923858643, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.6795589923858643, "logits_per_char": -0.5598529974619547, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 360, "native_id": 2550, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30462589859962463, "incorrect_loss_raw": 1.5797401666641235, "correct_loss_per_char": 0.07615647464990616, "incorrect_loss_per_char": 0.5265800555547079, "correct_loss_per_token": 0.30462589859962463, "incorrect_loss_per_token": 1.5797401666641235, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30462589859962463, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.30462589859962463, "logits_per_char": -0.07615647464990616, "num_chars": 4}, {"sum_logits": -1.5797401666641235, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.5797401666641235, "logits_per_char": -0.5265800555547079, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 361, "native_id": 2538, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5611757040023804, "incorrect_loss_raw": 0.29952383041381836, "correct_loss_per_char": 0.5203919013341268, "incorrect_loss_per_char": 0.07488095760345459, "correct_loss_per_token": 1.5611757040023804, "incorrect_loss_per_token": 0.29952383041381836, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29952383041381836, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.29952383041381836, "logits_per_char": -0.07488095760345459, "num_chars": 4}, {"sum_logits": -1.5611757040023804, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.5611757040023804, "logits_per_char": -0.5203919013341268, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 362, "native_id": 279, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22515064477920532, "incorrect_loss_raw": 2.013728141784668, "correct_loss_per_char": 0.05628766119480133, "incorrect_loss_per_char": 0.6712427139282227, "correct_loss_per_token": 0.22515064477920532, "incorrect_loss_per_token": 2.013728141784668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22515064477920532, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.22515064477920532, "logits_per_char": -0.05628766119480133, "num_chars": 4}, {"sum_logits": -2.013728141784668, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -2.013728141784668, "logits_per_char": -0.6712427139282227, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 363, "native_id": 596, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.40958189964294434, "incorrect_loss_raw": 1.2928099632263184, "correct_loss_per_char": 0.10239547491073608, "incorrect_loss_per_char": 0.43093665440877277, "correct_loss_per_token": 0.40958189964294434, "incorrect_loss_per_token": 1.2928099632263184, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.40958189964294434, "num_tokens": 1, "num_tokens_all": 1196, "is_greedy": true, "logits_per_token": -0.40958189964294434, "logits_per_char": -0.10239547491073608, "num_chars": 4}, {"sum_logits": -1.2928099632263184, "num_tokens": 1, "num_tokens_all": 1196, "is_greedy": false, "logits_per_token": -1.2928099632263184, "logits_per_char": -0.43093665440877277, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 364, "native_id": 2176, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3849904537200928, "incorrect_loss_raw": 0.36439138650894165, "correct_loss_per_char": 0.46166348457336426, "incorrect_loss_per_char": 0.09109784662723541, "correct_loss_per_token": 1.3849904537200928, "incorrect_loss_per_token": 0.36439138650894165, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.36439138650894165, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": true, "logits_per_token": -0.36439138650894165, "logits_per_char": -0.09109784662723541, "num_chars": 4}, {"sum_logits": -1.3849904537200928, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": false, "logits_per_token": -1.3849904537200928, "logits_per_char": -0.46166348457336426, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 365, "native_id": 996, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.489330768585205, "incorrect_loss_raw": 0.3034239709377289, "correct_loss_per_char": 0.4964435895284017, "incorrect_loss_per_char": 0.07585599273443222, "correct_loss_per_token": 1.489330768585205, "incorrect_loss_per_token": 0.3034239709377289, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3034239709377289, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.3034239709377289, "logits_per_char": -0.07585599273443222, "num_chars": 4}, {"sum_logits": -1.489330768585205, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.489330768585205, "logits_per_char": -0.4964435895284017, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 366, "native_id": 2820, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27541646361351013, "incorrect_loss_raw": 1.6684492826461792, "correct_loss_per_char": 0.06885411590337753, "incorrect_loss_per_char": 0.5561497608820597, "correct_loss_per_token": 0.27541646361351013, "incorrect_loss_per_token": 1.6684492826461792, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27541646361351013, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": true, "logits_per_token": -0.27541646361351013, "logits_per_char": -0.06885411590337753, "num_chars": 4}, {"sum_logits": -1.6684492826461792, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": false, "logits_per_token": -1.6684492826461792, "logits_per_char": -0.5561497608820597, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 367, "native_id": 672, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3924505412578583, "incorrect_loss_raw": 1.2414664030075073, "correct_loss_per_char": 0.09811263531446457, "incorrect_loss_per_char": 0.41382213433583576, "correct_loss_per_token": 0.3924505412578583, "incorrect_loss_per_token": 1.2414664030075073, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3924505412578583, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.3924505412578583, "logits_per_char": -0.09811263531446457, "num_chars": 4}, {"sum_logits": -1.2414664030075073, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -1.2414664030075073, "logits_per_char": -0.41382213433583576, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 368, "native_id": 2074, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5578339099884033, "incorrect_loss_raw": 0.9601314067840576, "correct_loss_per_char": 0.13945847749710083, "incorrect_loss_per_char": 0.32004380226135254, "correct_loss_per_token": 0.5578339099884033, "incorrect_loss_per_token": 0.9601314067840576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5578339099884033, "num_tokens": 1, "num_tokens_all": 1291, "is_greedy": true, "logits_per_token": -0.5578339099884033, "logits_per_char": -0.13945847749710083, "num_chars": 4}, {"sum_logits": -0.9601314067840576, "num_tokens": 1, "num_tokens_all": 1291, "is_greedy": false, "logits_per_token": -0.9601314067840576, "logits_per_char": -0.32004380226135254, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 369, "native_id": 2068, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.36610662937164307, "incorrect_loss_raw": 1.2963004112243652, "correct_loss_per_char": 0.09152665734291077, "incorrect_loss_per_char": 0.4321001370747884, "correct_loss_per_token": 0.36610662937164307, "incorrect_loss_per_token": 1.2963004112243652, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.36610662937164307, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": true, "logits_per_token": -0.36610662937164307, "logits_per_char": -0.09152665734291077, "num_chars": 4}, {"sum_logits": -1.2963004112243652, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": false, "logits_per_token": -1.2963004112243652, "logits_per_char": -0.4321001370747884, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 370, "native_id": 2831, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20217189192771912, "incorrect_loss_raw": 1.8597348928451538, "correct_loss_per_char": 0.05054297298192978, "incorrect_loss_per_char": 0.6199116309483846, "correct_loss_per_token": 0.20217189192771912, "incorrect_loss_per_token": 1.8597348928451538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20217189192771912, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.20217189192771912, "logits_per_char": -0.05054297298192978, "num_chars": 4}, {"sum_logits": -1.8597348928451538, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.8597348928451538, "logits_per_char": -0.6199116309483846, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 371, "native_id": 1610, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3364039957523346, "incorrect_loss_raw": 1.467586874961853, "correct_loss_per_char": 0.08410099893808365, "incorrect_loss_per_char": 0.48919562498728436, "correct_loss_per_token": 0.3364039957523346, "incorrect_loss_per_token": 1.467586874961853, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3364039957523346, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": true, "logits_per_token": -0.3364039957523346, "logits_per_char": -0.08410099893808365, "num_chars": 4}, {"sum_logits": -1.467586874961853, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": false, "logits_per_token": -1.467586874961853, "logits_per_char": -0.48919562498728436, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 372, "native_id": 1337, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24267414212226868, "incorrect_loss_raw": 1.804399013519287, "correct_loss_per_char": 0.06066853553056717, "incorrect_loss_per_char": 0.6014663378397623, "correct_loss_per_token": 0.24267414212226868, "incorrect_loss_per_token": 1.804399013519287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24267414212226868, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -0.24267414212226868, "logits_per_char": -0.06066853553056717, "num_chars": 4}, {"sum_logits": -1.804399013519287, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.804399013519287, "logits_per_char": -0.6014663378397623, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 373, "native_id": 528, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19960874319076538, "incorrect_loss_raw": 1.8575830459594727, "correct_loss_per_char": 0.049902185797691345, "incorrect_loss_per_char": 0.6191943486531576, "correct_loss_per_token": 0.19960874319076538, "incorrect_loss_per_token": 1.8575830459594727, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19960874319076538, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.19960874319076538, "logits_per_char": -0.049902185797691345, "num_chars": 4}, {"sum_logits": -1.8575830459594727, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -1.8575830459594727, "logits_per_char": -0.6191943486531576, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 374, "native_id": 2300, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5481300354003906, "incorrect_loss_raw": 0.27644675970077515, "correct_loss_per_char": 0.5160433451334635, "incorrect_loss_per_char": 0.06911168992519379, "correct_loss_per_token": 1.5481300354003906, "incorrect_loss_per_token": 0.27644675970077515, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27644675970077515, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.27644675970077515, "logits_per_char": -0.06911168992519379, "num_chars": 4}, {"sum_logits": -1.5481300354003906, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.5481300354003906, "logits_per_char": -0.5160433451334635, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 375, "native_id": 2319, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.5537993907928467, "incorrect_loss_raw": 0.1078389585018158, "correct_loss_per_char": 0.8512664635976156, "incorrect_loss_per_char": 0.02695973962545395, "correct_loss_per_token": 2.5537993907928467, "incorrect_loss_per_token": 0.1078389585018158, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1078389585018158, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.1078389585018158, "logits_per_char": -0.02695973962545395, "num_chars": 4}, {"sum_logits": -2.5537993907928467, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -2.5537993907928467, "logits_per_char": -0.8512664635976156, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 376, "native_id": 2191, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.359656810760498, "incorrect_loss_raw": 0.3645924925804138, "correct_loss_per_char": 0.453218936920166, "incorrect_loss_per_char": 0.09114812314510345, "correct_loss_per_token": 1.359656810760498, "incorrect_loss_per_token": 0.3645924925804138, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3645924925804138, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.3645924925804138, "logits_per_char": -0.09114812314510345, "num_chars": 4}, {"sum_logits": -1.359656810760498, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -1.359656810760498, "logits_per_char": -0.453218936920166, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 377, "native_id": 2499, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5392961502075195, "incorrect_loss_raw": 0.2636629641056061, "correct_loss_per_char": 0.5130987167358398, "incorrect_loss_per_char": 0.06591574102640152, "correct_loss_per_token": 1.5392961502075195, "incorrect_loss_per_token": 0.2636629641056061, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2636629641056061, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.2636629641056061, "logits_per_char": -0.06591574102640152, "num_chars": 4}, {"sum_logits": -1.5392961502075195, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.5392961502075195, "logits_per_char": -0.5130987167358398, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 378, "native_id": 2454, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5682912468910217, "incorrect_loss_raw": 1.0567097663879395, "correct_loss_per_char": 0.18943041563034058, "incorrect_loss_per_char": 0.26417744159698486, "correct_loss_per_token": 0.5682912468910217, "incorrect_loss_per_token": 1.0567097663879395, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0567097663879395, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": false, "logits_per_token": -1.0567097663879395, "logits_per_char": -0.26417744159698486, "num_chars": 4}, {"sum_logits": -0.5682912468910217, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": true, "logits_per_token": -0.5682912468910217, "logits_per_char": -0.18943041563034058, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 379, "native_id": 1828, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20046116411685944, "incorrect_loss_raw": 1.9488617181777954, "correct_loss_per_char": 0.05011529102921486, "incorrect_loss_per_char": 0.6496205727259318, "correct_loss_per_token": 0.20046116411685944, "incorrect_loss_per_token": 1.9488617181777954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20046116411685944, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.20046116411685944, "logits_per_char": -0.05011529102921486, "num_chars": 4}, {"sum_logits": -1.9488617181777954, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.9488617181777954, "logits_per_char": -0.6496205727259318, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 380, "native_id": 167, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5227162837982178, "incorrect_loss_raw": 0.2876630425453186, "correct_loss_per_char": 0.5075720945994059, "incorrect_loss_per_char": 0.07191576063632965, "correct_loss_per_token": 1.5227162837982178, "incorrect_loss_per_token": 0.2876630425453186, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2876630425453186, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.2876630425453186, "logits_per_char": -0.07191576063632965, "num_chars": 4}, {"sum_logits": -1.5227162837982178, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.5227162837982178, "logits_per_char": -0.5075720945994059, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 381, "native_id": 1522, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4587854146957397, "incorrect_loss_raw": 0.33770859241485596, "correct_loss_per_char": 0.4862618048985799, "incorrect_loss_per_char": 0.08442714810371399, "correct_loss_per_token": 1.4587854146957397, "incorrect_loss_per_token": 0.33770859241485596, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.33770859241485596, "num_tokens": 1, "num_tokens_all": 840, "is_greedy": true, "logits_per_token": -0.33770859241485596, "logits_per_char": -0.08442714810371399, "num_chars": 4}, {"sum_logits": -1.4587854146957397, "num_tokens": 1, "num_tokens_all": 840, "is_greedy": false, "logits_per_token": -1.4587854146957397, "logits_per_char": -0.4862618048985799, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 382, "native_id": 281, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28054940700531006, "incorrect_loss_raw": 2.004709243774414, "correct_loss_per_char": 0.07013735175132751, "incorrect_loss_per_char": 0.6682364145914713, "correct_loss_per_token": 0.28054940700531006, "incorrect_loss_per_token": 2.004709243774414, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28054940700531006, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.28054940700531006, "logits_per_char": -0.07013735175132751, "num_chars": 4}, {"sum_logits": -2.004709243774414, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -2.004709243774414, "logits_per_char": -0.6682364145914713, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 383, "native_id": 1511, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.37814730405807495, "incorrect_loss_raw": 1.322203516960144, "correct_loss_per_char": 0.09453682601451874, "incorrect_loss_per_char": 0.44073450565338135, "correct_loss_per_token": 0.37814730405807495, "incorrect_loss_per_token": 1.322203516960144, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.37814730405807495, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.37814730405807495, "logits_per_char": -0.09453682601451874, "num_chars": 4}, {"sum_logits": -1.322203516960144, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.322203516960144, "logits_per_char": -0.44073450565338135, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 384, "native_id": 2768, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6437782049179077, "incorrect_loss_raw": 0.9098852872848511, "correct_loss_per_char": 0.16094455122947693, "incorrect_loss_per_char": 0.303295095761617, "correct_loss_per_token": 0.6437782049179077, "incorrect_loss_per_token": 0.9098852872848511, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6437782049179077, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.6437782049179077, "logits_per_char": -0.16094455122947693, "num_chars": 4}, {"sum_logits": -0.9098852872848511, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -0.9098852872848511, "logits_per_char": -0.303295095761617, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 385, "native_id": 1672, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1815754622220993, "incorrect_loss_raw": 2.063424825668335, "correct_loss_per_char": 0.045393865555524826, "incorrect_loss_per_char": 0.6878082752227783, "correct_loss_per_token": 0.1815754622220993, "incorrect_loss_per_token": 2.063424825668335, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1815754622220993, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.1815754622220993, "logits_per_char": -0.045393865555524826, "num_chars": 4}, {"sum_logits": -2.063424825668335, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -2.063424825668335, "logits_per_char": -0.6878082752227783, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 386, "native_id": 182, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.533987283706665, "incorrect_loss_raw": 1.0379607677459717, "correct_loss_per_char": 0.13349682092666626, "incorrect_loss_per_char": 0.34598692258199054, "correct_loss_per_token": 0.533987283706665, "incorrect_loss_per_token": 1.0379607677459717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.533987283706665, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.533987283706665, "logits_per_char": -0.13349682092666626, "num_chars": 4}, {"sum_logits": -1.0379607677459717, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.0379607677459717, "logits_per_char": -0.34598692258199054, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 387, "native_id": 2474, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4766782522201538, "incorrect_loss_raw": 0.3218560218811035, "correct_loss_per_char": 0.4922260840733846, "incorrect_loss_per_char": 0.08046400547027588, "correct_loss_per_token": 1.4766782522201538, "incorrect_loss_per_token": 0.3218560218811035, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3218560218811035, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -0.3218560218811035, "logits_per_char": -0.08046400547027588, "num_chars": 4}, {"sum_logits": -1.4766782522201538, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.4766782522201538, "logits_per_char": -0.4922260840733846, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 388, "native_id": 2506, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3685975074768066, "incorrect_loss_raw": 0.33355072140693665, "correct_loss_per_char": 0.45619916915893555, "incorrect_loss_per_char": 0.08338768035173416, "correct_loss_per_token": 1.3685975074768066, "incorrect_loss_per_token": 0.33355072140693665, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.33355072140693665, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.33355072140693665, "logits_per_char": -0.08338768035173416, "num_chars": 4}, {"sum_logits": -1.3685975074768066, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.3685975074768066, "logits_per_char": -0.45619916915893555, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 389, "native_id": 290, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8498654961585999, "incorrect_loss_raw": 0.6560642123222351, "correct_loss_per_char": 0.28328849871953327, "incorrect_loss_per_char": 0.16401605308055878, "correct_loss_per_token": 0.8498654961585999, "incorrect_loss_per_token": 0.6560642123222351, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6560642123222351, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": true, "logits_per_token": -0.6560642123222351, "logits_per_char": -0.16401605308055878, "num_chars": 4}, {"sum_logits": -0.8498654961585999, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": false, "logits_per_token": -0.8498654961585999, "logits_per_char": -0.28328849871953327, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 390, "native_id": 1286, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.40436410903930664, "incorrect_loss_raw": 1.2501674890518188, "correct_loss_per_char": 0.10109102725982666, "incorrect_loss_per_char": 0.41672249635060626, "correct_loss_per_token": 0.40436410903930664, "incorrect_loss_per_token": 1.2501674890518188, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.40436410903930664, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.40436410903930664, "logits_per_char": -0.10109102725982666, "num_chars": 4}, {"sum_logits": -1.2501674890518188, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.2501674890518188, "logits_per_char": -0.41672249635060626, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 391, "native_id": 933, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4091982841491699, "incorrect_loss_raw": 1.1982762813568115, "correct_loss_per_char": 0.10229957103729248, "incorrect_loss_per_char": 0.3994254271189372, "correct_loss_per_token": 0.4091982841491699, "incorrect_loss_per_token": 1.1982762813568115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4091982841491699, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.4091982841491699, "logits_per_char": -0.10229957103729248, "num_chars": 4}, {"sum_logits": -1.1982762813568115, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.1982762813568115, "logits_per_char": -0.3994254271189372, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 392, "native_id": 3249, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2435523122549057, "incorrect_loss_raw": 1.6712268590927124, "correct_loss_per_char": 0.060888078063726425, "incorrect_loss_per_char": 0.5570756196975708, "correct_loss_per_token": 0.2435523122549057, "incorrect_loss_per_token": 1.6712268590927124, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2435523122549057, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.2435523122549057, "logits_per_char": -0.060888078063726425, "num_chars": 4}, {"sum_logits": -1.6712268590927124, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -1.6712268590927124, "logits_per_char": -0.5570756196975708, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 393, "native_id": 811, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4498817026615143, "incorrect_loss_raw": 1.144648551940918, "correct_loss_per_char": 0.11247042566537857, "incorrect_loss_per_char": 0.38154951731363934, "correct_loss_per_token": 0.4498817026615143, "incorrect_loss_per_token": 1.144648551940918, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4498817026615143, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.4498817026615143, "logits_per_char": -0.11247042566537857, "num_chars": 4}, {"sum_logits": -1.144648551940918, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -1.144648551940918, "logits_per_char": -0.38154951731363934, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 394, "native_id": 3251, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38242292404174805, "incorrect_loss_raw": 1.3642840385437012, "correct_loss_per_char": 0.09560573101043701, "incorrect_loss_per_char": 0.4547613461812337, "correct_loss_per_token": 0.38242292404174805, "incorrect_loss_per_token": 1.3642840385437012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.38242292404174805, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.38242292404174805, "logits_per_char": -0.09560573101043701, "num_chars": 4}, {"sum_logits": -1.3642840385437012, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -1.3642840385437012, "logits_per_char": -0.4547613461812337, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 395, "native_id": 2135, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1194014847278595, "incorrect_loss_raw": 2.3932952880859375, "correct_loss_per_char": 0.029850371181964874, "incorrect_loss_per_char": 0.7977650960286459, "correct_loss_per_token": 0.1194014847278595, "incorrect_loss_per_token": 2.3932952880859375, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1194014847278595, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.1194014847278595, "logits_per_char": -0.029850371181964874, "num_chars": 4}, {"sum_logits": -2.3932952880859375, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -2.3932952880859375, "logits_per_char": -0.7977650960286459, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 396, "native_id": 2822, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8040784001350403, "incorrect_loss_raw": 0.8276240229606628, "correct_loss_per_char": 0.26802613337834674, "incorrect_loss_per_char": 0.2069060057401657, "correct_loss_per_token": 0.8040784001350403, "incorrect_loss_per_token": 0.8276240229606628, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8276240229606628, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -0.8276240229606628, "logits_per_char": -0.2069060057401657, "num_chars": 4}, {"sum_logits": -0.8040784001350403, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.8040784001350403, "logits_per_char": -0.26802613337834674, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 397, "native_id": 1555, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30349957942962646, "incorrect_loss_raw": 1.4742323160171509, "correct_loss_per_char": 0.07587489485740662, "incorrect_loss_per_char": 0.491410772005717, "correct_loss_per_token": 0.30349957942962646, "incorrect_loss_per_token": 1.4742323160171509, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30349957942962646, "num_tokens": 1, "num_tokens_all": 845, "is_greedy": true, "logits_per_token": -0.30349957942962646, "logits_per_char": -0.07587489485740662, "num_chars": 4}, {"sum_logits": -1.4742323160171509, "num_tokens": 1, "num_tokens_all": 845, "is_greedy": false, "logits_per_token": -1.4742323160171509, "logits_per_char": -0.491410772005717, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 398, "native_id": 2415, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5549696087837219, "incorrect_loss_raw": 0.92331862449646, "correct_loss_per_char": 0.13874240219593048, "incorrect_loss_per_char": 0.3077728748321533, "correct_loss_per_token": 0.5549696087837219, "incorrect_loss_per_token": 0.92331862449646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5549696087837219, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.5549696087837219, "logits_per_char": -0.13874240219593048, "num_chars": 4}, {"sum_logits": -0.92331862449646, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -0.92331862449646, "logits_per_char": -0.3077728748321533, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 399, "native_id": 2018, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.16158591210842133, "incorrect_loss_raw": 2.1419179439544678, "correct_loss_per_char": 0.04039647802710533, "incorrect_loss_per_char": 0.7139726479848226, "correct_loss_per_token": 0.16158591210842133, "incorrect_loss_per_token": 2.1419179439544678, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16158591210842133, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.16158591210842133, "logits_per_char": -0.04039647802710533, "num_chars": 4}, {"sum_logits": -2.1419179439544678, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -2.1419179439544678, "logits_per_char": -0.7139726479848226, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 400, "native_id": 214, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7765915989875793, "incorrect_loss_raw": 0.8040587306022644, "correct_loss_per_char": 0.2588638663291931, "incorrect_loss_per_char": 0.2010146826505661, "correct_loss_per_token": 0.7765915989875793, "incorrect_loss_per_token": 0.8040587306022644, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8040587306022644, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -0.8040587306022644, "logits_per_char": -0.2010146826505661, "num_chars": 4}, {"sum_logits": -0.7765915989875793, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.7765915989875793, "logits_per_char": -0.2588638663291931, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 401, "native_id": 122, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.487635850906372, "incorrect_loss_raw": 0.2968719005584717, "correct_loss_per_char": 0.4958786169687907, "incorrect_loss_per_char": 0.07421797513961792, "correct_loss_per_token": 1.487635850906372, "incorrect_loss_per_token": 0.2968719005584717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2968719005584717, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.2968719005584717, "logits_per_char": -0.07421797513961792, "num_chars": 4}, {"sum_logits": -1.487635850906372, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.487635850906372, "logits_per_char": -0.4958786169687907, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 402, "native_id": 1835, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26948514580726624, "incorrect_loss_raw": 1.590390682220459, "correct_loss_per_char": 0.06737128645181656, "incorrect_loss_per_char": 0.5301302274068197, "correct_loss_per_token": 0.26948514580726624, "incorrect_loss_per_token": 1.590390682220459, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26948514580726624, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -0.26948514580726624, "logits_per_char": -0.06737128645181656, "num_chars": 4}, {"sum_logits": -1.590390682220459, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.590390682220459, "logits_per_char": -0.5301302274068197, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 403, "native_id": 328, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.41458776593208313, "incorrect_loss_raw": 1.1394288539886475, "correct_loss_per_char": 0.10364694148302078, "incorrect_loss_per_char": 0.3798096179962158, "correct_loss_per_token": 0.41458776593208313, "incorrect_loss_per_token": 1.1394288539886475, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.41458776593208313, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": true, "logits_per_token": -0.41458776593208313, "logits_per_char": -0.10364694148302078, "num_chars": 4}, {"sum_logits": -1.1394288539886475, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.1394288539886475, "logits_per_char": -0.3798096179962158, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 404, "native_id": 1200, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19045694172382355, "incorrect_loss_raw": 1.9370036125183105, "correct_loss_per_char": 0.04761423543095589, "incorrect_loss_per_char": 0.6456678708394369, "correct_loss_per_token": 0.19045694172382355, "incorrect_loss_per_token": 1.9370036125183105, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19045694172382355, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.19045694172382355, "logits_per_char": -0.04761423543095589, "num_chars": 4}, {"sum_logits": -1.9370036125183105, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -1.9370036125183105, "logits_per_char": -0.6456678708394369, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 405, "native_id": 3107, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2900419235229492, "incorrect_loss_raw": 0.42484134435653687, "correct_loss_per_char": 0.4300139745076497, "incorrect_loss_per_char": 0.10621033608913422, "correct_loss_per_token": 1.2900419235229492, "incorrect_loss_per_token": 0.42484134435653687, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.42484134435653687, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": true, "logits_per_token": -0.42484134435653687, "logits_per_char": -0.10621033608913422, "num_chars": 4}, {"sum_logits": -1.2900419235229492, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -1.2900419235229492, "logits_per_char": -0.4300139745076497, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 406, "native_id": 1393, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9738906025886536, "incorrect_loss_raw": 0.5608844757080078, "correct_loss_per_char": 0.3246302008628845, "incorrect_loss_per_char": 0.14022111892700195, "correct_loss_per_token": 0.9738906025886536, "incorrect_loss_per_token": 0.5608844757080078, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5608844757080078, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.5608844757080078, "logits_per_char": -0.14022111892700195, "num_chars": 4}, {"sum_logits": -0.9738906025886536, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -0.9738906025886536, "logits_per_char": -0.3246302008628845, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 407, "native_id": 605, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6460663080215454, "incorrect_loss_raw": 0.29144787788391113, "correct_loss_per_char": 0.5486887693405151, "incorrect_loss_per_char": 0.07286196947097778, "correct_loss_per_token": 1.6460663080215454, "incorrect_loss_per_token": 0.29144787788391113, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29144787788391113, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.29144787788391113, "logits_per_char": -0.07286196947097778, "num_chars": 4}, {"sum_logits": -1.6460663080215454, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -1.6460663080215454, "logits_per_char": -0.5486887693405151, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 408, "native_id": 1991, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.39821216464042664, "incorrect_loss_raw": 1.2890623807907104, "correct_loss_per_char": 0.09955304116010666, "incorrect_loss_per_char": 0.42968746026357013, "correct_loss_per_token": 0.39821216464042664, "incorrect_loss_per_token": 1.2890623807907104, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.39821216464042664, "num_tokens": 1, "num_tokens_all": 834, "is_greedy": true, "logits_per_token": -0.39821216464042664, "logits_per_char": -0.09955304116010666, "num_chars": 4}, {"sum_logits": -1.2890623807907104, "num_tokens": 1, "num_tokens_all": 834, "is_greedy": false, "logits_per_token": -1.2890623807907104, "logits_per_char": -0.42968746026357013, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 409, "native_id": 2772, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26310351490974426, "incorrect_loss_raw": 1.577651858329773, "correct_loss_per_char": 0.06577587872743607, "incorrect_loss_per_char": 0.525883952776591, "correct_loss_per_token": 0.26310351490974426, "incorrect_loss_per_token": 1.577651858329773, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26310351490974426, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.26310351490974426, "logits_per_char": -0.06577587872743607, "num_chars": 4}, {"sum_logits": -1.577651858329773, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.577651858329773, "logits_per_char": -0.525883952776591, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 410, "native_id": 2665, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9499813318252563, "incorrect_loss_raw": 0.2098170965909958, "correct_loss_per_char": 0.6499937772750854, "incorrect_loss_per_char": 0.05245427414774895, "correct_loss_per_token": 1.9499813318252563, "incorrect_loss_per_token": 0.2098170965909958, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2098170965909958, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -0.2098170965909958, "logits_per_char": -0.05245427414774895, "num_chars": 4}, {"sum_logits": -1.9499813318252563, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.9499813318252563, "logits_per_char": -0.6499937772750854, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 411, "native_id": 991, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.33010202646255493, "incorrect_loss_raw": 1.6121559143066406, "correct_loss_per_char": 0.08252550661563873, "incorrect_loss_per_char": 0.5373853047688802, "correct_loss_per_token": 0.33010202646255493, "incorrect_loss_per_token": 1.6121559143066406, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.33010202646255493, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": true, "logits_per_token": -0.33010202646255493, "logits_per_char": -0.08252550661563873, "num_chars": 4}, {"sum_logits": -1.6121559143066406, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": false, "logits_per_token": -1.6121559143066406, "logits_per_char": -0.5373853047688802, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 412, "native_id": 3261, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.36510711908340454, "incorrect_loss_raw": 1.3379549980163574, "correct_loss_per_char": 0.09127677977085114, "incorrect_loss_per_char": 0.4459849993387858, "correct_loss_per_token": 0.36510711908340454, "incorrect_loss_per_token": 1.3379549980163574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.36510711908340454, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.36510711908340454, "logits_per_char": -0.09127677977085114, "num_chars": 4}, {"sum_logits": -1.3379549980163574, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.3379549980163574, "logits_per_char": -0.4459849993387858, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 413, "native_id": 2868, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0117731094360352, "incorrect_loss_raw": 0.5236133933067322, "correct_loss_per_char": 0.33725770314534503, "incorrect_loss_per_char": 0.13090334832668304, "correct_loss_per_token": 1.0117731094360352, "incorrect_loss_per_token": 0.5236133933067322, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5236133933067322, "num_tokens": 1, "num_tokens_all": 1185, "is_greedy": true, "logits_per_token": -0.5236133933067322, "logits_per_char": -0.13090334832668304, "num_chars": 4}, {"sum_logits": -1.0117731094360352, "num_tokens": 1, "num_tokens_all": 1185, "is_greedy": false, "logits_per_token": -1.0117731094360352, "logits_per_char": -0.33725770314534503, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 414, "native_id": 1460, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2475818395614624, "incorrect_loss_raw": 0.39914336800575256, "correct_loss_per_char": 0.4158606131871541, "incorrect_loss_per_char": 0.09978584200143814, "correct_loss_per_token": 1.2475818395614624, "incorrect_loss_per_token": 0.39914336800575256, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.39914336800575256, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.39914336800575256, "logits_per_char": -0.09978584200143814, "num_chars": 4}, {"sum_logits": -1.2475818395614624, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.2475818395614624, "logits_per_char": -0.4158606131871541, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 415, "native_id": 3005, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3866941034793854, "incorrect_loss_raw": 1.2658486366271973, "correct_loss_per_char": 0.09667352586984634, "incorrect_loss_per_char": 0.4219495455423991, "correct_loss_per_token": 0.3866941034793854, "incorrect_loss_per_token": 1.2658486366271973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3866941034793854, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.3866941034793854, "logits_per_char": -0.09667352586984634, "num_chars": 4}, {"sum_logits": -1.2658486366271973, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -1.2658486366271973, "logits_per_char": -0.4219495455423991, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 416, "native_id": 1521, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23541347682476044, "incorrect_loss_raw": 1.7489887475967407, "correct_loss_per_char": 0.05885336920619011, "incorrect_loss_per_char": 0.5829962491989136, "correct_loss_per_token": 0.23541347682476044, "incorrect_loss_per_token": 1.7489887475967407, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23541347682476044, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.23541347682476044, "logits_per_char": -0.05885336920619011, "num_chars": 4}, {"sum_logits": -1.7489887475967407, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.7489887475967407, "logits_per_char": -0.5829962491989136, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 417, "native_id": 1699, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3706511855125427, "incorrect_loss_raw": 1.2781919240951538, "correct_loss_per_char": 0.09266279637813568, "incorrect_loss_per_char": 0.4260639746983846, "correct_loss_per_token": 0.3706511855125427, "incorrect_loss_per_token": 1.2781919240951538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3706511855125427, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.3706511855125427, "logits_per_char": -0.09266279637813568, "num_chars": 4}, {"sum_logits": -1.2781919240951538, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -1.2781919240951538, "logits_per_char": -0.4260639746983846, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 418, "native_id": 712, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2637877464294434, "incorrect_loss_raw": 0.3726981580257416, "correct_loss_per_char": 0.42126258214314777, "incorrect_loss_per_char": 0.0931745395064354, "correct_loss_per_token": 1.2637877464294434, "incorrect_loss_per_token": 0.3726981580257416, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3726981580257416, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": true, "logits_per_token": -0.3726981580257416, "logits_per_char": -0.0931745395064354, "num_chars": 4}, {"sum_logits": -1.2637877464294434, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": false, "logits_per_token": -1.2637877464294434, "logits_per_char": -0.42126258214314777, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 419, "native_id": 305, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2036462426185608, "incorrect_loss_raw": 1.799248218536377, "correct_loss_per_char": 0.0509115606546402, "incorrect_loss_per_char": 0.5997494061787924, "correct_loss_per_token": 0.2036462426185608, "incorrect_loss_per_token": 1.799248218536377, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2036462426185608, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": true, "logits_per_token": -0.2036462426185608, "logits_per_char": -0.0509115606546402, "num_chars": 4}, {"sum_logits": -1.799248218536377, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": false, "logits_per_token": -1.799248218536377, "logits_per_char": -0.5997494061787924, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 420, "native_id": 2619, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.245390847325325, "incorrect_loss_raw": 1.7922662496566772, "correct_loss_per_char": 0.06134771183133125, "incorrect_loss_per_char": 0.5974220832188925, "correct_loss_per_token": 0.245390847325325, "incorrect_loss_per_token": 1.7922662496566772, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.245390847325325, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": true, "logits_per_token": -0.245390847325325, "logits_per_char": -0.06134771183133125, "num_chars": 4}, {"sum_logits": -1.7922662496566772, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": false, "logits_per_token": -1.7922662496566772, "logits_per_char": -0.5974220832188925, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 421, "native_id": 72, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3868489265441895, "incorrect_loss_raw": 0.33814528584480286, "correct_loss_per_char": 0.4622829755147298, "incorrect_loss_per_char": 0.08453632146120071, "correct_loss_per_token": 1.3868489265441895, "incorrect_loss_per_token": 0.33814528584480286, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.33814528584480286, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.33814528584480286, "logits_per_char": -0.08453632146120071, "num_chars": 4}, {"sum_logits": -1.3868489265441895, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.3868489265441895, "logits_per_char": -0.4622829755147298, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 422, "native_id": 869, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1658306121826172, "incorrect_loss_raw": 0.4363900423049927, "correct_loss_per_char": 0.3886102040608724, "incorrect_loss_per_char": 0.10909751057624817, "correct_loss_per_token": 1.1658306121826172, "incorrect_loss_per_token": 0.4363900423049927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4363900423049927, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": true, "logits_per_token": -0.4363900423049927, "logits_per_char": -0.10909751057624817, "num_chars": 4}, {"sum_logits": -1.1658306121826172, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": false, "logits_per_token": -1.1658306121826172, "logits_per_char": -0.3886102040608724, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 423, "native_id": 804, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3013510704040527, "incorrect_loss_raw": 0.3845444619655609, "correct_loss_per_char": 0.43378369013468426, "incorrect_loss_per_char": 0.09613611549139023, "correct_loss_per_token": 1.3013510704040527, "incorrect_loss_per_token": 0.3845444619655609, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3845444619655609, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.3845444619655609, "logits_per_char": -0.09613611549139023, "num_chars": 4}, {"sum_logits": -1.3013510704040527, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.3013510704040527, "logits_per_char": -0.43378369013468426, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 424, "native_id": 2478, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3555927276611328, "incorrect_loss_raw": 0.41309884190559387, "correct_loss_per_char": 0.45186424255371094, "incorrect_loss_per_char": 0.10327471047639847, "correct_loss_per_token": 1.3555927276611328, "incorrect_loss_per_token": 0.41309884190559387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.41309884190559387, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.41309884190559387, "logits_per_char": -0.10327471047639847, "num_chars": 4}, {"sum_logits": -1.3555927276611328, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.3555927276611328, "logits_per_char": -0.45186424255371094, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 425, "native_id": 2541, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8994629383087158, "incorrect_loss_raw": 0.624320387840271, "correct_loss_per_char": 0.2998209794362386, "incorrect_loss_per_char": 0.15608009696006775, "correct_loss_per_token": 0.8994629383087158, "incorrect_loss_per_token": 0.624320387840271, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.624320387840271, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.624320387840271, "logits_per_char": -0.15608009696006775, "num_chars": 4}, {"sum_logits": -0.8994629383087158, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -0.8994629383087158, "logits_per_char": -0.2998209794362386, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 426, "native_id": 2242, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1269629001617432, "incorrect_loss_raw": 0.5543237924575806, "correct_loss_per_char": 0.37565430005391437, "incorrect_loss_per_char": 0.13858094811439514, "correct_loss_per_token": 1.1269629001617432, "incorrect_loss_per_token": 0.5543237924575806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5543237924575806, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": true, "logits_per_token": -0.5543237924575806, "logits_per_char": -0.13858094811439514, "num_chars": 4}, {"sum_logits": -1.1269629001617432, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": false, "logits_per_token": -1.1269629001617432, "logits_per_char": -0.37565430005391437, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 427, "native_id": 579, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2780124545097351, "incorrect_loss_raw": 1.516995906829834, "correct_loss_per_char": 0.06950311362743378, "incorrect_loss_per_char": 0.5056653022766113, "correct_loss_per_token": 0.2780124545097351, "incorrect_loss_per_token": 1.516995906829834, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2780124545097351, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.2780124545097351, "logits_per_char": -0.06950311362743378, "num_chars": 4}, {"sum_logits": -1.516995906829834, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.516995906829834, "logits_per_char": -0.5056653022766113, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 428, "native_id": 2055, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7417894601821899, "incorrect_loss_raw": 0.7777359485626221, "correct_loss_per_char": 0.18544736504554749, "incorrect_loss_per_char": 0.2592453161875407, "correct_loss_per_token": 0.7417894601821899, "incorrect_loss_per_token": 0.7777359485626221, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7417894601821899, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.7417894601821899, "logits_per_char": -0.18544736504554749, "num_chars": 4}, {"sum_logits": -0.7777359485626221, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -0.7777359485626221, "logits_per_char": -0.2592453161875407, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 429, "native_id": 542, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6734510064125061, "incorrect_loss_raw": 0.8313330411911011, "correct_loss_per_char": 0.16836275160312653, "incorrect_loss_per_char": 0.277111013730367, "correct_loss_per_token": 0.6734510064125061, "incorrect_loss_per_token": 0.8313330411911011, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6734510064125061, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": true, "logits_per_token": -0.6734510064125061, "logits_per_char": -0.16836275160312653, "num_chars": 4}, {"sum_logits": -0.8313330411911011, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -0.8313330411911011, "logits_per_char": -0.277111013730367, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 430, "native_id": 2761, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.37861934304237366, "incorrect_loss_raw": 1.2901148796081543, "correct_loss_per_char": 0.09465483576059341, "incorrect_loss_per_char": 0.4300382932027181, "correct_loss_per_token": 0.37861934304237366, "incorrect_loss_per_token": 1.2901148796081543, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.37861934304237366, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.37861934304237366, "logits_per_char": -0.09465483576059341, "num_chars": 4}, {"sum_logits": -1.2901148796081543, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -1.2901148796081543, "logits_per_char": -0.4300382932027181, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 431, "native_id": 1043, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3035646080970764, "incorrect_loss_raw": 1.4749233722686768, "correct_loss_per_char": 0.0758911520242691, "incorrect_loss_per_char": 0.4916411240895589, "correct_loss_per_token": 0.3035646080970764, "incorrect_loss_per_token": 1.4749233722686768, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3035646080970764, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.3035646080970764, "logits_per_char": -0.0758911520242691, "num_chars": 4}, {"sum_logits": -1.4749233722686768, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.4749233722686768, "logits_per_char": -0.4916411240895589, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 432, "native_id": 2667, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3333578109741211, "incorrect_loss_raw": 1.3807787895202637, "correct_loss_per_char": 0.08333945274353027, "incorrect_loss_per_char": 0.4602595965067546, "correct_loss_per_token": 0.3333578109741211, "incorrect_loss_per_token": 1.3807787895202637, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3333578109741211, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": true, "logits_per_token": -0.3333578109741211, "logits_per_char": -0.08333945274353027, "num_chars": 4}, {"sum_logits": -1.3807787895202637, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": false, "logits_per_token": -1.3807787895202637, "logits_per_char": -0.4602595965067546, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 433, "native_id": 202, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3266977071762085, "incorrect_loss_raw": 1.437941074371338, "correct_loss_per_char": 0.08167442679405212, "incorrect_loss_per_char": 0.4793136914571126, "correct_loss_per_token": 0.3266977071762085, "incorrect_loss_per_token": 1.437941074371338, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3266977071762085, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": true, "logits_per_token": -0.3266977071762085, "logits_per_char": -0.08167442679405212, "num_chars": 4}, {"sum_logits": -1.437941074371338, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": false, "logits_per_token": -1.437941074371338, "logits_per_char": -0.4793136914571126, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 434, "native_id": 2457, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.39086398482322693, "incorrect_loss_raw": 1.744219422340393, "correct_loss_per_char": 0.09771599620580673, "incorrect_loss_per_char": 0.5814064741134644, "correct_loss_per_token": 0.39086398482322693, "incorrect_loss_per_token": 1.744219422340393, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.39086398482322693, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": true, "logits_per_token": -0.39086398482322693, "logits_per_char": -0.09771599620580673, "num_chars": 4}, {"sum_logits": -1.744219422340393, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": false, "logits_per_token": -1.744219422340393, "logits_per_char": -0.5814064741134644, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 435, "native_id": 3163, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34820282459259033, "incorrect_loss_raw": 1.5563689470291138, "correct_loss_per_char": 0.08705070614814758, "incorrect_loss_per_char": 0.5187896490097046, "correct_loss_per_token": 0.34820282459259033, "incorrect_loss_per_token": 1.5563689470291138, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34820282459259033, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": true, "logits_per_token": -0.34820282459259033, "logits_per_char": -0.08705070614814758, "num_chars": 4}, {"sum_logits": -1.5563689470291138, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -1.5563689470291138, "logits_per_char": -0.5187896490097046, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 436, "native_id": 1480, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1702570617198944, "incorrect_loss_raw": 2.0057790279388428, "correct_loss_per_char": 0.0425642654299736, "incorrect_loss_per_char": 0.6685930093129476, "correct_loss_per_token": 0.1702570617198944, "incorrect_loss_per_token": 2.0057790279388428, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1702570617198944, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.1702570617198944, "logits_per_char": -0.0425642654299736, "num_chars": 4}, {"sum_logits": -2.0057790279388428, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -2.0057790279388428, "logits_per_char": -0.6685930093129476, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 437, "native_id": 2448, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20031999051570892, "incorrect_loss_raw": 1.9091238975524902, "correct_loss_per_char": 0.05007999762892723, "incorrect_loss_per_char": 0.6363746325174967, "correct_loss_per_token": 0.20031999051570892, "incorrect_loss_per_token": 1.9091238975524902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20031999051570892, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.20031999051570892, "logits_per_char": -0.05007999762892723, "num_chars": 4}, {"sum_logits": -1.9091238975524902, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.9091238975524902, "logits_per_char": -0.6363746325174967, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 438, "native_id": 2888, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5445191860198975, "incorrect_loss_raw": 1.0079195499420166, "correct_loss_per_char": 0.13612979650497437, "incorrect_loss_per_char": 0.33597318331400555, "correct_loss_per_token": 0.5445191860198975, "incorrect_loss_per_token": 1.0079195499420166, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5445191860198975, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": true, "logits_per_token": -0.5445191860198975, "logits_per_char": -0.13612979650497437, "num_chars": 4}, {"sum_logits": -1.0079195499420166, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": false, "logits_per_token": -1.0079195499420166, "logits_per_char": -0.33597318331400555, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 439, "native_id": 1181, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2025647759437561, "incorrect_loss_raw": 1.9162101745605469, "correct_loss_per_char": 0.050641193985939026, "incorrect_loss_per_char": 0.6387367248535156, "correct_loss_per_token": 0.2025647759437561, "incorrect_loss_per_token": 1.9162101745605469, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2025647759437561, "num_tokens": 1, "num_tokens_all": 1161, "is_greedy": true, "logits_per_token": -0.2025647759437561, "logits_per_char": -0.050641193985939026, "num_chars": 4}, {"sum_logits": -1.9162101745605469, "num_tokens": 1, "num_tokens_all": 1161, "is_greedy": false, "logits_per_token": -1.9162101745605469, "logits_per_char": -0.6387367248535156, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 440, "native_id": 3104, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21245931088924408, "incorrect_loss_raw": 1.818962812423706, "correct_loss_per_char": 0.05311482772231102, "incorrect_loss_per_char": 0.6063209374745687, "correct_loss_per_token": 0.21245931088924408, "incorrect_loss_per_token": 1.818962812423706, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21245931088924408, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.21245931088924408, "logits_per_char": -0.05311482772231102, "num_chars": 4}, {"sum_logits": -1.818962812423706, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.818962812423706, "logits_per_char": -0.6063209374745687, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 441, "native_id": 1671, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9134193062782288, "incorrect_loss_raw": 0.5635390281677246, "correct_loss_per_char": 0.2283548265695572, "incorrect_loss_per_char": 0.18784634272257486, "correct_loss_per_token": 0.9134193062782288, "incorrect_loss_per_token": 0.5635390281677246, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9134193062782288, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -0.9134193062782288, "logits_per_char": -0.2283548265695572, "num_chars": 4}, {"sum_logits": -0.5635390281677246, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.5635390281677246, "logits_per_char": -0.18784634272257486, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 442, "native_id": 1506, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.039478063583374, "incorrect_loss_raw": 0.5074955224990845, "correct_loss_per_char": 0.3464926878611247, "incorrect_loss_per_char": 0.12687388062477112, "correct_loss_per_token": 1.039478063583374, "incorrect_loss_per_token": 0.5074955224990845, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5074955224990845, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.5074955224990845, "logits_per_char": -0.12687388062477112, "num_chars": 4}, {"sum_logits": -1.039478063583374, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -1.039478063583374, "logits_per_char": -0.3464926878611247, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 443, "native_id": 959, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9438539147377014, "incorrect_loss_raw": 0.6813597083091736, "correct_loss_per_char": 0.23596347868442535, "incorrect_loss_per_char": 0.22711990276972452, "correct_loss_per_token": 0.9438539147377014, "incorrect_loss_per_token": 0.6813597083091736, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9438539147377014, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -0.9438539147377014, "logits_per_char": -0.23596347868442535, "num_chars": 4}, {"sum_logits": -0.6813597083091736, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.6813597083091736, "logits_per_char": -0.22711990276972452, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 444, "native_id": 1168, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46592170000076294, "incorrect_loss_raw": 1.1468496322631836, "correct_loss_per_char": 0.11648042500019073, "incorrect_loss_per_char": 0.38228321075439453, "correct_loss_per_token": 0.46592170000076294, "incorrect_loss_per_token": 1.1468496322631836, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46592170000076294, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.46592170000076294, "logits_per_char": -0.11648042500019073, "num_chars": 4}, {"sum_logits": -1.1468496322631836, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -1.1468496322631836, "logits_per_char": -0.38228321075439453, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 445, "native_id": 35, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6483876705169678, "incorrect_loss_raw": 1.0363199710845947, "correct_loss_per_char": 0.16209691762924194, "incorrect_loss_per_char": 0.34543999036153156, "correct_loss_per_token": 0.6483876705169678, "incorrect_loss_per_token": 1.0363199710845947, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6483876705169678, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": true, "logits_per_token": -0.6483876705169678, "logits_per_char": -0.16209691762924194, "num_chars": 4}, {"sum_logits": -1.0363199710845947, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": false, "logits_per_token": -1.0363199710845947, "logits_per_char": -0.34543999036153156, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 446, "native_id": 1281, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8917888402938843, "incorrect_loss_raw": 0.6241536140441895, "correct_loss_per_char": 0.2972629467646281, "incorrect_loss_per_char": 0.15603840351104736, "correct_loss_per_token": 0.8917888402938843, "incorrect_loss_per_token": 0.6241536140441895, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6241536140441895, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -0.6241536140441895, "logits_per_char": -0.15603840351104736, "num_chars": 4}, {"sum_logits": -0.8917888402938843, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -0.8917888402938843, "logits_per_char": -0.2972629467646281, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 447, "native_id": 2975, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5954817533493042, "incorrect_loss_raw": 0.9314684867858887, "correct_loss_per_char": 0.14887043833732605, "incorrect_loss_per_char": 0.3104894955952962, "correct_loss_per_token": 0.5954817533493042, "incorrect_loss_per_token": 0.9314684867858887, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5954817533493042, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.5954817533493042, "logits_per_char": -0.14887043833732605, "num_chars": 4}, {"sum_logits": -0.9314684867858887, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -0.9314684867858887, "logits_per_char": -0.3104894955952962, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 448, "native_id": 1089, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5201820135116577, "incorrect_loss_raw": 0.319819837808609, "correct_loss_per_char": 0.5067273378372192, "incorrect_loss_per_char": 0.07995495945215225, "correct_loss_per_token": 1.5201820135116577, "incorrect_loss_per_token": 0.319819837808609, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.319819837808609, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": true, "logits_per_token": -0.319819837808609, "logits_per_char": -0.07995495945215225, "num_chars": 4}, {"sum_logits": -1.5201820135116577, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -1.5201820135116577, "logits_per_char": -0.5067273378372192, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 449, "native_id": 493, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1862844228744507, "incorrect_loss_raw": 0.46377551555633545, "correct_loss_per_char": 0.3954281409581502, "incorrect_loss_per_char": 0.11594387888908386, "correct_loss_per_token": 1.1862844228744507, "incorrect_loss_per_token": 0.46377551555633545, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46377551555633545, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.46377551555633545, "logits_per_char": -0.11594387888908386, "num_chars": 4}, {"sum_logits": -1.1862844228744507, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -1.1862844228744507, "logits_per_char": -0.3954281409581502, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 450, "native_id": 2229, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3000975549221039, "incorrect_loss_raw": 1.5256611108779907, "correct_loss_per_char": 0.07502438873052597, "incorrect_loss_per_char": 0.508553703625997, "correct_loss_per_token": 0.3000975549221039, "incorrect_loss_per_token": 1.5256611108779907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3000975549221039, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -0.3000975549221039, "logits_per_char": -0.07502438873052597, "num_chars": 4}, {"sum_logits": -1.5256611108779907, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.5256611108779907, "logits_per_char": -0.508553703625997, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 451, "native_id": 2835, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2360595017671585, "incorrect_loss_raw": 1.8611277341842651, "correct_loss_per_char": 0.05901487544178963, "incorrect_loss_per_char": 0.620375911394755, "correct_loss_per_token": 0.2360595017671585, "incorrect_loss_per_token": 1.8611277341842651, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2360595017671585, "num_tokens": 1, "num_tokens_all": 852, "is_greedy": true, "logits_per_token": -0.2360595017671585, "logits_per_char": -0.05901487544178963, "num_chars": 4}, {"sum_logits": -1.8611277341842651, "num_tokens": 1, "num_tokens_all": 852, "is_greedy": false, "logits_per_token": -1.8611277341842651, "logits_per_char": -0.620375911394755, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 452, "native_id": 145, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6471737027168274, "incorrect_loss_raw": 1.2642168998718262, "correct_loss_per_char": 0.16179342567920685, "incorrect_loss_per_char": 0.4214056332906087, "correct_loss_per_token": 0.6471737027168274, "incorrect_loss_per_token": 1.2642168998718262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6471737027168274, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": true, "logits_per_token": -0.6471737027168274, "logits_per_char": -0.16179342567920685, "num_chars": 4}, {"sum_logits": -1.2642168998718262, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": false, "logits_per_token": -1.2642168998718262, "logits_per_char": -0.4214056332906087, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 453, "native_id": 895, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1979982703924179, "incorrect_loss_raw": 1.8410727977752686, "correct_loss_per_char": 0.04949956759810448, "incorrect_loss_per_char": 0.6136909325917562, "correct_loss_per_token": 0.1979982703924179, "incorrect_loss_per_token": 1.8410727977752686, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1979982703924179, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": true, "logits_per_token": -0.1979982703924179, "logits_per_char": -0.04949956759810448, "num_chars": 4}, {"sum_logits": -1.8410727977752686, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": false, "logits_per_token": -1.8410727977752686, "logits_per_char": -0.6136909325917562, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 454, "native_id": 2966, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5455251336097717, "incorrect_loss_raw": 1.0061877965927124, "correct_loss_per_char": 0.13638128340244293, "incorrect_loss_per_char": 0.3353959321975708, "correct_loss_per_token": 0.5455251336097717, "incorrect_loss_per_token": 1.0061877965927124, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5455251336097717, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.5455251336097717, "logits_per_char": -0.13638128340244293, "num_chars": 4}, {"sum_logits": -1.0061877965927124, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.0061877965927124, "logits_per_char": -0.3353959321975708, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 455, "native_id": 2339, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2190831899642944, "incorrect_loss_raw": 0.4158475697040558, "correct_loss_per_char": 0.40636106332143146, "incorrect_loss_per_char": 0.10396189242601395, "correct_loss_per_token": 1.2190831899642944, "incorrect_loss_per_token": 0.4158475697040558, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4158475697040558, "num_tokens": 1, "num_tokens_all": 900, "is_greedy": true, "logits_per_token": -0.4158475697040558, "logits_per_char": -0.10396189242601395, "num_chars": 4}, {"sum_logits": -1.2190831899642944, "num_tokens": 1, "num_tokens_all": 900, "is_greedy": false, "logits_per_token": -1.2190831899642944, "logits_per_char": -0.40636106332143146, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 456, "native_id": 2431, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.73036128282547, "incorrect_loss_raw": 0.745991051197052, "correct_loss_per_char": 0.1825903207063675, "incorrect_loss_per_char": 0.24866368373235068, "correct_loss_per_token": 0.73036128282547, "incorrect_loss_per_token": 0.745991051197052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.73036128282547, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -0.73036128282547, "logits_per_char": -0.1825903207063675, "num_chars": 4}, {"sum_logits": -0.745991051197052, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -0.745991051197052, "logits_per_char": -0.24866368373235068, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 457, "native_id": 3156, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8498501181602478, "incorrect_loss_raw": 0.7487658262252808, "correct_loss_per_char": 0.2832833727200826, "incorrect_loss_per_char": 0.1871914565563202, "correct_loss_per_token": 0.8498501181602478, "incorrect_loss_per_token": 0.7487658262252808, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7487658262252808, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": true, "logits_per_token": -0.7487658262252808, "logits_per_char": -0.1871914565563202, "num_chars": 4}, {"sum_logits": -0.8498501181602478, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": false, "logits_per_token": -0.8498501181602478, "logits_per_char": -0.2832833727200826, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 458, "native_id": 2200, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4246654212474823, "incorrect_loss_raw": 1.1967591047286987, "correct_loss_per_char": 0.10616635531187057, "incorrect_loss_per_char": 0.3989197015762329, "correct_loss_per_token": 0.4246654212474823, "incorrect_loss_per_token": 1.1967591047286987, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4246654212474823, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": true, "logits_per_token": -0.4246654212474823, "logits_per_char": -0.10616635531187057, "num_chars": 4}, {"sum_logits": -1.1967591047286987, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": false, "logits_per_token": -1.1967591047286987, "logits_per_char": -0.3989197015762329, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 459, "native_id": 753, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1941676139831543, "incorrect_loss_raw": 1.8688478469848633, "correct_loss_per_char": 0.048541903495788574, "incorrect_loss_per_char": 0.6229492823282877, "correct_loss_per_token": 0.1941676139831543, "incorrect_loss_per_token": 1.8688478469848633, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1941676139831543, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.1941676139831543, "logits_per_char": -0.048541903495788574, "num_chars": 4}, {"sum_logits": -1.8688478469848633, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.8688478469848633, "logits_per_char": -0.6229492823282877, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 460, "native_id": 1319, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27700483798980713, "incorrect_loss_raw": 1.6367498636245728, "correct_loss_per_char": 0.06925120949745178, "incorrect_loss_per_char": 0.5455832878748575, "correct_loss_per_token": 0.27700483798980713, "incorrect_loss_per_token": 1.6367498636245728, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27700483798980713, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.27700483798980713, "logits_per_char": -0.06925120949745178, "num_chars": 4}, {"sum_logits": -1.6367498636245728, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.6367498636245728, "logits_per_char": -0.5455832878748575, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 461, "native_id": 1199, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5429812073707581, "incorrect_loss_raw": 0.9863749742507935, "correct_loss_per_char": 0.13574530184268951, "incorrect_loss_per_char": 0.32879165808359784, "correct_loss_per_token": 0.5429812073707581, "incorrect_loss_per_token": 0.9863749742507935, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5429812073707581, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": true, "logits_per_token": -0.5429812073707581, "logits_per_char": -0.13574530184268951, "num_chars": 4}, {"sum_logits": -0.9863749742507935, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -0.9863749742507935, "logits_per_char": -0.32879165808359784, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 462, "native_id": 1486, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.39799752831459045, "incorrect_loss_raw": 1.237390160560608, "correct_loss_per_char": 0.09949938207864761, "incorrect_loss_per_char": 0.41246338685353595, "correct_loss_per_token": 0.39799752831459045, "incorrect_loss_per_token": 1.237390160560608, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.39799752831459045, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": true, "logits_per_token": -0.39799752831459045, "logits_per_char": -0.09949938207864761, "num_chars": 4}, {"sum_logits": -1.237390160560608, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": false, "logits_per_token": -1.237390160560608, "logits_per_char": -0.41246338685353595, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 463, "native_id": 1117, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7836394906044006, "incorrect_loss_raw": 0.6759051084518433, "correct_loss_per_char": 0.19590987265110016, "incorrect_loss_per_char": 0.2253017028172811, "correct_loss_per_token": 0.7836394906044006, "incorrect_loss_per_token": 0.6759051084518433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7836394906044006, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -0.7836394906044006, "logits_per_char": -0.19590987265110016, "num_chars": 4}, {"sum_logits": -0.6759051084518433, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.6759051084518433, "logits_per_char": -0.2253017028172811, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 464, "native_id": 2632, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24797087907791138, "incorrect_loss_raw": 1.6525449752807617, "correct_loss_per_char": 0.061992719769477844, "incorrect_loss_per_char": 0.5508483250935873, "correct_loss_per_token": 0.24797087907791138, "incorrect_loss_per_token": 1.6525449752807617, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24797087907791138, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.24797087907791138, "logits_per_char": -0.061992719769477844, "num_chars": 4}, {"sum_logits": -1.6525449752807617, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.6525449752807617, "logits_per_char": -0.5508483250935873, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 465, "native_id": 722, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.47759366035461426, "incorrect_loss_raw": 1.1278271675109863, "correct_loss_per_char": 0.11939841508865356, "incorrect_loss_per_char": 0.3759423891703288, "correct_loss_per_token": 0.47759366035461426, "incorrect_loss_per_token": 1.1278271675109863, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.47759366035461426, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.47759366035461426, "logits_per_char": -0.11939841508865356, "num_chars": 4}, {"sum_logits": -1.1278271675109863, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -1.1278271675109863, "logits_per_char": -0.3759423891703288, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 466, "native_id": 1871, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4398481249809265, "incorrect_loss_raw": 1.1741231679916382, "correct_loss_per_char": 0.10996203124523163, "incorrect_loss_per_char": 0.3913743893305461, "correct_loss_per_token": 0.4398481249809265, "incorrect_loss_per_token": 1.1741231679916382, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4398481249809265, "num_tokens": 1, "num_tokens_all": 900, "is_greedy": true, "logits_per_token": -0.4398481249809265, "logits_per_char": -0.10996203124523163, "num_chars": 4}, {"sum_logits": -1.1741231679916382, "num_tokens": 1, "num_tokens_all": 900, "is_greedy": false, "logits_per_token": -1.1741231679916382, "logits_per_char": -0.3913743893305461, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 467, "native_id": 693, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3342725336551666, "incorrect_loss_raw": 1.427367925643921, "correct_loss_per_char": 0.08356813341379166, "incorrect_loss_per_char": 0.47578930854797363, "correct_loss_per_token": 0.3342725336551666, "incorrect_loss_per_token": 1.427367925643921, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3342725336551666, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.3342725336551666, "logits_per_char": -0.08356813341379166, "num_chars": 4}, {"sum_logits": -1.427367925643921, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -1.427367925643921, "logits_per_char": -0.47578930854797363, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 468, "native_id": 13, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.16545003652572632, "incorrect_loss_raw": 2.1276051998138428, "correct_loss_per_char": 0.04136250913143158, "incorrect_loss_per_char": 0.7092017332712809, "correct_loss_per_token": 0.16545003652572632, "incorrect_loss_per_token": 2.1276051998138428, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16545003652572632, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.16545003652572632, "logits_per_char": -0.04136250913143158, "num_chars": 4}, {"sum_logits": -2.1276051998138428, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -2.1276051998138428, "logits_per_char": -0.7092017332712809, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 469, "native_id": 2226, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7435556650161743, "incorrect_loss_raw": 0.896162748336792, "correct_loss_per_char": 0.18588891625404358, "incorrect_loss_per_char": 0.298720916112264, "correct_loss_per_token": 0.7435556650161743, "incorrect_loss_per_token": 0.896162748336792, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7435556650161743, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": true, "logits_per_token": -0.7435556650161743, "logits_per_char": -0.18588891625404358, "num_chars": 4}, {"sum_logits": -0.896162748336792, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": false, "logits_per_token": -0.896162748336792, "logits_per_char": -0.298720916112264, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 470, "native_id": 1673, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4606752395629883, "incorrect_loss_raw": 0.2999502420425415, "correct_loss_per_char": 0.4868917465209961, "incorrect_loss_per_char": 0.07498756051063538, "correct_loss_per_token": 1.4606752395629883, "incorrect_loss_per_token": 0.2999502420425415, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2999502420425415, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.2999502420425415, "logits_per_char": -0.07498756051063538, "num_chars": 4}, {"sum_logits": -1.4606752395629883, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.4606752395629883, "logits_per_char": -0.4868917465209961, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 471, "native_id": 979, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9473667144775391, "incorrect_loss_raw": 0.6016587018966675, "correct_loss_per_char": 0.3157889048258464, "incorrect_loss_per_char": 0.15041467547416687, "correct_loss_per_token": 0.9473667144775391, "incorrect_loss_per_token": 0.6016587018966675, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6016587018966675, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.6016587018966675, "logits_per_char": -0.15041467547416687, "num_chars": 4}, {"sum_logits": -0.9473667144775391, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -0.9473667144775391, "logits_per_char": -0.3157889048258464, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 472, "native_id": 785, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30416104197502136, "incorrect_loss_raw": 1.4694485664367676, "correct_loss_per_char": 0.07604026049375534, "incorrect_loss_per_char": 0.48981618881225586, "correct_loss_per_token": 0.30416104197502136, "incorrect_loss_per_token": 1.4694485664367676, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30416104197502136, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.30416104197502136, "logits_per_char": -0.07604026049375534, "num_chars": 4}, {"sum_logits": -1.4694485664367676, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.4694485664367676, "logits_per_char": -0.48981618881225586, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 473, "native_id": 1817, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26857322454452515, "incorrect_loss_raw": 1.6370470523834229, "correct_loss_per_char": 0.06714330613613129, "incorrect_loss_per_char": 0.5456823507944742, "correct_loss_per_token": 0.26857322454452515, "incorrect_loss_per_token": 1.6370470523834229, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26857322454452515, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.26857322454452515, "logits_per_char": -0.06714330613613129, "num_chars": 4}, {"sum_logits": -1.6370470523834229, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -1.6370470523834229, "logits_per_char": -0.5456823507944742, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 474, "native_id": 1119, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4105202853679657, "incorrect_loss_raw": 1.217435359954834, "correct_loss_per_char": 0.10263007134199142, "incorrect_loss_per_char": 0.40581178665161133, "correct_loss_per_token": 0.4105202853679657, "incorrect_loss_per_token": 1.217435359954834, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4105202853679657, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.4105202853679657, "logits_per_char": -0.10263007134199142, "num_chars": 4}, {"sum_logits": -1.217435359954834, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.217435359954834, "logits_per_char": -0.40581178665161133, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 475, "native_id": 713, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.15237943828105927, "incorrect_loss_raw": 2.0999550819396973, "correct_loss_per_char": 0.038094859570264816, "incorrect_loss_per_char": 0.6999850273132324, "correct_loss_per_token": 0.15237943828105927, "incorrect_loss_per_token": 2.0999550819396973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.15237943828105927, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.15237943828105927, "logits_per_char": -0.038094859570264816, "num_chars": 4}, {"sum_logits": -2.0999550819396973, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -2.0999550819396973, "logits_per_char": -0.6999850273132324, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 476, "native_id": 1449, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5571765303611755, "incorrect_loss_raw": 1.1173782348632812, "correct_loss_per_char": 0.18572551012039185, "incorrect_loss_per_char": 0.2793445587158203, "correct_loss_per_token": 0.5571765303611755, "incorrect_loss_per_token": 1.1173782348632812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1173782348632812, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.1173782348632812, "logits_per_char": -0.2793445587158203, "num_chars": 4}, {"sum_logits": -0.5571765303611755, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.5571765303611755, "logits_per_char": -0.18572551012039185, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 477, "native_id": 2401, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.661049485206604, "incorrect_loss_raw": 0.9636819362640381, "correct_loss_per_char": 0.165262371301651, "incorrect_loss_per_char": 0.3212273120880127, "correct_loss_per_token": 0.661049485206604, "incorrect_loss_per_token": 0.9636819362640381, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.661049485206604, "num_tokens": 1, "num_tokens_all": 1152, "is_greedy": true, "logits_per_token": -0.661049485206604, "logits_per_char": -0.165262371301651, "num_chars": 4}, {"sum_logits": -0.9636819362640381, "num_tokens": 1, "num_tokens_all": 1152, "is_greedy": false, "logits_per_token": -0.9636819362640381, "logits_per_char": -0.3212273120880127, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 478, "native_id": 1676, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.31480464339256287, "incorrect_loss_raw": 1.4558414220809937, "correct_loss_per_char": 0.07870116084814072, "incorrect_loss_per_char": 0.48528047402699787, "correct_loss_per_token": 0.31480464339256287, "incorrect_loss_per_token": 1.4558414220809937, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.31480464339256287, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.31480464339256287, "logits_per_char": -0.07870116084814072, "num_chars": 4}, {"sum_logits": -1.4558414220809937, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -1.4558414220809937, "logits_per_char": -0.48528047402699787, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 479, "native_id": 3213, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7036817073822021, "incorrect_loss_raw": 0.891383707523346, "correct_loss_per_char": 0.23456056912740073, "incorrect_loss_per_char": 0.2228459268808365, "correct_loss_per_token": 0.7036817073822021, "incorrect_loss_per_token": 0.891383707523346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.891383707523346, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -0.891383707523346, "logits_per_char": -0.2228459268808365, "num_chars": 4}, {"sum_logits": -0.7036817073822021, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": true, "logits_per_token": -0.7036817073822021, "logits_per_char": -0.23456056912740073, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 480, "native_id": 2861, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9034247994422913, "incorrect_loss_raw": 0.6179684996604919, "correct_loss_per_char": 0.3011415998140971, "incorrect_loss_per_char": 0.15449212491512299, "correct_loss_per_token": 0.9034247994422913, "incorrect_loss_per_token": 0.6179684996604919, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6179684996604919, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.6179684996604919, "logits_per_char": -0.15449212491512299, "num_chars": 4}, {"sum_logits": -0.9034247994422913, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -0.9034247994422913, "logits_per_char": -0.3011415998140971, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 481, "native_id": 2452, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1784896850585938, "incorrect_loss_raw": 0.46416616439819336, "correct_loss_per_char": 0.39282989501953125, "incorrect_loss_per_char": 0.11604154109954834, "correct_loss_per_token": 1.1784896850585938, "incorrect_loss_per_token": 0.46416616439819336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46416616439819336, "num_tokens": 1, "num_tokens_all": 1185, "is_greedy": true, "logits_per_token": -0.46416616439819336, "logits_per_char": -0.11604154109954834, "num_chars": 4}, {"sum_logits": -1.1784896850585938, "num_tokens": 1, "num_tokens_all": 1185, "is_greedy": false, "logits_per_token": -1.1784896850585938, "logits_per_char": -0.39282989501953125, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 482, "native_id": 2405, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6702619791030884, "incorrect_loss_raw": 0.9924018383026123, "correct_loss_per_char": 0.1675654947757721, "incorrect_loss_per_char": 0.3308006127675374, "correct_loss_per_token": 0.6702619791030884, "incorrect_loss_per_token": 0.9924018383026123, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6702619791030884, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.6702619791030884, "logits_per_char": -0.1675654947757721, "num_chars": 4}, {"sum_logits": -0.9924018383026123, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -0.9924018383026123, "logits_per_char": -0.3308006127675374, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 483, "native_id": 3220, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6356754302978516, "incorrect_loss_raw": 0.8833916187286377, "correct_loss_per_char": 0.21189181009928384, "incorrect_loss_per_char": 0.22084790468215942, "correct_loss_per_token": 0.6356754302978516, "incorrect_loss_per_token": 0.8833916187286377, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8833916187286377, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -0.8833916187286377, "logits_per_char": -0.22084790468215942, "num_chars": 4}, {"sum_logits": -0.6356754302978516, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.6356754302978516, "logits_per_char": -0.21189181009928384, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 484, "native_id": 3059, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.644392728805542, "incorrect_loss_raw": 0.8668406009674072, "correct_loss_per_char": 0.1610981822013855, "incorrect_loss_per_char": 0.28894686698913574, "correct_loss_per_token": 0.644392728805542, "incorrect_loss_per_token": 0.8668406009674072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.644392728805542, "num_tokens": 1, "num_tokens_all": 840, "is_greedy": true, "logits_per_token": -0.644392728805542, "logits_per_char": -0.1610981822013855, "num_chars": 4}, {"sum_logits": -0.8668406009674072, "num_tokens": 1, "num_tokens_all": 840, "is_greedy": false, "logits_per_token": -0.8668406009674072, "logits_per_char": -0.28894686698913574, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 485, "native_id": 2106, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7200350165367126, "incorrect_loss_raw": 0.7542906999588013, "correct_loss_per_char": 0.18000875413417816, "incorrect_loss_per_char": 0.2514302333196004, "correct_loss_per_token": 0.7200350165367126, "incorrect_loss_per_token": 0.7542906999588013, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7200350165367126, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": true, "logits_per_token": -0.7200350165367126, "logits_per_char": -0.18000875413417816, "num_chars": 4}, {"sum_logits": -0.7542906999588013, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": false, "logits_per_token": -0.7542906999588013, "logits_per_char": -0.2514302333196004, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 486, "native_id": 1823, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2983342409133911, "incorrect_loss_raw": 1.5197259187698364, "correct_loss_per_char": 0.07458356022834778, "incorrect_loss_per_char": 0.5065753062566122, "correct_loss_per_token": 0.2983342409133911, "incorrect_loss_per_token": 1.5197259187698364, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2983342409133911, "num_tokens": 1, "num_tokens_all": 1029, "is_greedy": true, "logits_per_token": -0.2983342409133911, "logits_per_char": -0.07458356022834778, "num_chars": 4}, {"sum_logits": -1.5197259187698364, "num_tokens": 1, "num_tokens_all": 1029, "is_greedy": false, "logits_per_token": -1.5197259187698364, "logits_per_char": -0.5065753062566122, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 487, "native_id": 1527, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23790612816810608, "incorrect_loss_raw": 1.654752254486084, "correct_loss_per_char": 0.05947653204202652, "incorrect_loss_per_char": 0.5515840848286947, "correct_loss_per_token": 0.23790612816810608, "incorrect_loss_per_token": 1.654752254486084, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23790612816810608, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": true, "logits_per_token": -0.23790612816810608, "logits_per_char": -0.05947653204202652, "num_chars": 4}, {"sum_logits": -1.654752254486084, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.654752254486084, "logits_per_char": -0.5515840848286947, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 488, "native_id": 2532, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19462312757968903, "incorrect_loss_raw": 2.0783631801605225, "correct_loss_per_char": 0.048655781894922256, "incorrect_loss_per_char": 0.6927877267201742, "correct_loss_per_token": 0.19462312757968903, "incorrect_loss_per_token": 2.0783631801605225, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19462312757968903, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.19462312757968903, "logits_per_char": -0.048655781894922256, "num_chars": 4}, {"sum_logits": -2.0783631801605225, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -2.0783631801605225, "logits_per_char": -0.6927877267201742, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 489, "native_id": 420, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8969277143478394, "incorrect_loss_raw": 0.611039400100708, "correct_loss_per_char": 0.2989759047826131, "incorrect_loss_per_char": 0.152759850025177, "correct_loss_per_token": 0.8969277143478394, "incorrect_loss_per_token": 0.611039400100708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.611039400100708, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.611039400100708, "logits_per_char": -0.152759850025177, "num_chars": 4}, {"sum_logits": -0.8969277143478394, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -0.8969277143478394, "logits_per_char": -0.2989759047826131, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 490, "native_id": 2764, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.10026470571756363, "incorrect_loss_raw": 2.7461955547332764, "correct_loss_per_char": 0.025066176429390907, "incorrect_loss_per_char": 0.9153985182444254, "correct_loss_per_token": 0.10026470571756363, "incorrect_loss_per_token": 2.7461955547332764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.10026470571756363, "num_tokens": 1, "num_tokens_all": 836, "is_greedy": true, "logits_per_token": -0.10026470571756363, "logits_per_char": -0.025066176429390907, "num_chars": 4}, {"sum_logits": -2.7461955547332764, "num_tokens": 1, "num_tokens_all": 836, "is_greedy": false, "logits_per_token": -2.7461955547332764, "logits_per_char": -0.9153985182444254, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 491, "native_id": 2167, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9124369621276855, "incorrect_loss_raw": 0.6071613430976868, "correct_loss_per_char": 0.30414565404256183, "incorrect_loss_per_char": 0.1517903357744217, "correct_loss_per_token": 0.9124369621276855, "incorrect_loss_per_token": 0.6071613430976868, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6071613430976868, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.6071613430976868, "logits_per_char": -0.1517903357744217, "num_chars": 4}, {"sum_logits": -0.9124369621276855, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -0.9124369621276855, "logits_per_char": -0.30414565404256183, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 492, "native_id": 1644, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2146551609039307, "incorrect_loss_raw": 0.15423473715782166, "correct_loss_per_char": 0.7382183869679769, "incorrect_loss_per_char": 0.038558684289455414, "correct_loss_per_token": 2.2146551609039307, "incorrect_loss_per_token": 0.15423473715782166, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.15423473715782166, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.15423473715782166, "logits_per_char": -0.038558684289455414, "num_chars": 4}, {"sum_logits": -2.2146551609039307, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -2.2146551609039307, "logits_per_char": -0.7382183869679769, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 493, "native_id": 2375, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.43417492508888245, "incorrect_loss_raw": 1.1949472427368164, "correct_loss_per_char": 0.10854373127222061, "incorrect_loss_per_char": 0.3983157475789388, "correct_loss_per_token": 0.43417492508888245, "incorrect_loss_per_token": 1.1949472427368164, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.43417492508888245, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": true, "logits_per_token": -0.43417492508888245, "logits_per_char": -0.10854373127222061, "num_chars": 4}, {"sum_logits": -1.1949472427368164, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": false, "logits_per_token": -1.1949472427368164, "logits_per_char": -0.3983157475789388, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 494, "native_id": 520, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9332547187805176, "incorrect_loss_raw": 0.6218778491020203, "correct_loss_per_char": 0.31108490626017254, "incorrect_loss_per_char": 0.15546946227550507, "correct_loss_per_token": 0.9332547187805176, "incorrect_loss_per_token": 0.6218778491020203, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6218778491020203, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.6218778491020203, "logits_per_char": -0.15546946227550507, "num_chars": 4}, {"sum_logits": -0.9332547187805176, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -0.9332547187805176, "logits_per_char": -0.31108490626017254, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 495, "native_id": 434, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9862087965011597, "incorrect_loss_raw": 0.5351883172988892, "correct_loss_per_char": 0.32873626550038654, "incorrect_loss_per_char": 0.1337970793247223, "correct_loss_per_token": 0.9862087965011597, "incorrect_loss_per_token": 0.5351883172988892, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5351883172988892, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": true, "logits_per_token": -0.5351883172988892, "logits_per_char": -0.1337970793247223, "num_chars": 4}, {"sum_logits": -0.9862087965011597, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": false, "logits_per_token": -0.9862087965011597, "logits_per_char": -0.32873626550038654, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 496, "native_id": 1922, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3054463565349579, "incorrect_loss_raw": 1.4892319440841675, "correct_loss_per_char": 0.07636158913373947, "incorrect_loss_per_char": 0.49641064802805585, "correct_loss_per_token": 0.3054463565349579, "incorrect_loss_per_token": 1.4892319440841675, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3054463565349579, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": true, "logits_per_token": -0.3054463565349579, "logits_per_char": -0.07636158913373947, "num_chars": 4}, {"sum_logits": -1.4892319440841675, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -1.4892319440841675, "logits_per_char": -0.49641064802805585, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 497, "native_id": 1999, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2567355632781982, "incorrect_loss_raw": 0.4138752222061157, "correct_loss_per_char": 0.4189118544260661, "incorrect_loss_per_char": 0.10346880555152893, "correct_loss_per_token": 1.2567355632781982, "incorrect_loss_per_token": 0.4138752222061157, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4138752222061157, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": true, "logits_per_token": -0.4138752222061157, "logits_per_char": -0.10346880555152893, "num_chars": 4}, {"sum_logits": -1.2567355632781982, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -1.2567355632781982, "logits_per_char": -0.4189118544260661, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 498, "native_id": 396, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5980771780014038, "incorrect_loss_raw": 0.29664209485054016, "correct_loss_per_char": 0.5326923926671346, "incorrect_loss_per_char": 0.07416052371263504, "correct_loss_per_token": 1.5980771780014038, "incorrect_loss_per_token": 0.29664209485054016, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29664209485054016, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.29664209485054016, "logits_per_char": -0.07416052371263504, "num_chars": 4}, {"sum_logits": -1.5980771780014038, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -1.5980771780014038, "logits_per_char": -0.5326923926671346, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 499, "native_id": 2237, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.43181055784225464, "incorrect_loss_raw": 1.2013304233551025, "correct_loss_per_char": 0.10795263946056366, "incorrect_loss_per_char": 0.40044347445170086, "correct_loss_per_token": 0.43181055784225464, "incorrect_loss_per_token": 1.2013304233551025, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.43181055784225464, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.43181055784225464, "logits_per_char": -0.10795263946056366, "num_chars": 4}, {"sum_logits": -1.2013304233551025, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.2013304233551025, "logits_per_char": -0.40044347445170086, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 500, "native_id": 2284, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34311509132385254, "incorrect_loss_raw": 1.3198001384735107, "correct_loss_per_char": 0.08577877283096313, "incorrect_loss_per_char": 0.43993337949117023, "correct_loss_per_token": 0.34311509132385254, "incorrect_loss_per_token": 1.3198001384735107, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34311509132385254, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -0.34311509132385254, "logits_per_char": -0.08577877283096313, "num_chars": 4}, {"sum_logits": -1.3198001384735107, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.3198001384735107, "logits_per_char": -0.43993337949117023, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 501, "native_id": 540, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3965210020542145, "incorrect_loss_raw": 1.2553105354309082, "correct_loss_per_char": 0.09913025051355362, "incorrect_loss_per_char": 0.41843684514363605, "correct_loss_per_token": 0.3965210020542145, "incorrect_loss_per_token": 1.2553105354309082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3965210020542145, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": true, "logits_per_token": -0.3965210020542145, "logits_per_char": -0.09913025051355362, "num_chars": 4}, {"sum_logits": -1.2553105354309082, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": false, "logits_per_token": -1.2553105354309082, "logits_per_char": -0.41843684514363605, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 502, "native_id": 1048, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5337855815887451, "incorrect_loss_raw": 0.27638834714889526, "correct_loss_per_char": 0.5112618605295817, "incorrect_loss_per_char": 0.06909708678722382, "correct_loss_per_token": 1.5337855815887451, "incorrect_loss_per_token": 0.27638834714889526, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27638834714889526, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.27638834714889526, "logits_per_char": -0.06909708678722382, "num_chars": 4}, {"sum_logits": -1.5337855815887451, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.5337855815887451, "logits_per_char": -0.5112618605295817, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 503, "native_id": 978, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2789655029773712, "incorrect_loss_raw": 1.5074403285980225, "correct_loss_per_char": 0.0697413757443428, "incorrect_loss_per_char": 0.5024801095326742, "correct_loss_per_token": 0.2789655029773712, "incorrect_loss_per_token": 1.5074403285980225, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2789655029773712, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.2789655029773712, "logits_per_char": -0.0697413757443428, "num_chars": 4}, {"sum_logits": -1.5074403285980225, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.5074403285980225, "logits_per_char": -0.5024801095326742, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 504, "native_id": 2880, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24850918352603912, "incorrect_loss_raw": 1.723832607269287, "correct_loss_per_char": 0.06212729588150978, "incorrect_loss_per_char": 0.5746108690897623, "correct_loss_per_token": 0.24850918352603912, "incorrect_loss_per_token": 1.723832607269287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24850918352603912, "num_tokens": 1, "num_tokens_all": 1021, "is_greedy": true, "logits_per_token": -0.24850918352603912, "logits_per_char": -0.06212729588150978, "num_chars": 4}, {"sum_logits": -1.723832607269287, "num_tokens": 1, "num_tokens_all": 1021, "is_greedy": false, "logits_per_token": -1.723832607269287, "logits_per_char": -0.5746108690897623, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 505, "native_id": 1373, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.322544664144516, "incorrect_loss_raw": 1.4606969356536865, "correct_loss_per_char": 0.080636166036129, "incorrect_loss_per_char": 0.4868989785512288, "correct_loss_per_token": 0.322544664144516, "incorrect_loss_per_token": 1.4606969356536865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.322544664144516, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.322544664144516, "logits_per_char": -0.080636166036129, "num_chars": 4}, {"sum_logits": -1.4606969356536865, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.4606969356536865, "logits_per_char": -0.4868989785512288, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 506, "native_id": 1606, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2598661482334137, "incorrect_loss_raw": 1.6677770614624023, "correct_loss_per_char": 0.06496653705835342, "incorrect_loss_per_char": 0.5559256871541342, "correct_loss_per_token": 0.2598661482334137, "incorrect_loss_per_token": 1.6677770614624023, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2598661482334137, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.2598661482334137, "logits_per_char": -0.06496653705835342, "num_chars": 4}, {"sum_logits": -1.6677770614624023, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -1.6677770614624023, "logits_per_char": -0.5559256871541342, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 507, "native_id": 1202, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.285975843667984, "incorrect_loss_raw": 1.5032601356506348, "correct_loss_per_char": 0.071493960916996, "incorrect_loss_per_char": 0.5010867118835449, "correct_loss_per_token": 0.285975843667984, "incorrect_loss_per_token": 1.5032601356506348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.285975843667984, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.285975843667984, "logits_per_char": -0.071493960916996, "num_chars": 4}, {"sum_logits": -1.5032601356506348, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.5032601356506348, "logits_per_char": -0.5010867118835449, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 508, "native_id": 2138, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8509711027145386, "incorrect_loss_raw": 0.20985981822013855, "correct_loss_per_char": 0.6169903675715128, "incorrect_loss_per_char": 0.05246495455503464, "correct_loss_per_token": 1.8509711027145386, "incorrect_loss_per_token": 0.20985981822013855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20985981822013855, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": true, "logits_per_token": -0.20985981822013855, "logits_per_char": -0.05246495455503464, "num_chars": 4}, {"sum_logits": -1.8509711027145386, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.8509711027145386, "logits_per_char": -0.6169903675715128, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 509, "native_id": 1453, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2573940753936768, "incorrect_loss_raw": 0.4714582860469818, "correct_loss_per_char": 0.4191313584645589, "incorrect_loss_per_char": 0.11786457151174545, "correct_loss_per_token": 1.2573940753936768, "incorrect_loss_per_token": 0.4714582860469818, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4714582860469818, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.4714582860469818, "logits_per_char": -0.11786457151174545, "num_chars": 4}, {"sum_logits": -1.2573940753936768, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.2573940753936768, "logits_per_char": -0.4191313584645589, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 510, "native_id": 1660, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.31948551535606384, "incorrect_loss_raw": 1.438992977142334, "correct_loss_per_char": 0.07987137883901596, "incorrect_loss_per_char": 0.47966432571411133, "correct_loss_per_token": 0.31948551535606384, "incorrect_loss_per_token": 1.438992977142334, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.31948551535606384, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": true, "logits_per_token": -0.31948551535606384, "logits_per_char": -0.07987137883901596, "num_chars": 4}, {"sum_logits": -1.438992977142334, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.438992977142334, "logits_per_char": -0.47966432571411133, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 511, "native_id": 2244, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.08701468259096146, "incorrect_loss_raw": 2.6131503582000732, "correct_loss_per_char": 0.021753670647740364, "incorrect_loss_per_char": 0.8710501194000244, "correct_loss_per_token": 0.08701468259096146, "incorrect_loss_per_token": 2.6131503582000732, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.08701468259096146, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.08701468259096146, "logits_per_char": -0.021753670647740364, "num_chars": 4}, {"sum_logits": -2.6131503582000732, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -2.6131503582000732, "logits_per_char": -0.8710501194000244, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 512, "native_id": 771, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3230561316013336, "incorrect_loss_raw": 1.4635196924209595, "correct_loss_per_char": 0.0807640329003334, "incorrect_loss_per_char": 0.48783989747365314, "correct_loss_per_token": 0.3230561316013336, "incorrect_loss_per_token": 1.4635196924209595, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3230561316013336, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.3230561316013336, "logits_per_char": -0.0807640329003334, "num_chars": 4}, {"sum_logits": -1.4635196924209595, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -1.4635196924209595, "logits_per_char": -0.48783989747365314, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 513, "native_id": 2480, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4275940954685211, "incorrect_loss_raw": 1.2800047397613525, "correct_loss_per_char": 0.10689852386713028, "incorrect_loss_per_char": 0.4266682465871175, "correct_loss_per_token": 0.4275940954685211, "incorrect_loss_per_token": 1.2800047397613525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4275940954685211, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.4275940954685211, "logits_per_char": -0.10689852386713028, "num_chars": 4}, {"sum_logits": -1.2800047397613525, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.2800047397613525, "logits_per_char": -0.4266682465871175, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 514, "native_id": 1937, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9704117774963379, "incorrect_loss_raw": 0.5454351902008057, "correct_loss_per_char": 0.3234705924987793, "incorrect_loss_per_char": 0.13635879755020142, "correct_loss_per_token": 0.9704117774963379, "incorrect_loss_per_token": 0.5454351902008057, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5454351902008057, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.5454351902008057, "logits_per_char": -0.13635879755020142, "num_chars": 4}, {"sum_logits": -0.9704117774963379, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -0.9704117774963379, "logits_per_char": -0.3234705924987793, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 515, "native_id": 1907, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.256969928741455, "incorrect_loss_raw": 0.3913109600543976, "correct_loss_per_char": 0.4189899762471517, "incorrect_loss_per_char": 0.0978277400135994, "correct_loss_per_token": 1.256969928741455, "incorrect_loss_per_token": 0.3913109600543976, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3913109600543976, "num_tokens": 1, "num_tokens_all": 1044, "is_greedy": true, "logits_per_token": -0.3913109600543976, "logits_per_char": -0.0978277400135994, "num_chars": 4}, {"sum_logits": -1.256969928741455, "num_tokens": 1, "num_tokens_all": 1044, "is_greedy": false, "logits_per_token": -1.256969928741455, "logits_per_char": -0.4189899762471517, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 516, "native_id": 1308, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18693003058433533, "incorrect_loss_raw": 1.8738369941711426, "correct_loss_per_char": 0.04673250764608383, "incorrect_loss_per_char": 0.6246123313903809, "correct_loss_per_token": 0.18693003058433533, "incorrect_loss_per_token": 1.8738369941711426, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18693003058433533, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -0.18693003058433533, "logits_per_char": -0.04673250764608383, "num_chars": 4}, {"sum_logits": -1.8738369941711426, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -1.8738369941711426, "logits_per_char": -0.6246123313903809, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 517, "native_id": 1808, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34774506092071533, "incorrect_loss_raw": 1.3402812480926514, "correct_loss_per_char": 0.08693626523017883, "incorrect_loss_per_char": 0.4467604160308838, "correct_loss_per_token": 0.34774506092071533, "incorrect_loss_per_token": 1.3402812480926514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34774506092071533, "num_tokens": 1, "num_tokens_all": 1024, "is_greedy": true, "logits_per_token": -0.34774506092071533, "logits_per_char": -0.08693626523017883, "num_chars": 4}, {"sum_logits": -1.3402812480926514, "num_tokens": 1, "num_tokens_all": 1024, "is_greedy": false, "logits_per_token": -1.3402812480926514, "logits_per_char": -0.4467604160308838, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 518, "native_id": 2149, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8576602935791016, "incorrect_loss_raw": 0.6185723543167114, "correct_loss_per_char": 0.2858867645263672, "incorrect_loss_per_char": 0.15464308857917786, "correct_loss_per_token": 0.8576602935791016, "incorrect_loss_per_token": 0.6185723543167114, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6185723543167114, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": true, "logits_per_token": -0.6185723543167114, "logits_per_char": -0.15464308857917786, "num_chars": 4}, {"sum_logits": -0.8576602935791016, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": false, "logits_per_token": -0.8576602935791016, "logits_per_char": -0.2858867645263672, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 519, "native_id": 441, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2306634485721588, "incorrect_loss_raw": 1.7873797416687012, "correct_loss_per_char": 0.0576658621430397, "incorrect_loss_per_char": 0.5957932472229004, "correct_loss_per_token": 0.2306634485721588, "incorrect_loss_per_token": 1.7873797416687012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2306634485721588, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.2306634485721588, "logits_per_char": -0.0576658621430397, "num_chars": 4}, {"sum_logits": -1.7873797416687012, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -1.7873797416687012, "logits_per_char": -0.5957932472229004, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 520, "native_id": 2208, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3305976390838623, "incorrect_loss_raw": 0.43802908062934875, "correct_loss_per_char": 0.4435325463612874, "incorrect_loss_per_char": 0.10950727015733719, "correct_loss_per_token": 1.3305976390838623, "incorrect_loss_per_token": 0.43802908062934875, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.43802908062934875, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.43802908062934875, "logits_per_char": -0.10950727015733719, "num_chars": 4}, {"sum_logits": -1.3305976390838623, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.3305976390838623, "logits_per_char": -0.4435325463612874, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 521, "native_id": 1897, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4522157609462738, "incorrect_loss_raw": 1.171838641166687, "correct_loss_per_char": 0.11305394023656845, "incorrect_loss_per_char": 0.3906128803888957, "correct_loss_per_token": 0.4522157609462738, "incorrect_loss_per_token": 1.171838641166687, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4522157609462738, "num_tokens": 1, "num_tokens_all": 851, "is_greedy": true, "logits_per_token": -0.4522157609462738, "logits_per_char": -0.11305394023656845, "num_chars": 4}, {"sum_logits": -1.171838641166687, "num_tokens": 1, "num_tokens_all": 851, "is_greedy": false, "logits_per_token": -1.171838641166687, "logits_per_char": -0.3906128803888957, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 522, "native_id": 351, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7928592562675476, "incorrect_loss_raw": 0.7590600252151489, "correct_loss_per_char": 0.2642864187558492, "incorrect_loss_per_char": 0.18976500630378723, "correct_loss_per_token": 0.7928592562675476, "incorrect_loss_per_token": 0.7590600252151489, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7590600252151489, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.7590600252151489, "logits_per_char": -0.18976500630378723, "num_chars": 4}, {"sum_logits": -0.7928592562675476, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -0.7928592562675476, "logits_per_char": -0.2642864187558492, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 523, "native_id": 311, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9661797285079956, "incorrect_loss_raw": 0.5680235624313354, "correct_loss_per_char": 0.3220599095026652, "incorrect_loss_per_char": 0.14200589060783386, "correct_loss_per_token": 0.9661797285079956, "incorrect_loss_per_token": 0.5680235624313354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5680235624313354, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": true, "logits_per_token": -0.5680235624313354, "logits_per_char": -0.14200589060783386, "num_chars": 4}, {"sum_logits": -0.9661797285079956, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": false, "logits_per_token": -0.9661797285079956, "logits_per_char": -0.3220599095026652, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 524, "native_id": 808, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5491568446159363, "incorrect_loss_raw": 0.9991780519485474, "correct_loss_per_char": 0.13728921115398407, "incorrect_loss_per_char": 0.3330593506495158, "correct_loss_per_token": 0.5491568446159363, "incorrect_loss_per_token": 0.9991780519485474, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5491568446159363, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.5491568446159363, "logits_per_char": -0.13728921115398407, "num_chars": 4}, {"sum_logits": -0.9991780519485474, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -0.9991780519485474, "logits_per_char": -0.3330593506495158, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 525, "native_id": 720, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.716048002243042, "incorrect_loss_raw": 0.27331188321113586, "correct_loss_per_char": 0.5720160007476807, "incorrect_loss_per_char": 0.06832797080278397, "correct_loss_per_token": 1.716048002243042, "incorrect_loss_per_token": 0.27331188321113586, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27331188321113586, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.27331188321113586, "logits_per_char": -0.06832797080278397, "num_chars": 4}, {"sum_logits": -1.716048002243042, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.716048002243042, "logits_per_char": -0.5720160007476807, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 526, "native_id": 2489, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.40601950883865356, "incorrect_loss_raw": 1.1897995471954346, "correct_loss_per_char": 0.10150487720966339, "incorrect_loss_per_char": 0.39659984906514484, "correct_loss_per_token": 0.40601950883865356, "incorrect_loss_per_token": 1.1897995471954346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.40601950883865356, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.40601950883865356, "logits_per_char": -0.10150487720966339, "num_chars": 4}, {"sum_logits": -1.1897995471954346, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -1.1897995471954346, "logits_per_char": -0.39659984906514484, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 527, "native_id": 1375, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2568489909172058, "incorrect_loss_raw": 1.7308738231658936, "correct_loss_per_char": 0.06421224772930145, "incorrect_loss_per_char": 0.5769579410552979, "correct_loss_per_token": 0.2568489909172058, "incorrect_loss_per_token": 1.7308738231658936, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2568489909172058, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": true, "logits_per_token": -0.2568489909172058, "logits_per_char": -0.06421224772930145, "num_chars": 4}, {"sum_logits": -1.7308738231658936, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": false, "logits_per_token": -1.7308738231658936, "logits_per_char": -0.5769579410552979, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 528, "native_id": 707, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.294440507888794, "incorrect_loss_raw": 0.40833568572998047, "correct_loss_per_char": 0.43148016929626465, "incorrect_loss_per_char": 0.10208392143249512, "correct_loss_per_token": 1.294440507888794, "incorrect_loss_per_token": 0.40833568572998047, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.40833568572998047, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": true, "logits_per_token": -0.40833568572998047, "logits_per_char": -0.10208392143249512, "num_chars": 4}, {"sum_logits": -1.294440507888794, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": false, "logits_per_token": -1.294440507888794, "logits_per_char": -0.43148016929626465, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 529, "native_id": 1547, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30689531564712524, "incorrect_loss_raw": 1.476635217666626, "correct_loss_per_char": 0.07672382891178131, "incorrect_loss_per_char": 0.4922117392222087, "correct_loss_per_token": 0.30689531564712524, "incorrect_loss_per_token": 1.476635217666626, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30689531564712524, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.30689531564712524, "logits_per_char": -0.07672382891178131, "num_chars": 4}, {"sum_logits": -1.476635217666626, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -1.476635217666626, "logits_per_char": -0.4922117392222087, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 530, "native_id": 3176, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3452827036380768, "incorrect_loss_raw": 1.3515857458114624, "correct_loss_per_char": 0.0863206759095192, "incorrect_loss_per_char": 0.4505285819371541, "correct_loss_per_token": 0.3452827036380768, "incorrect_loss_per_token": 1.3515857458114624, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3452827036380768, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.3452827036380768, "logits_per_char": -0.0863206759095192, "num_chars": 4}, {"sum_logits": -1.3515857458114624, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.3515857458114624, "logits_per_char": -0.4505285819371541, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 531, "native_id": 817, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27667585015296936, "incorrect_loss_raw": 1.5176228284835815, "correct_loss_per_char": 0.06916896253824234, "incorrect_loss_per_char": 0.5058742761611938, "correct_loss_per_token": 0.27667585015296936, "incorrect_loss_per_token": 1.5176228284835815, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27667585015296936, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": true, "logits_per_token": -0.27667585015296936, "logits_per_char": -0.06916896253824234, "num_chars": 4}, {"sum_logits": -1.5176228284835815, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -1.5176228284835815, "logits_per_char": -0.5058742761611938, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 532, "native_id": 1083, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2764773964881897, "incorrect_loss_raw": 1.5665802955627441, "correct_loss_per_char": 0.06911934912204742, "incorrect_loss_per_char": 0.522193431854248, "correct_loss_per_token": 0.2764773964881897, "incorrect_loss_per_token": 1.5665802955627441, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2764773964881897, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.2764773964881897, "logits_per_char": -0.06911934912204742, "num_chars": 4}, {"sum_logits": -1.5665802955627441, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.5665802955627441, "logits_per_char": -0.522193431854248, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 533, "native_id": 120, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.15921732783317566, "incorrect_loss_raw": 2.222437620162964, "correct_loss_per_char": 0.039804331958293915, "incorrect_loss_per_char": 0.7408125400543213, "correct_loss_per_token": 0.15921732783317566, "incorrect_loss_per_token": 2.222437620162964, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.15921732783317566, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": true, "logits_per_token": -0.15921732783317566, "logits_per_char": -0.039804331958293915, "num_chars": 4}, {"sum_logits": -2.222437620162964, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -2.222437620162964, "logits_per_char": -0.7408125400543213, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 534, "native_id": 647, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7243640422821045, "incorrect_loss_raw": 0.2328033447265625, "correct_loss_per_char": 0.5747880140940348, "incorrect_loss_per_char": 0.058200836181640625, "correct_loss_per_token": 1.7243640422821045, "incorrect_loss_per_token": 0.2328033447265625, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2328033447265625, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.2328033447265625, "logits_per_char": -0.058200836181640625, "num_chars": 4}, {"sum_logits": -1.7243640422821045, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -1.7243640422821045, "logits_per_char": -0.5747880140940348, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 535, "native_id": 2710, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6247105598449707, "incorrect_loss_raw": 0.2814900577068329, "correct_loss_per_char": 0.5415701866149902, "incorrect_loss_per_char": 0.07037251442670822, "correct_loss_per_token": 1.6247105598449707, "incorrect_loss_per_token": 0.2814900577068329, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2814900577068329, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.2814900577068329, "logits_per_char": -0.07037251442670822, "num_chars": 4}, {"sum_logits": -1.6247105598449707, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -1.6247105598449707, "logits_per_char": -0.5415701866149902, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 536, "native_id": 1294, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.14605529606342316, "incorrect_loss_raw": 2.1342110633850098, "correct_loss_per_char": 0.03651382401585579, "incorrect_loss_per_char": 0.7114036877950033, "correct_loss_per_token": 0.14605529606342316, "incorrect_loss_per_token": 2.1342110633850098, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.14605529606342316, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.14605529606342316, "logits_per_char": -0.03651382401585579, "num_chars": 4}, {"sum_logits": -2.1342110633850098, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -2.1342110633850098, "logits_per_char": -0.7114036877950033, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 537, "native_id": 2964, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5284667015075684, "incorrect_loss_raw": 1.0832455158233643, "correct_loss_per_char": 0.1321166753768921, "incorrect_loss_per_char": 0.3610818386077881, "correct_loss_per_token": 0.5284667015075684, "incorrect_loss_per_token": 1.0832455158233643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5284667015075684, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": true, "logits_per_token": -0.5284667015075684, "logits_per_char": -0.1321166753768921, "num_chars": 4}, {"sum_logits": -1.0832455158233643, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": false, "logits_per_token": -1.0832455158233643, "logits_per_char": -0.3610818386077881, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 538, "native_id": 408, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1492326259613037, "incorrect_loss_raw": 2.2918403148651123, "correct_loss_per_char": 0.03730815649032593, "incorrect_loss_per_char": 0.7639467716217041, "correct_loss_per_token": 0.1492326259613037, "incorrect_loss_per_token": 2.2918403148651123, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1492326259613037, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": true, "logits_per_token": -0.1492326259613037, "logits_per_char": -0.03730815649032593, "num_chars": 4}, {"sum_logits": -2.2918403148651123, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -2.2918403148651123, "logits_per_char": -0.7639467716217041, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 539, "native_id": 3161, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29642823338508606, "incorrect_loss_raw": 1.5179224014282227, "correct_loss_per_char": 0.07410705834627151, "incorrect_loss_per_char": 0.5059741338094076, "correct_loss_per_token": 0.29642823338508606, "incorrect_loss_per_token": 1.5179224014282227, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29642823338508606, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.29642823338508606, "logits_per_char": -0.07410705834627151, "num_chars": 4}, {"sum_logits": -1.5179224014282227, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.5179224014282227, "logits_per_char": -0.5059741338094076, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 540, "native_id": 228, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7190568447113037, "incorrect_loss_raw": 0.770988941192627, "correct_loss_per_char": 0.17976421117782593, "incorrect_loss_per_char": 0.25699631373087567, "correct_loss_per_token": 0.7190568447113037, "incorrect_loss_per_token": 0.770988941192627, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7190568447113037, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.7190568447113037, "logits_per_char": -0.17976421117782593, "num_chars": 4}, {"sum_logits": -0.770988941192627, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -0.770988941192627, "logits_per_char": -0.25699631373087567, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 541, "native_id": 3043, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2070826143026352, "incorrect_loss_raw": 1.9618247747421265, "correct_loss_per_char": 0.0517706535756588, "incorrect_loss_per_char": 0.6539415915807089, "correct_loss_per_token": 0.2070826143026352, "incorrect_loss_per_token": 1.9618247747421265, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2070826143026352, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.2070826143026352, "logits_per_char": -0.0517706535756588, "num_chars": 4}, {"sum_logits": -1.9618247747421265, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.9618247747421265, "logits_per_char": -0.6539415915807089, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 542, "native_id": 1736, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.49770796298980713, "incorrect_loss_raw": 1.1182674169540405, "correct_loss_per_char": 0.12442699074745178, "incorrect_loss_per_char": 0.37275580565134686, "correct_loss_per_token": 0.49770796298980713, "incorrect_loss_per_token": 1.1182674169540405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.49770796298980713, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.49770796298980713, "logits_per_char": -0.12442699074745178, "num_chars": 4}, {"sum_logits": -1.1182674169540405, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.1182674169540405, "logits_per_char": -0.37275580565134686, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 543, "native_id": 1323, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.43075132369995117, "incorrect_loss_raw": 1.192469596862793, "correct_loss_per_char": 0.10768783092498779, "incorrect_loss_per_char": 0.39748986562093097, "correct_loss_per_token": 0.43075132369995117, "incorrect_loss_per_token": 1.192469596862793, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.43075132369995117, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": true, "logits_per_token": -0.43075132369995117, "logits_per_char": -0.10768783092498779, "num_chars": 4}, {"sum_logits": -1.192469596862793, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": false, "logits_per_token": -1.192469596862793, "logits_per_char": -0.39748986562093097, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 544, "native_id": 1392, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.16899128258228302, "incorrect_loss_raw": 2.306690216064453, "correct_loss_per_char": 0.042247820645570755, "incorrect_loss_per_char": 0.768896738688151, "correct_loss_per_token": 0.16899128258228302, "incorrect_loss_per_token": 2.306690216064453, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16899128258228302, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.16899128258228302, "logits_per_char": -0.042247820645570755, "num_chars": 4}, {"sum_logits": -2.306690216064453, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -2.306690216064453, "logits_per_char": -0.768896738688151, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 545, "native_id": 3020, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27939164638519287, "incorrect_loss_raw": 1.6533360481262207, "correct_loss_per_char": 0.06984791159629822, "incorrect_loss_per_char": 0.5511120160420736, "correct_loss_per_token": 0.27939164638519287, "incorrect_loss_per_token": 1.6533360481262207, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27939164638519287, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.27939164638519287, "logits_per_char": -0.06984791159629822, "num_chars": 4}, {"sum_logits": -1.6533360481262207, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -1.6533360481262207, "logits_per_char": -0.5511120160420736, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 546, "native_id": 2426, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.12485100328922272, "incorrect_loss_raw": 2.4378466606140137, "correct_loss_per_char": 0.03121275082230568, "incorrect_loss_per_char": 0.8126155535380045, "correct_loss_per_token": 0.12485100328922272, "incorrect_loss_per_token": 2.4378466606140137, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.12485100328922272, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": true, "logits_per_token": -0.12485100328922272, "logits_per_char": -0.03121275082230568, "num_chars": 4}, {"sum_logits": -2.4378466606140137, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": false, "logits_per_token": -2.4378466606140137, "logits_per_char": -0.8126155535380045, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 547, "native_id": 1776, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19248934090137482, "incorrect_loss_raw": 1.8723080158233643, "correct_loss_per_char": 0.048122335225343704, "incorrect_loss_per_char": 0.6241026719411215, "correct_loss_per_token": 0.19248934090137482, "incorrect_loss_per_token": 1.8723080158233643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19248934090137482, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.19248934090137482, "logits_per_char": -0.048122335225343704, "num_chars": 4}, {"sum_logits": -1.8723080158233643, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.8723080158233643, "logits_per_char": -0.6241026719411215, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 548, "native_id": 2362, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1858646869659424, "incorrect_loss_raw": 0.49605053663253784, "correct_loss_per_char": 0.39528822898864746, "incorrect_loss_per_char": 0.12401263415813446, "correct_loss_per_token": 1.1858646869659424, "incorrect_loss_per_token": 0.49605053663253784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.49605053663253784, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": true, "logits_per_token": -0.49605053663253784, "logits_per_char": -0.12401263415813446, "num_chars": 4}, {"sum_logits": -1.1858646869659424, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": false, "logits_per_token": -1.1858646869659424, "logits_per_char": -0.39528822898864746, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 549, "native_id": 681, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18195267021656036, "incorrect_loss_raw": 1.9725574254989624, "correct_loss_per_char": 0.04548816755414009, "incorrect_loss_per_char": 0.6575191418329874, "correct_loss_per_token": 0.18195267021656036, "incorrect_loss_per_token": 1.9725574254989624, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18195267021656036, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": true, "logits_per_token": -0.18195267021656036, "logits_per_char": -0.04548816755414009, "num_chars": 4}, {"sum_logits": -1.9725574254989624, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": false, "logits_per_token": -1.9725574254989624, "logits_per_char": -0.6575191418329874, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 550, "native_id": 1539, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3681706488132477, "incorrect_loss_raw": 1.352637529373169, "correct_loss_per_char": 0.09204266220331192, "incorrect_loss_per_char": 0.45087917645772296, "correct_loss_per_token": 0.3681706488132477, "incorrect_loss_per_token": 1.352637529373169, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3681706488132477, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.3681706488132477, "logits_per_char": -0.09204266220331192, "num_chars": 4}, {"sum_logits": -1.352637529373169, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -1.352637529373169, "logits_per_char": -0.45087917645772296, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 551, "native_id": 2945, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4360202550888062, "incorrect_loss_raw": 0.34213054180145264, "correct_loss_per_char": 0.47867341836293537, "incorrect_loss_per_char": 0.08553263545036316, "correct_loss_per_token": 1.4360202550888062, "incorrect_loss_per_token": 0.34213054180145264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34213054180145264, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.34213054180145264, "logits_per_char": -0.08553263545036316, "num_chars": 4}, {"sum_logits": -1.4360202550888062, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -1.4360202550888062, "logits_per_char": -0.47867341836293537, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 552, "native_id": 36, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2465754747390747, "incorrect_loss_raw": 0.41008269786834717, "correct_loss_per_char": 0.4155251582463582, "incorrect_loss_per_char": 0.10252067446708679, "correct_loss_per_token": 1.2465754747390747, "incorrect_loss_per_token": 0.41008269786834717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.41008269786834717, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.41008269786834717, "logits_per_char": -0.10252067446708679, "num_chars": 4}, {"sum_logits": -1.2465754747390747, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.2465754747390747, "logits_per_char": -0.4155251582463582, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 553, "native_id": 1184, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.13687309622764587, "incorrect_loss_raw": 2.442960023880005, "correct_loss_per_char": 0.03421827405691147, "incorrect_loss_per_char": 0.8143200079600016, "correct_loss_per_token": 0.13687309622764587, "incorrect_loss_per_token": 2.442960023880005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.13687309622764587, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.13687309622764587, "logits_per_char": -0.03421827405691147, "num_chars": 4}, {"sum_logits": -2.442960023880005, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -2.442960023880005, "logits_per_char": -0.8143200079600016, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 554, "native_id": 2443, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5398966073989868, "incorrect_loss_raw": 0.27555420994758606, "correct_loss_per_char": 0.5132988691329956, "incorrect_loss_per_char": 0.06888855248689651, "correct_loss_per_token": 1.5398966073989868, "incorrect_loss_per_token": 0.27555420994758606, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27555420994758606, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.27555420994758606, "logits_per_char": -0.06888855248689651, "num_chars": 4}, {"sum_logits": -1.5398966073989868, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.5398966073989868, "logits_per_char": -0.5132988691329956, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 555, "native_id": 2434, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4078321158885956, "incorrect_loss_raw": 1.2645004987716675, "correct_loss_per_char": 0.1019580289721489, "incorrect_loss_per_char": 0.4215001662572225, "correct_loss_per_token": 0.4078321158885956, "incorrect_loss_per_token": 1.2645004987716675, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4078321158885956, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.4078321158885956, "logits_per_char": -0.1019580289721489, "num_chars": 4}, {"sum_logits": -1.2645004987716675, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.2645004987716675, "logits_per_char": -0.4215001662572225, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 556, "native_id": 1162, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5174303650856018, "incorrect_loss_raw": 1.1789215803146362, "correct_loss_per_char": 0.12935759127140045, "incorrect_loss_per_char": 0.3929738601048787, "correct_loss_per_token": 0.5174303650856018, "incorrect_loss_per_token": 1.1789215803146362, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5174303650856018, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.5174303650856018, "logits_per_char": -0.12935759127140045, "num_chars": 4}, {"sum_logits": -1.1789215803146362, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -1.1789215803146362, "logits_per_char": -0.3929738601048787, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 557, "native_id": 1296, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6054942607879639, "incorrect_loss_raw": 0.278084933757782, "correct_loss_per_char": 0.5351647535959879, "incorrect_loss_per_char": 0.0695212334394455, "correct_loss_per_token": 1.6054942607879639, "incorrect_loss_per_token": 0.278084933757782, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.278084933757782, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": true, "logits_per_token": -0.278084933757782, "logits_per_char": -0.0695212334394455, "num_chars": 4}, {"sum_logits": -1.6054942607879639, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -1.6054942607879639, "logits_per_char": -0.5351647535959879, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 558, "native_id": 2496, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7728337645530701, "incorrect_loss_raw": 0.7036465406417847, "correct_loss_per_char": 0.19320844113826752, "incorrect_loss_per_char": 0.23454884688059488, "correct_loss_per_token": 0.7728337645530701, "incorrect_loss_per_token": 0.7036465406417847, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7728337645530701, "num_tokens": 1, "num_tokens_all": 1044, "is_greedy": false, "logits_per_token": -0.7728337645530701, "logits_per_char": -0.19320844113826752, "num_chars": 4}, {"sum_logits": -0.7036465406417847, "num_tokens": 1, "num_tokens_all": 1044, "is_greedy": true, "logits_per_token": -0.7036465406417847, "logits_per_char": -0.23454884688059488, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 559, "native_id": 1019, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.113236427307129, "incorrect_loss_raw": 0.5434492230415344, "correct_loss_per_char": 0.3710788091023763, "incorrect_loss_per_char": 0.1358623057603836, "correct_loss_per_token": 1.113236427307129, "incorrect_loss_per_token": 0.5434492230415344, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5434492230415344, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.5434492230415344, "logits_per_char": -0.1358623057603836, "num_chars": 4}, {"sum_logits": -1.113236427307129, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.113236427307129, "logits_per_char": -0.3710788091023763, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 560, "native_id": 639, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9351148009300232, "incorrect_loss_raw": 0.5611537098884583, "correct_loss_per_char": 0.31170493364334106, "incorrect_loss_per_char": 0.14028842747211456, "correct_loss_per_token": 0.9351148009300232, "incorrect_loss_per_token": 0.5611537098884583, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5611537098884583, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.5611537098884583, "logits_per_char": -0.14028842747211456, "num_chars": 4}, {"sum_logits": -0.9351148009300232, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -0.9351148009300232, "logits_per_char": -0.31170493364334106, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 561, "native_id": 795, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3135189712047577, "incorrect_loss_raw": 1.4123668670654297, "correct_loss_per_char": 0.07837974280118942, "incorrect_loss_per_char": 0.47078895568847656, "correct_loss_per_token": 0.3135189712047577, "incorrect_loss_per_token": 1.4123668670654297, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3135189712047577, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.3135189712047577, "logits_per_char": -0.07837974280118942, "num_chars": 4}, {"sum_logits": -1.4123668670654297, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -1.4123668670654297, "logits_per_char": -0.47078895568847656, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 562, "native_id": 2498, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7642585635185242, "incorrect_loss_raw": 0.7344850301742554, "correct_loss_per_char": 0.25475285450617474, "incorrect_loss_per_char": 0.18362125754356384, "correct_loss_per_token": 0.7642585635185242, "incorrect_loss_per_token": 0.7344850301742554, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7344850301742554, "num_tokens": 1, "num_tokens_all": 1045, "is_greedy": true, "logits_per_token": -0.7344850301742554, "logits_per_char": -0.18362125754356384, "num_chars": 4}, {"sum_logits": -0.7642585635185242, "num_tokens": 1, "num_tokens_all": 1045, "is_greedy": false, "logits_per_token": -0.7642585635185242, "logits_per_char": -0.25475285450617474, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 563, "native_id": 1855, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26303166151046753, "incorrect_loss_raw": 1.6664693355560303, "correct_loss_per_char": 0.06575791537761688, "incorrect_loss_per_char": 0.5554897785186768, "correct_loss_per_token": 0.26303166151046753, "incorrect_loss_per_token": 1.6664693355560303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26303166151046753, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.26303166151046753, "logits_per_char": -0.06575791537761688, "num_chars": 4}, {"sum_logits": -1.6664693355560303, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.6664693355560303, "logits_per_char": -0.5554897785186768, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 564, "native_id": 2485, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27077963948249817, "incorrect_loss_raw": 1.5464270114898682, "correct_loss_per_char": 0.06769490987062454, "incorrect_loss_per_char": 0.5154756704966227, "correct_loss_per_token": 0.27077963948249817, "incorrect_loss_per_token": 1.5464270114898682, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27077963948249817, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": true, "logits_per_token": -0.27077963948249817, "logits_per_char": -0.06769490987062454, "num_chars": 4}, {"sum_logits": -1.5464270114898682, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": false, "logits_per_token": -1.5464270114898682, "logits_per_char": -0.5154756704966227, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 565, "native_id": 1822, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7945387959480286, "incorrect_loss_raw": 0.6871997714042664, "correct_loss_per_char": 0.19863469898700714, "incorrect_loss_per_char": 0.22906659046808878, "correct_loss_per_token": 0.7945387959480286, "incorrect_loss_per_token": 0.6871997714042664, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7945387959480286, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -0.7945387959480286, "logits_per_char": -0.19863469898700714, "num_chars": 4}, {"sum_logits": -0.6871997714042664, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": true, "logits_per_token": -0.6871997714042664, "logits_per_char": -0.22906659046808878, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 566, "native_id": 1710, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6770209074020386, "incorrect_loss_raw": 0.23873083293437958, "correct_loss_per_char": 0.5590069691340128, "incorrect_loss_per_char": 0.059682708233594894, "correct_loss_per_token": 1.6770209074020386, "incorrect_loss_per_token": 0.23873083293437958, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23873083293437958, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": true, "logits_per_token": -0.23873083293437958, "logits_per_char": -0.059682708233594894, "num_chars": 4}, {"sum_logits": -1.6770209074020386, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": false, "logits_per_token": -1.6770209074020386, "logits_per_char": -0.5590069691340128, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 567, "native_id": 2841, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29123204946517944, "incorrect_loss_raw": 1.4867658615112305, "correct_loss_per_char": 0.07280801236629486, "incorrect_loss_per_char": 0.49558862050374347, "correct_loss_per_token": 0.29123204946517944, "incorrect_loss_per_token": 1.4867658615112305, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29123204946517944, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": true, "logits_per_token": -0.29123204946517944, "logits_per_char": -0.07280801236629486, "num_chars": 4}, {"sum_logits": -1.4867658615112305, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -1.4867658615112305, "logits_per_char": -0.49558862050374347, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 568, "native_id": 1377, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2093778848648071, "incorrect_loss_raw": 0.4693349003791809, "correct_loss_per_char": 0.40312596162160236, "incorrect_loss_per_char": 0.11733372509479523, "correct_loss_per_token": 1.2093778848648071, "incorrect_loss_per_token": 0.4693349003791809, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4693349003791809, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.4693349003791809, "logits_per_char": -0.11733372509479523, "num_chars": 4}, {"sum_logits": -1.2093778848648071, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.2093778848648071, "logits_per_char": -0.40312596162160236, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 569, "native_id": 2142, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.321954607963562, "incorrect_loss_raw": 1.4213917255401611, "correct_loss_per_char": 0.0804886519908905, "incorrect_loss_per_char": 0.4737972418467204, "correct_loss_per_token": 0.321954607963562, "incorrect_loss_per_token": 1.4213917255401611, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.321954607963562, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.321954607963562, "logits_per_char": -0.0804886519908905, "num_chars": 4}, {"sum_logits": -1.4213917255401611, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.4213917255401611, "logits_per_char": -0.4737972418467204, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 570, "native_id": 1100, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0452356338500977, "incorrect_loss_raw": 0.18334995210170746, "correct_loss_per_char": 0.6817452112833658, "incorrect_loss_per_char": 0.045837488025426865, "correct_loss_per_token": 2.0452356338500977, "incorrect_loss_per_token": 0.18334995210170746, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18334995210170746, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.18334995210170746, "logits_per_char": -0.045837488025426865, "num_chars": 4}, {"sum_logits": -2.0452356338500977, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -2.0452356338500977, "logits_per_char": -0.6817452112833658, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 571, "native_id": 1782, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.16368043422698975, "incorrect_loss_raw": 2.1325507164001465, "correct_loss_per_char": 0.040920108556747437, "incorrect_loss_per_char": 0.7108502388000488, "correct_loss_per_token": 0.16368043422698975, "incorrect_loss_per_token": 2.1325507164001465, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16368043422698975, "num_tokens": 1, "num_tokens_all": 875, "is_greedy": true, "logits_per_token": -0.16368043422698975, "logits_per_char": -0.040920108556747437, "num_chars": 4}, {"sum_logits": -2.1325507164001465, "num_tokens": 1, "num_tokens_all": 875, "is_greedy": false, "logits_per_token": -2.1325507164001465, "logits_per_char": -0.7108502388000488, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 572, "native_id": 1604, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2787368297576904, "incorrect_loss_raw": 0.3860817849636078, "correct_loss_per_char": 0.42624560991923016, "incorrect_loss_per_char": 0.09652044624090195, "correct_loss_per_token": 1.2787368297576904, "incorrect_loss_per_token": 0.3860817849636078, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3860817849636078, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.3860817849636078, "logits_per_char": -0.09652044624090195, "num_chars": 4}, {"sum_logits": -1.2787368297576904, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.2787368297576904, "logits_per_char": -0.42624560991923016, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 573, "native_id": 1063, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.216937854886055, "incorrect_loss_raw": 1.8015776872634888, "correct_loss_per_char": 0.05423446372151375, "incorrect_loss_per_char": 0.6005258957544962, "correct_loss_per_token": 0.216937854886055, "incorrect_loss_per_token": 1.8015776872634888, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.216937854886055, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.216937854886055, "logits_per_char": -0.05423446372151375, "num_chars": 4}, {"sum_logits": -1.8015776872634888, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.8015776872634888, "logits_per_char": -0.6005258957544962, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 574, "native_id": 2352, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0528366565704346, "incorrect_loss_raw": 0.1745910793542862, "correct_loss_per_char": 0.6842788855234782, "incorrect_loss_per_char": 0.04364776983857155, "correct_loss_per_token": 2.0528366565704346, "incorrect_loss_per_token": 0.1745910793542862, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1745910793542862, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.1745910793542862, "logits_per_char": -0.04364776983857155, "num_chars": 4}, {"sum_logits": -2.0528366565704346, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -2.0528366565704346, "logits_per_char": -0.6842788855234782, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 575, "native_id": 2021, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.077446460723877, "incorrect_loss_raw": 0.5095515847206116, "correct_loss_per_char": 0.3591488202412923, "incorrect_loss_per_char": 0.1273878961801529, "correct_loss_per_token": 1.077446460723877, "incorrect_loss_per_token": 0.5095515847206116, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5095515847206116, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.5095515847206116, "logits_per_char": -0.1273878961801529, "num_chars": 4}, {"sum_logits": -1.077446460723877, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.077446460723877, "logits_per_char": -0.3591488202412923, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 576, "native_id": 1290, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2509728670120239, "incorrect_loss_raw": 1.8112398386001587, "correct_loss_per_char": 0.06274321675300598, "incorrect_loss_per_char": 0.6037466128667196, "correct_loss_per_token": 0.2509728670120239, "incorrect_loss_per_token": 1.8112398386001587, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2509728670120239, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.2509728670120239, "logits_per_char": -0.06274321675300598, "num_chars": 4}, {"sum_logits": -1.8112398386001587, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -1.8112398386001587, "logits_per_char": -0.6037466128667196, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 577, "native_id": 1014, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3728856146335602, "incorrect_loss_raw": 1.3590587377548218, "correct_loss_per_char": 0.09322140365839005, "incorrect_loss_per_char": 0.45301957925160724, "correct_loss_per_token": 0.3728856146335602, "incorrect_loss_per_token": 1.3590587377548218, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3728856146335602, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.3728856146335602, "logits_per_char": -0.09322140365839005, "num_chars": 4}, {"sum_logits": -1.3590587377548218, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -1.3590587377548218, "logits_per_char": -0.45301957925160724, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 578, "native_id": 3121, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8604841232299805, "incorrect_loss_raw": 0.6874789595603943, "correct_loss_per_char": 0.28682804107666016, "incorrect_loss_per_char": 0.17186973989009857, "correct_loss_per_token": 0.8604841232299805, "incorrect_loss_per_token": 0.6874789595603943, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6874789595603943, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": true, "logits_per_token": -0.6874789595603943, "logits_per_char": -0.17186973989009857, "num_chars": 4}, {"sum_logits": -0.8604841232299805, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": false, "logits_per_token": -0.8604841232299805, "logits_per_char": -0.28682804107666016, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 579, "native_id": 646, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8059161901473999, "incorrect_loss_raw": 0.77756267786026, "correct_loss_per_char": 0.20147904753684998, "incorrect_loss_per_char": 0.25918755928675336, "correct_loss_per_token": 0.8059161901473999, "incorrect_loss_per_token": 0.77756267786026, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8059161901473999, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": false, "logits_per_token": -0.8059161901473999, "logits_per_char": -0.20147904753684998, "num_chars": 4}, {"sum_logits": -0.77756267786026, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": true, "logits_per_token": -0.77756267786026, "logits_per_char": -0.25918755928675336, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 580, "native_id": 3196, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3154199123382568, "incorrect_loss_raw": 0.36691218614578247, "correct_loss_per_char": 0.43847330411275226, "incorrect_loss_per_char": 0.09172804653644562, "correct_loss_per_token": 1.3154199123382568, "incorrect_loss_per_token": 0.36691218614578247, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.36691218614578247, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": true, "logits_per_token": -0.36691218614578247, "logits_per_char": -0.09172804653644562, "num_chars": 4}, {"sum_logits": -1.3154199123382568, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.3154199123382568, "logits_per_char": -0.43847330411275226, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 581, "native_id": 1682, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4646734595298767, "incorrect_loss_raw": 1.093056321144104, "correct_loss_per_char": 0.11616836488246918, "incorrect_loss_per_char": 0.36435210704803467, "correct_loss_per_token": 0.4646734595298767, "incorrect_loss_per_token": 1.093056321144104, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4646734595298767, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -0.4646734595298767, "logits_per_char": -0.11616836488246918, "num_chars": 4}, {"sum_logits": -1.093056321144104, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.093056321144104, "logits_per_char": -0.36435210704803467, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 582, "native_id": 645, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.14986978471279144, "incorrect_loss_raw": 2.310236692428589, "correct_loss_per_char": 0.03746744617819786, "incorrect_loss_per_char": 0.7700788974761963, "correct_loss_per_token": 0.14986978471279144, "incorrect_loss_per_token": 2.310236692428589, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.14986978471279144, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.14986978471279144, "logits_per_char": -0.03746744617819786, "num_chars": 4}, {"sum_logits": -2.310236692428589, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -2.310236692428589, "logits_per_char": -0.7700788974761963, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 583, "native_id": 141, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9305888414382935, "incorrect_loss_raw": 0.1881980001926422, "correct_loss_per_char": 0.6435296138127645, "incorrect_loss_per_char": 0.04704950004816055, "correct_loss_per_token": 1.9305888414382935, "incorrect_loss_per_token": 0.1881980001926422, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1881980001926422, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.1881980001926422, "logits_per_char": -0.04704950004816055, "num_chars": 4}, {"sum_logits": -1.9305888414382935, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.9305888414382935, "logits_per_char": -0.6435296138127645, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 584, "native_id": 3024, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.383596807718277, "incorrect_loss_raw": 1.2257091999053955, "correct_loss_per_char": 0.09589920192956924, "incorrect_loss_per_char": 0.4085697333017985, "correct_loss_per_token": 0.383596807718277, "incorrect_loss_per_token": 1.2257091999053955, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.383596807718277, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.383596807718277, "logits_per_char": -0.09589920192956924, "num_chars": 4}, {"sum_logits": -1.2257091999053955, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.2257091999053955, "logits_per_char": -0.4085697333017985, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 585, "native_id": 2360, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2510414123535156, "incorrect_loss_raw": 1.7975883483886719, "correct_loss_per_char": 0.0627603530883789, "incorrect_loss_per_char": 0.5991961161295573, "correct_loss_per_token": 0.2510414123535156, "incorrect_loss_per_token": 1.7975883483886719, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2510414123535156, "num_tokens": 1, "num_tokens_all": 1144, "is_greedy": true, "logits_per_token": -0.2510414123535156, "logits_per_char": -0.0627603530883789, "num_chars": 4}, {"sum_logits": -1.7975883483886719, "num_tokens": 1, "num_tokens_all": 1144, "is_greedy": false, "logits_per_token": -1.7975883483886719, "logits_per_char": -0.5991961161295573, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 586, "native_id": 2233, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9759595394134521, "incorrect_loss_raw": 0.5744799375534058, "correct_loss_per_char": 0.3253198464711507, "incorrect_loss_per_char": 0.14361998438835144, "correct_loss_per_token": 0.9759595394134521, "incorrect_loss_per_token": 0.5744799375534058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5744799375534058, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.5744799375534058, "logits_per_char": -0.14361998438835144, "num_chars": 4}, {"sum_logits": -0.9759595394134521, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -0.9759595394134521, "logits_per_char": -0.3253198464711507, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 587, "native_id": 2793, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.12902531027793884, "incorrect_loss_raw": 2.2129926681518555, "correct_loss_per_char": 0.03225632756948471, "incorrect_loss_per_char": 0.7376642227172852, "correct_loss_per_token": 0.12902531027793884, "incorrect_loss_per_token": 2.2129926681518555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.12902531027793884, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.12902531027793884, "logits_per_char": -0.03225632756948471, "num_chars": 4}, {"sum_logits": -2.2129926681518555, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -2.2129926681518555, "logits_per_char": -0.7376642227172852, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 588, "native_id": 3009, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.42152267694473267, "incorrect_loss_raw": 1.1364983320236206, "correct_loss_per_char": 0.10538066923618317, "incorrect_loss_per_char": 0.37883277734120685, "correct_loss_per_token": 0.42152267694473267, "incorrect_loss_per_token": 1.1364983320236206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.42152267694473267, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.42152267694473267, "logits_per_char": -0.10538066923618317, "num_chars": 4}, {"sum_logits": -1.1364983320236206, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.1364983320236206, "logits_per_char": -0.37883277734120685, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 589, "native_id": 2227, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3731746673583984, "incorrect_loss_raw": 0.4138124883174896, "correct_loss_per_char": 0.4577248891194661, "incorrect_loss_per_char": 0.1034531220793724, "correct_loss_per_token": 1.3731746673583984, "incorrect_loss_per_token": 0.4138124883174896, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4138124883174896, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -0.4138124883174896, "logits_per_char": -0.1034531220793724, "num_chars": 4}, {"sum_logits": -1.3731746673583984, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.3731746673583984, "logits_per_char": -0.4577248891194661, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 590, "native_id": 3000, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.192692518234253, "incorrect_loss_raw": 0.42809778451919556, "correct_loss_per_char": 0.397564172744751, "incorrect_loss_per_char": 0.10702444612979889, "correct_loss_per_token": 1.192692518234253, "incorrect_loss_per_token": 0.42809778451919556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.42809778451919556, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": true, "logits_per_token": -0.42809778451919556, "logits_per_char": -0.10702444612979889, "num_chars": 4}, {"sum_logits": -1.192692518234253, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": false, "logits_per_token": -1.192692518234253, "logits_per_char": -0.397564172744751, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 591, "native_id": 1761, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2575942277908325, "incorrect_loss_raw": 1.647141456604004, "correct_loss_per_char": 0.06439855694770813, "incorrect_loss_per_char": 0.5490471522013346, "correct_loss_per_token": 0.2575942277908325, "incorrect_loss_per_token": 1.647141456604004, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2575942277908325, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.2575942277908325, "logits_per_char": -0.06439855694770813, "num_chars": 4}, {"sum_logits": -1.647141456604004, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.647141456604004, "logits_per_char": -0.5490471522013346, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 592, "native_id": 1819, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2568890154361725, "incorrect_loss_raw": 1.6415072679519653, "correct_loss_per_char": 0.06422225385904312, "incorrect_loss_per_char": 0.5471690893173218, "correct_loss_per_token": 0.2568890154361725, "incorrect_loss_per_token": 1.6415072679519653, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2568890154361725, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.2568890154361725, "logits_per_char": -0.06422225385904312, "num_chars": 4}, {"sum_logits": -1.6415072679519653, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.6415072679519653, "logits_per_char": -0.5471690893173218, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 593, "native_id": 99, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3043826818466187, "incorrect_loss_raw": 0.4064323902130127, "correct_loss_per_char": 0.43479422728220624, "incorrect_loss_per_char": 0.10160809755325317, "correct_loss_per_token": 1.3043826818466187, "incorrect_loss_per_token": 0.4064323902130127, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4064323902130127, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": true, "logits_per_token": -0.4064323902130127, "logits_per_char": -0.10160809755325317, "num_chars": 4}, {"sum_logits": -1.3043826818466187, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -1.3043826818466187, "logits_per_char": -0.43479422728220624, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 594, "native_id": 2252, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3039013147354126, "incorrect_loss_raw": 1.4913526773452759, "correct_loss_per_char": 0.07597532868385315, "incorrect_loss_per_char": 0.497117559115092, "correct_loss_per_token": 0.3039013147354126, "incorrect_loss_per_token": 1.4913526773452759, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3039013147354126, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.3039013147354126, "logits_per_char": -0.07597532868385315, "num_chars": 4}, {"sum_logits": -1.4913526773452759, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -1.4913526773452759, "logits_per_char": -0.497117559115092, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 595, "native_id": 1656, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1191068887710571, "incorrect_loss_raw": 0.45298460125923157, "correct_loss_per_char": 0.37303562959035236, "incorrect_loss_per_char": 0.11324615031480789, "correct_loss_per_token": 1.1191068887710571, "incorrect_loss_per_token": 0.45298460125923157, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.45298460125923157, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": true, "logits_per_token": -0.45298460125923157, "logits_per_char": -0.11324615031480789, "num_chars": 4}, {"sum_logits": -1.1191068887710571, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -1.1191068887710571, "logits_per_char": -0.37303562959035236, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 596, "native_id": 283, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.15879788994789124, "incorrect_loss_raw": 2.068021774291992, "correct_loss_per_char": 0.03969947248697281, "incorrect_loss_per_char": 0.6893405914306641, "correct_loss_per_token": 0.15879788994789124, "incorrect_loss_per_token": 2.068021774291992, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.15879788994789124, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.15879788994789124, "logits_per_char": -0.03969947248697281, "num_chars": 4}, {"sum_logits": -2.068021774291992, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -2.068021774291992, "logits_per_char": -0.6893405914306641, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 597, "native_id": 3223, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6534626483917236, "incorrect_loss_raw": 0.26423195004463196, "correct_loss_per_char": 0.5511542161305746, "incorrect_loss_per_char": 0.06605798751115799, "correct_loss_per_token": 1.6534626483917236, "incorrect_loss_per_token": 0.26423195004463196, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26423195004463196, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.26423195004463196, "logits_per_char": -0.06605798751115799, "num_chars": 4}, {"sum_logits": -1.6534626483917236, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.6534626483917236, "logits_per_char": -0.5511542161305746, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 598, "native_id": 3253, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6331661939620972, "incorrect_loss_raw": 0.920089840888977, "correct_loss_per_char": 0.1582915484905243, "incorrect_loss_per_char": 0.306696613629659, "correct_loss_per_token": 0.6331661939620972, "incorrect_loss_per_token": 0.920089840888977, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6331661939620972, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": true, "logits_per_token": -0.6331661939620972, "logits_per_char": -0.1582915484905243, "num_chars": 4}, {"sum_logits": -0.920089840888977, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -0.920089840888977, "logits_per_char": -0.306696613629659, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 599, "native_id": 1001, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6703311204910278, "incorrect_loss_raw": 0.24239765107631683, "correct_loss_per_char": 0.5567770401636759, "incorrect_loss_per_char": 0.06059941276907921, "correct_loss_per_token": 1.6703311204910278, "incorrect_loss_per_token": 0.24239765107631683, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24239765107631683, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.24239765107631683, "logits_per_char": -0.06059941276907921, "num_chars": 4}, {"sum_logits": -1.6703311204910278, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -1.6703311204910278, "logits_per_char": -0.5567770401636759, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 600, "native_id": 2647, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3977423906326294, "incorrect_loss_raw": 1.3262991905212402, "correct_loss_per_char": 0.09943559765815735, "incorrect_loss_per_char": 0.44209973017374676, "correct_loss_per_token": 0.3977423906326294, "incorrect_loss_per_token": 1.3262991905212402, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3977423906326294, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": true, "logits_per_token": -0.3977423906326294, "logits_per_char": -0.09943559765815735, "num_chars": 4}, {"sum_logits": -1.3262991905212402, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -1.3262991905212402, "logits_per_char": -0.44209973017374676, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 601, "native_id": 3055, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.223530575633049, "incorrect_loss_raw": 1.839350938796997, "correct_loss_per_char": 0.05588264390826225, "incorrect_loss_per_char": 0.613116979598999, "correct_loss_per_token": 0.223530575633049, "incorrect_loss_per_token": 1.839350938796997, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.223530575633049, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": true, "logits_per_token": -0.223530575633049, "logits_per_char": -0.05588264390826225, "num_chars": 4}, {"sum_logits": -1.839350938796997, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": false, "logits_per_token": -1.839350938796997, "logits_per_char": -0.613116979598999, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 602, "native_id": 2929, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.15982691943645477, "incorrect_loss_raw": 2.1120638847351074, "correct_loss_per_char": 0.03995672985911369, "incorrect_loss_per_char": 0.7040212949117025, "correct_loss_per_token": 0.15982691943645477, "incorrect_loss_per_token": 2.1120638847351074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.15982691943645477, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.15982691943645477, "logits_per_char": -0.03995672985911369, "num_chars": 4}, {"sum_logits": -2.1120638847351074, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -2.1120638847351074, "logits_per_char": -0.7040212949117025, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 603, "native_id": 2872, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.44100645184516907, "incorrect_loss_raw": 1.1413081884384155, "correct_loss_per_char": 0.11025161296129227, "incorrect_loss_per_char": 0.3804360628128052, "correct_loss_per_token": 0.44100645184516907, "incorrect_loss_per_token": 1.1413081884384155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.44100645184516907, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.44100645184516907, "logits_per_char": -0.11025161296129227, "num_chars": 4}, {"sum_logits": -1.1413081884384155, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.1413081884384155, "logits_per_char": -0.3804360628128052, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 604, "native_id": 972, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0737894773483276, "incorrect_loss_raw": 0.47787895798683167, "correct_loss_per_char": 0.3579298257827759, "incorrect_loss_per_char": 0.11946973949670792, "correct_loss_per_token": 1.0737894773483276, "incorrect_loss_per_token": 0.47787895798683167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.47787895798683167, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.47787895798683167, "logits_per_char": -0.11946973949670792, "num_chars": 4}, {"sum_logits": -1.0737894773483276, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -1.0737894773483276, "logits_per_char": -0.3579298257827759, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 605, "native_id": 1239, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3780105113983154, "incorrect_loss_raw": 0.4208173155784607, "correct_loss_per_char": 0.4593368371327718, "incorrect_loss_per_char": 0.10520432889461517, "correct_loss_per_token": 1.3780105113983154, "incorrect_loss_per_token": 0.4208173155784607, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4208173155784607, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.4208173155784607, "logits_per_char": -0.10520432889461517, "num_chars": 4}, {"sum_logits": -1.3780105113983154, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -1.3780105113983154, "logits_per_char": -0.4593368371327718, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 606, "native_id": 2101, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.33023297786712646, "incorrect_loss_raw": 1.3713185787200928, "correct_loss_per_char": 0.08255824446678162, "incorrect_loss_per_char": 0.4571061929066976, "correct_loss_per_token": 0.33023297786712646, "incorrect_loss_per_token": 1.3713185787200928, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.33023297786712646, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": true, "logits_per_token": -0.33023297786712646, "logits_per_char": -0.08255824446678162, "num_chars": 4}, {"sum_logits": -1.3713185787200928, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": false, "logits_per_token": -1.3713185787200928, "logits_per_char": -0.4571061929066976, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 607, "native_id": 1340, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4918733239173889, "incorrect_loss_raw": 1.1351277828216553, "correct_loss_per_char": 0.12296833097934723, "incorrect_loss_per_char": 0.37837592760721844, "correct_loss_per_token": 0.4918733239173889, "incorrect_loss_per_token": 1.1351277828216553, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4918733239173889, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.4918733239173889, "logits_per_char": -0.12296833097934723, "num_chars": 4}, {"sum_logits": -1.1351277828216553, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -1.1351277828216553, "logits_per_char": -0.37837592760721844, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 608, "native_id": 2127, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9150880575180054, "incorrect_loss_raw": 0.2027432918548584, "correct_loss_per_char": 0.6383626858393351, "incorrect_loss_per_char": 0.0506858229637146, "correct_loss_per_token": 1.9150880575180054, "incorrect_loss_per_token": 0.2027432918548584, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2027432918548584, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -0.2027432918548584, "logits_per_char": -0.0506858229637146, "num_chars": 4}, {"sum_logits": -1.9150880575180054, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -1.9150880575180054, "logits_per_char": -0.6383626858393351, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 609, "native_id": 2123, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.783703088760376, "incorrect_loss_raw": 0.2546103000640869, "correct_loss_per_char": 0.5945676962534586, "incorrect_loss_per_char": 0.06365257501602173, "correct_loss_per_token": 1.783703088760376, "incorrect_loss_per_token": 0.2546103000640869, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2546103000640869, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": true, "logits_per_token": -0.2546103000640869, "logits_per_char": -0.06365257501602173, "num_chars": 4}, {"sum_logits": -1.783703088760376, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": false, "logits_per_token": -1.783703088760376, "logits_per_char": -0.5945676962534586, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 610, "native_id": 1851, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24192669987678528, "incorrect_loss_raw": 1.6767940521240234, "correct_loss_per_char": 0.06048167496919632, "incorrect_loss_per_char": 0.5589313507080078, "correct_loss_per_token": 0.24192669987678528, "incorrect_loss_per_token": 1.6767940521240234, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24192669987678528, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -0.24192669987678528, "logits_per_char": -0.06048167496919632, "num_chars": 4}, {"sum_logits": -1.6767940521240234, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.6767940521240234, "logits_per_char": -0.5589313507080078, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 611, "native_id": 263, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32725879549980164, "incorrect_loss_raw": 1.3764958381652832, "correct_loss_per_char": 0.08181469887495041, "incorrect_loss_per_char": 0.4588319460550944, "correct_loss_per_token": 0.32725879549980164, "incorrect_loss_per_token": 1.3764958381652832, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32725879549980164, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -0.32725879549980164, "logits_per_char": -0.08181469887495041, "num_chars": 4}, {"sum_logits": -1.3764958381652832, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.3764958381652832, "logits_per_char": -0.4588319460550944, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 612, "native_id": 1240, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.223345160484314, "incorrect_loss_raw": 0.44417038559913635, "correct_loss_per_char": 0.407781720161438, "incorrect_loss_per_char": 0.11104259639978409, "correct_loss_per_token": 1.223345160484314, "incorrect_loss_per_token": 0.44417038559913635, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.44417038559913635, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.44417038559913635, "logits_per_char": -0.11104259639978409, "num_chars": 4}, {"sum_logits": -1.223345160484314, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.223345160484314, "logits_per_char": -0.407781720161438, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 613, "native_id": 106, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8401954174041748, "incorrect_loss_raw": 0.7500266432762146, "correct_loss_per_char": 0.2800651391347249, "incorrect_loss_per_char": 0.18750666081905365, "correct_loss_per_token": 0.8401954174041748, "incorrect_loss_per_token": 0.7500266432762146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7500266432762146, "num_tokens": 1, "num_tokens_all": 836, "is_greedy": true, "logits_per_token": -0.7500266432762146, "logits_per_char": -0.18750666081905365, "num_chars": 4}, {"sum_logits": -0.8401954174041748, "num_tokens": 1, "num_tokens_all": 836, "is_greedy": false, "logits_per_token": -0.8401954174041748, "logits_per_char": -0.2800651391347249, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 614, "native_id": 2052, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.996421217918396, "incorrect_loss_raw": 0.5321294665336609, "correct_loss_per_char": 0.33214040597279865, "incorrect_loss_per_char": 0.13303236663341522, "correct_loss_per_token": 0.996421217918396, "incorrect_loss_per_token": 0.5321294665336609, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5321294665336609, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": true, "logits_per_token": -0.5321294665336609, "logits_per_char": -0.13303236663341522, "num_chars": 4}, {"sum_logits": -0.996421217918396, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": false, "logits_per_token": -0.996421217918396, "logits_per_char": -0.33214040597279865, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 615, "native_id": 739, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.43347930908203125, "incorrect_loss_raw": 1.2085883617401123, "correct_loss_per_char": 0.10836982727050781, "incorrect_loss_per_char": 0.4028627872467041, "correct_loss_per_token": 0.43347930908203125, "incorrect_loss_per_token": 1.2085883617401123, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.43347930908203125, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.43347930908203125, "logits_per_char": -0.10836982727050781, "num_chars": 4}, {"sum_logits": -1.2085883617401123, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.2085883617401123, "logits_per_char": -0.4028627872467041, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 616, "native_id": 584, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.48307496309280396, "incorrect_loss_raw": 1.0949572324752808, "correct_loss_per_char": 0.12076874077320099, "incorrect_loss_per_char": 0.36498574415842694, "correct_loss_per_token": 0.48307496309280396, "incorrect_loss_per_token": 1.0949572324752808, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.48307496309280396, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.48307496309280396, "logits_per_char": -0.12076874077320099, "num_chars": 4}, {"sum_logits": -1.0949572324752808, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.0949572324752808, "logits_per_char": -0.36498574415842694, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 617, "native_id": 601, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6812182664871216, "incorrect_loss_raw": 0.7787464261054993, "correct_loss_per_char": 0.1703045666217804, "incorrect_loss_per_char": 0.25958214203516644, "correct_loss_per_token": 0.6812182664871216, "incorrect_loss_per_token": 0.7787464261054993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6812182664871216, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.6812182664871216, "logits_per_char": -0.1703045666217804, "num_chars": 4}, {"sum_logits": -0.7787464261054993, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -0.7787464261054993, "logits_per_char": -0.25958214203516644, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 618, "native_id": 3034, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6599273681640625, "incorrect_loss_raw": 0.24971653521060944, "correct_loss_per_char": 0.5533091227213541, "incorrect_loss_per_char": 0.06242913380265236, "correct_loss_per_token": 1.6599273681640625, "incorrect_loss_per_token": 0.24971653521060944, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24971653521060944, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.24971653521060944, "logits_per_char": -0.06242913380265236, "num_chars": 4}, {"sum_logits": -1.6599273681640625, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.6599273681640625, "logits_per_char": -0.5533091227213541, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 619, "native_id": 1754, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4697495698928833, "incorrect_loss_raw": 1.1447278261184692, "correct_loss_per_char": 0.11743739247322083, "incorrect_loss_per_char": 0.38157594203948975, "correct_loss_per_token": 0.4697495698928833, "incorrect_loss_per_token": 1.1447278261184692, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4697495698928833, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.4697495698928833, "logits_per_char": -0.11743739247322083, "num_chars": 4}, {"sum_logits": -1.1447278261184692, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.1447278261184692, "logits_per_char": -0.38157594203948975, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 620, "native_id": 725, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6310838460922241, "incorrect_loss_raw": 0.861059308052063, "correct_loss_per_char": 0.15777096152305603, "incorrect_loss_per_char": 0.2870197693506877, "correct_loss_per_token": 0.6310838460922241, "incorrect_loss_per_token": 0.861059308052063, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6310838460922241, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -0.6310838460922241, "logits_per_char": -0.15777096152305603, "num_chars": 4}, {"sum_logits": -0.861059308052063, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -0.861059308052063, "logits_per_char": -0.2870197693506877, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 621, "native_id": 2160, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9835031628608704, "incorrect_loss_raw": 0.5482328534126282, "correct_loss_per_char": 0.3278343876202901, "incorrect_loss_per_char": 0.13705821335315704, "correct_loss_per_token": 0.9835031628608704, "incorrect_loss_per_token": 0.5482328534126282, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5482328534126282, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": true, "logits_per_token": -0.5482328534126282, "logits_per_char": -0.13705821335315704, "num_chars": 4}, {"sum_logits": -0.9835031628608704, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -0.9835031628608704, "logits_per_char": -0.3278343876202901, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 622, "native_id": 560, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1288070678710938, "incorrect_loss_raw": 0.44945603609085083, "correct_loss_per_char": 0.37626902262369794, "incorrect_loss_per_char": 0.11236400902271271, "correct_loss_per_token": 1.1288070678710938, "incorrect_loss_per_token": 0.44945603609085083, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.44945603609085083, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.44945603609085083, "logits_per_char": -0.11236400902271271, "num_chars": 4}, {"sum_logits": -1.1288070678710938, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.1288070678710938, "logits_per_char": -0.37626902262369794, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 623, "native_id": 1234, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.372043251991272, "incorrect_loss_raw": 0.3595409095287323, "correct_loss_per_char": 0.4573477506637573, "incorrect_loss_per_char": 0.08988522738218307, "correct_loss_per_token": 1.372043251991272, "incorrect_loss_per_token": 0.3595409095287323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3595409095287323, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.3595409095287323, "logits_per_char": -0.08988522738218307, "num_chars": 4}, {"sum_logits": -1.372043251991272, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.372043251991272, "logits_per_char": -0.4573477506637573, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 624, "native_id": 384, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38387125730514526, "incorrect_loss_raw": 1.4373247623443604, "correct_loss_per_char": 0.09596781432628632, "incorrect_loss_per_char": 0.4791082541147868, "correct_loss_per_token": 0.38387125730514526, "incorrect_loss_per_token": 1.4373247623443604, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.38387125730514526, "num_tokens": 1, "num_tokens_all": 1185, "is_greedy": true, "logits_per_token": -0.38387125730514526, "logits_per_char": -0.09596781432628632, "num_chars": 4}, {"sum_logits": -1.4373247623443604, "num_tokens": 1, "num_tokens_all": 1185, "is_greedy": false, "logits_per_token": -1.4373247623443604, "logits_per_char": -0.4791082541147868, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 625, "native_id": 2000, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4440088272094727, "incorrect_loss_raw": 0.32643982768058777, "correct_loss_per_char": 0.4813362757364909, "incorrect_loss_per_char": 0.08160995692014694, "correct_loss_per_token": 1.4440088272094727, "incorrect_loss_per_token": 0.32643982768058777, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32643982768058777, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.32643982768058777, "logits_per_char": -0.08160995692014694, "num_chars": 4}, {"sum_logits": -1.4440088272094727, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -1.4440088272094727, "logits_per_char": -0.4813362757364909, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 626, "native_id": 2214, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9704986810684204, "incorrect_loss_raw": 0.5513436794281006, "correct_loss_per_char": 0.32349956035614014, "incorrect_loss_per_char": 0.13783591985702515, "correct_loss_per_token": 0.9704986810684204, "incorrect_loss_per_token": 0.5513436794281006, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5513436794281006, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.5513436794281006, "logits_per_char": -0.13783591985702515, "num_chars": 4}, {"sum_logits": -0.9704986810684204, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -0.9704986810684204, "logits_per_char": -0.32349956035614014, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 627, "native_id": 2742, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2238489389419556, "incorrect_loss_raw": 0.4249372184276581, "correct_loss_per_char": 0.40794964631398517, "incorrect_loss_per_char": 0.10623430460691452, "correct_loss_per_token": 1.2238489389419556, "incorrect_loss_per_token": 0.4249372184276581, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4249372184276581, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.4249372184276581, "logits_per_char": -0.10623430460691452, "num_chars": 4}, {"sum_logits": -1.2238489389419556, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.2238489389419556, "logits_per_char": -0.40794964631398517, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 628, "native_id": 2462, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5769932866096497, "incorrect_loss_raw": 0.9334885478019714, "correct_loss_per_char": 0.1923310955365499, "incorrect_loss_per_char": 0.23337213695049286, "correct_loss_per_token": 0.5769932866096497, "incorrect_loss_per_token": 0.9334885478019714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9334885478019714, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": false, "logits_per_token": -0.9334885478019714, "logits_per_char": -0.23337213695049286, "num_chars": 4}, {"sum_logits": -0.5769932866096497, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": true, "logits_per_token": -0.5769932866096497, "logits_per_char": -0.1923310955365499, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 629, "native_id": 547, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20335280895233154, "incorrect_loss_raw": 1.953230857849121, "correct_loss_per_char": 0.050838202238082886, "incorrect_loss_per_char": 0.6510769526163737, "correct_loss_per_token": 0.20335280895233154, "incorrect_loss_per_token": 1.953230857849121, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20335280895233154, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.20335280895233154, "logits_per_char": -0.050838202238082886, "num_chars": 4}, {"sum_logits": -1.953230857849121, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.953230857849121, "logits_per_char": -0.6510769526163737, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 630, "native_id": 1093, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6421924829483032, "incorrect_loss_raw": 0.2809094488620758, "correct_loss_per_char": 0.5473974943161011, "incorrect_loss_per_char": 0.07022736221551895, "correct_loss_per_token": 1.6421924829483032, "incorrect_loss_per_token": 0.2809094488620758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2809094488620758, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": true, "logits_per_token": -0.2809094488620758, "logits_per_char": -0.07022736221551895, "num_chars": 4}, {"sum_logits": -1.6421924829483032, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": false, "logits_per_token": -1.6421924829483032, "logits_per_char": -0.5473974943161011, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 631, "native_id": 1765, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2163843810558319, "incorrect_loss_raw": 1.9035866260528564, "correct_loss_per_char": 0.05409609526395798, "incorrect_loss_per_char": 0.6345288753509521, "correct_loss_per_token": 0.2163843810558319, "incorrect_loss_per_token": 1.9035866260528564, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2163843810558319, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": true, "logits_per_token": -0.2163843810558319, "logits_per_char": -0.05409609526395798, "num_chars": 4}, {"sum_logits": -1.9035866260528564, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.9035866260528564, "logits_per_char": -0.6345288753509521, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 632, "native_id": 1933, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22116586565971375, "incorrect_loss_raw": 1.7550325393676758, "correct_loss_per_char": 0.055291466414928436, "incorrect_loss_per_char": 0.585010846455892, "correct_loss_per_token": 0.22116586565971375, "incorrect_loss_per_token": 1.7550325393676758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22116586565971375, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.22116586565971375, "logits_per_char": -0.055291466414928436, "num_chars": 4}, {"sum_logits": -1.7550325393676758, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.7550325393676758, "logits_per_char": -0.585010846455892, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 633, "native_id": 1141, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4191012978553772, "incorrect_loss_raw": 1.2646944522857666, "correct_loss_per_char": 0.13970043261845908, "incorrect_loss_per_char": 0.31617361307144165, "correct_loss_per_token": 0.4191012978553772, "incorrect_loss_per_token": 1.2646944522857666, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2646944522857666, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.2646944522857666, "logits_per_char": -0.31617361307144165, "num_chars": 4}, {"sum_logits": -0.4191012978553772, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.4191012978553772, "logits_per_char": -0.13970043261845908, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 634, "native_id": 1292, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.440830945968628, "incorrect_loss_raw": 0.32859373092651367, "correct_loss_per_char": 0.48027698198954266, "incorrect_loss_per_char": 0.08214843273162842, "correct_loss_per_token": 1.440830945968628, "incorrect_loss_per_token": 0.32859373092651367, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32859373092651367, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -0.32859373092651367, "logits_per_char": -0.08214843273162842, "num_chars": 4}, {"sum_logits": -1.440830945968628, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.440830945968628, "logits_per_char": -0.48027698198954266, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 635, "native_id": 686, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1716575622558594, "incorrect_loss_raw": 0.14996808767318726, "correct_loss_per_char": 0.7238858540852865, "incorrect_loss_per_char": 0.037492021918296814, "correct_loss_per_token": 2.1716575622558594, "incorrect_loss_per_token": 0.14996808767318726, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.14996808767318726, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -0.14996808767318726, "logits_per_char": -0.037492021918296814, "num_chars": 4}, {"sum_logits": -2.1716575622558594, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -2.1716575622558594, "logits_per_char": -0.7238858540852865, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 636, "native_id": 270, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25758758187294006, "incorrect_loss_raw": 1.6934394836425781, "correct_loss_per_char": 0.06439689546823502, "incorrect_loss_per_char": 0.5644798278808594, "correct_loss_per_token": 0.25758758187294006, "incorrect_loss_per_token": 1.6934394836425781, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25758758187294006, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.25758758187294006, "logits_per_char": -0.06439689546823502, "num_chars": 4}, {"sum_logits": -1.6934394836425781, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.6934394836425781, "logits_per_char": -0.5644798278808594, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 637, "native_id": 1799, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30049288272857666, "incorrect_loss_raw": 1.4829667806625366, "correct_loss_per_char": 0.07512322068214417, "incorrect_loss_per_char": 0.4943222602208455, "correct_loss_per_token": 0.30049288272857666, "incorrect_loss_per_token": 1.4829667806625366, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30049288272857666, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": true, "logits_per_token": -0.30049288272857666, "logits_per_char": -0.07512322068214417, "num_chars": 4}, {"sum_logits": -1.4829667806625366, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.4829667806625366, "logits_per_char": -0.4943222602208455, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 638, "native_id": 943, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8481295108795166, "incorrect_loss_raw": 0.22363238036632538, "correct_loss_per_char": 0.6160431702931722, "incorrect_loss_per_char": 0.055908095091581345, "correct_loss_per_token": 1.8481295108795166, "incorrect_loss_per_token": 0.22363238036632538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22363238036632538, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.22363238036632538, "logits_per_char": -0.055908095091581345, "num_chars": 4}, {"sum_logits": -1.8481295108795166, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.8481295108795166, "logits_per_char": -0.6160431702931722, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 639, "native_id": 1811, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4077276885509491, "incorrect_loss_raw": 1.257056474685669, "correct_loss_per_char": 0.10193192213773727, "incorrect_loss_per_char": 0.41901882489522296, "correct_loss_per_token": 0.4077276885509491, "incorrect_loss_per_token": 1.257056474685669, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4077276885509491, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.4077276885509491, "logits_per_char": -0.10193192213773727, "num_chars": 4}, {"sum_logits": -1.257056474685669, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.257056474685669, "logits_per_char": -0.41901882489522296, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 640, "native_id": 1022, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25763043761253357, "incorrect_loss_raw": 1.6526615619659424, "correct_loss_per_char": 0.06440760940313339, "incorrect_loss_per_char": 0.5508871873219808, "correct_loss_per_token": 0.25763043761253357, "incorrect_loss_per_token": 1.6526615619659424, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25763043761253357, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": true, "logits_per_token": -0.25763043761253357, "logits_per_char": -0.06440760940313339, "num_chars": 4}, {"sum_logits": -1.6526615619659424, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": false, "logits_per_token": -1.6526615619659424, "logits_per_char": -0.5508871873219808, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 641, "native_id": 273, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2460382878780365, "incorrect_loss_raw": 1.811037540435791, "correct_loss_per_char": 0.061509571969509125, "incorrect_loss_per_char": 0.6036791801452637, "correct_loss_per_token": 0.2460382878780365, "incorrect_loss_per_token": 1.811037540435791, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2460382878780365, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": true, "logits_per_token": -0.2460382878780365, "logits_per_char": -0.061509571969509125, "num_chars": 4}, {"sum_logits": -1.811037540435791, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": false, "logits_per_token": -1.811037540435791, "logits_per_char": -0.6036791801452637, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 642, "native_id": 1092, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7532252073287964, "incorrect_loss_raw": 0.24119894206523895, "correct_loss_per_char": 0.5844084024429321, "incorrect_loss_per_char": 0.06029973551630974, "correct_loss_per_token": 1.7532252073287964, "incorrect_loss_per_token": 0.24119894206523895, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24119894206523895, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.24119894206523895, "logits_per_char": -0.06029973551630974, "num_chars": 4}, {"sum_logits": -1.7532252073287964, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.7532252073287964, "logits_per_char": -0.5844084024429321, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 643, "native_id": 2709, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4903515875339508, "incorrect_loss_raw": 1.1112561225891113, "correct_loss_per_char": 0.1225878968834877, "incorrect_loss_per_char": 0.3704187075297038, "correct_loss_per_token": 0.4903515875339508, "incorrect_loss_per_token": 1.1112561225891113, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4903515875339508, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.4903515875339508, "logits_per_char": -0.1225878968834877, "num_chars": 4}, {"sum_logits": -1.1112561225891113, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.1112561225891113, "logits_per_char": -0.3704187075297038, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 644, "native_id": 2578, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21500900387763977, "incorrect_loss_raw": 1.7819280624389648, "correct_loss_per_char": 0.05375225096940994, "incorrect_loss_per_char": 0.5939760208129883, "correct_loss_per_token": 0.21500900387763977, "incorrect_loss_per_token": 1.7819280624389648, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21500900387763977, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": true, "logits_per_token": -0.21500900387763977, "logits_per_char": -0.05375225096940994, "num_chars": 4}, {"sum_logits": -1.7819280624389648, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": false, "logits_per_token": -1.7819280624389648, "logits_per_char": -0.5939760208129883, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 645, "native_id": 2299, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.45981690287590027, "incorrect_loss_raw": 1.187988519668579, "correct_loss_per_char": 0.15327230095863342, "incorrect_loss_per_char": 0.2969971299171448, "correct_loss_per_token": 0.45981690287590027, "incorrect_loss_per_token": 1.187988519668579, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.187988519668579, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": false, "logits_per_token": -1.187988519668579, "logits_per_char": -0.2969971299171448, "num_chars": 4}, {"sum_logits": -0.45981690287590027, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": true, "logits_per_token": -0.45981690287590027, "logits_per_char": -0.15327230095863342, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 646, "native_id": 3033, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5063657760620117, "incorrect_loss_raw": 0.29723912477493286, "correct_loss_per_char": 0.5021219253540039, "incorrect_loss_per_char": 0.07430978119373322, "correct_loss_per_token": 1.5063657760620117, "incorrect_loss_per_token": 0.29723912477493286, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29723912477493286, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": true, "logits_per_token": -0.29723912477493286, "logits_per_char": -0.07430978119373322, "num_chars": 4}, {"sum_logits": -1.5063657760620117, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": false, "logits_per_token": -1.5063657760620117, "logits_per_char": -0.5021219253540039, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 647, "native_id": 3076, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24897295236587524, "incorrect_loss_raw": 1.6158483028411865, "correct_loss_per_char": 0.06224323809146881, "incorrect_loss_per_char": 0.5386161009470621, "correct_loss_per_token": 0.24897295236587524, "incorrect_loss_per_token": 1.6158483028411865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24897295236587524, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.24897295236587524, "logits_per_char": -0.06224323809146881, "num_chars": 4}, {"sum_logits": -1.6158483028411865, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.6158483028411865, "logits_per_char": -0.5386161009470621, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 648, "native_id": 1614, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24130018055438995, "incorrect_loss_raw": 1.6446037292480469, "correct_loss_per_char": 0.06032504513859749, "incorrect_loss_per_char": 0.5482012430826823, "correct_loss_per_token": 0.24130018055438995, "incorrect_loss_per_token": 1.6446037292480469, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24130018055438995, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -0.24130018055438995, "logits_per_char": -0.06032504513859749, "num_chars": 4}, {"sum_logits": -1.6446037292480469, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -1.6446037292480469, "logits_per_char": -0.5482012430826823, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 649, "native_id": 892, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2968026399612427, "incorrect_loss_raw": 1.4723752737045288, "correct_loss_per_char": 0.07420065999031067, "incorrect_loss_per_char": 0.4907917579015096, "correct_loss_per_token": 0.2968026399612427, "incorrect_loss_per_token": 1.4723752737045288, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2968026399612427, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.2968026399612427, "logits_per_char": -0.07420065999031067, "num_chars": 4}, {"sum_logits": -1.4723752737045288, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.4723752737045288, "logits_per_char": -0.4907917579015096, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 650, "native_id": 823, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3726006746292114, "incorrect_loss_raw": 1.3415021896362305, "correct_loss_per_char": 0.09315016865730286, "incorrect_loss_per_char": 0.44716739654541016, "correct_loss_per_token": 0.3726006746292114, "incorrect_loss_per_token": 1.3415021896362305, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3726006746292114, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.3726006746292114, "logits_per_char": -0.09315016865730286, "num_chars": 4}, {"sum_logits": -1.3415021896362305, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.3415021896362305, "logits_per_char": -0.44716739654541016, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 651, "native_id": 2295, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3056144416332245, "incorrect_loss_raw": 1.5758498907089233, "correct_loss_per_char": 0.07640361040830612, "incorrect_loss_per_char": 0.5252832969029745, "correct_loss_per_token": 0.3056144416332245, "incorrect_loss_per_token": 1.5758498907089233, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3056144416332245, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.3056144416332245, "logits_per_char": -0.07640361040830612, "num_chars": 4}, {"sum_logits": -1.5758498907089233, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.5758498907089233, "logits_per_char": -0.5252832969029745, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 652, "native_id": 2139, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24611341953277588, "incorrect_loss_raw": 1.7276791334152222, "correct_loss_per_char": 0.06152835488319397, "incorrect_loss_per_char": 0.5758930444717407, "correct_loss_per_token": 0.24611341953277588, "incorrect_loss_per_token": 1.7276791334152222, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24611341953277588, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.24611341953277588, "logits_per_char": -0.06152835488319397, "num_chars": 4}, {"sum_logits": -1.7276791334152222, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.7276791334152222, "logits_per_char": -0.5758930444717407, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 653, "native_id": 598, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.347810834646225, "incorrect_loss_raw": 1.3772673606872559, "correct_loss_per_char": 0.08695270866155624, "incorrect_loss_per_char": 0.45908912022908527, "correct_loss_per_token": 0.347810834646225, "incorrect_loss_per_token": 1.3772673606872559, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.347810834646225, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.347810834646225, "logits_per_char": -0.08695270866155624, "num_chars": 4}, {"sum_logits": -1.3772673606872559, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.3772673606872559, "logits_per_char": -0.45908912022908527, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 654, "native_id": 868, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5649399161338806, "incorrect_loss_raw": 0.9414826035499573, "correct_loss_per_char": 0.14123497903347015, "incorrect_loss_per_char": 0.3138275345166524, "correct_loss_per_token": 0.5649399161338806, "incorrect_loss_per_token": 0.9414826035499573, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5649399161338806, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.5649399161338806, "logits_per_char": -0.14123497903347015, "num_chars": 4}, {"sum_logits": -0.9414826035499573, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -0.9414826035499573, "logits_per_char": -0.3138275345166524, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 655, "native_id": 1403, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3048694133758545, "incorrect_loss_raw": 0.3633756637573242, "correct_loss_per_char": 0.43495647112528485, "incorrect_loss_per_char": 0.09084391593933105, "correct_loss_per_token": 1.3048694133758545, "incorrect_loss_per_token": 0.3633756637573242, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3633756637573242, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": true, "logits_per_token": -0.3633756637573242, "logits_per_char": -0.09084391593933105, "num_chars": 4}, {"sum_logits": -1.3048694133758545, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": false, "logits_per_token": -1.3048694133758545, "logits_per_char": -0.43495647112528485, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 656, "native_id": 2531, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.0999169796705246, "incorrect_loss_raw": 2.727100372314453, "correct_loss_per_char": 0.02497924491763115, "incorrect_loss_per_char": 0.909033457438151, "correct_loss_per_token": 0.0999169796705246, "incorrect_loss_per_token": 2.727100372314453, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.0999169796705246, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": true, "logits_per_token": -0.0999169796705246, "logits_per_char": -0.02497924491763115, "num_chars": 4}, {"sum_logits": -2.727100372314453, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": false, "logits_per_token": -2.727100372314453, "logits_per_char": -0.909033457438151, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 657, "native_id": 1692, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3950639069080353, "incorrect_loss_raw": 1.2736122608184814, "correct_loss_per_char": 0.09876597672700882, "incorrect_loss_per_char": 0.42453742027282715, "correct_loss_per_token": 0.3950639069080353, "incorrect_loss_per_token": 1.2736122608184814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3950639069080353, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.3950639069080353, "logits_per_char": -0.09876597672700882, "num_chars": 4}, {"sum_logits": -1.2736122608184814, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -1.2736122608184814, "logits_per_char": -0.42453742027282715, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 658, "native_id": 7, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5491594672203064, "incorrect_loss_raw": 1.2860349416732788, "correct_loss_per_char": 0.1372898668050766, "incorrect_loss_per_char": 0.42867831389109295, "correct_loss_per_token": 0.5491594672203064, "incorrect_loss_per_token": 1.2860349416732788, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5491594672203064, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.5491594672203064, "logits_per_char": -0.1372898668050766, "num_chars": 4}, {"sum_logits": -1.2860349416732788, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.2860349416732788, "logits_per_char": -0.42867831389109295, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 659, "native_id": 2660, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5192643404006958, "incorrect_loss_raw": 0.3303541839122772, "correct_loss_per_char": 0.5064214468002319, "incorrect_loss_per_char": 0.0825885459780693, "correct_loss_per_token": 1.5192643404006958, "incorrect_loss_per_token": 0.3303541839122772, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3303541839122772, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.3303541839122772, "logits_per_char": -0.0825885459780693, "num_chars": 4}, {"sum_logits": -1.5192643404006958, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.5192643404006958, "logits_per_char": -0.5064214468002319, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 660, "native_id": 3190, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.688713550567627, "incorrect_loss_raw": 0.2553987503051758, "correct_loss_per_char": 0.5629045168558756, "incorrect_loss_per_char": 0.06384968757629395, "correct_loss_per_token": 1.688713550567627, "incorrect_loss_per_token": 0.2553987503051758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2553987503051758, "num_tokens": 1, "num_tokens_all": 1457, "is_greedy": true, "logits_per_token": -0.2553987503051758, "logits_per_char": -0.06384968757629395, "num_chars": 4}, {"sum_logits": -1.688713550567627, "num_tokens": 1, "num_tokens_all": 1457, "is_greedy": false, "logits_per_token": -1.688713550567627, "logits_per_char": -0.5629045168558756, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 661, "native_id": 783, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7186269760131836, "incorrect_loss_raw": 0.27062687277793884, "correct_loss_per_char": 0.5728756586710612, "incorrect_loss_per_char": 0.06765671819448471, "correct_loss_per_token": 1.7186269760131836, "incorrect_loss_per_token": 0.27062687277793884, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27062687277793884, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": true, "logits_per_token": -0.27062687277793884, "logits_per_char": -0.06765671819448471, "num_chars": 4}, {"sum_logits": -1.7186269760131836, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": false, "logits_per_token": -1.7186269760131836, "logits_per_char": -0.5728756586710612, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 662, "native_id": 916, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24876642227172852, "incorrect_loss_raw": 1.9441635608673096, "correct_loss_per_char": 0.06219160556793213, "incorrect_loss_per_char": 0.6480545202891032, "correct_loss_per_token": 0.24876642227172852, "incorrect_loss_per_token": 1.9441635608673096, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24876642227172852, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.24876642227172852, "logits_per_char": -0.06219160556793213, "num_chars": 4}, {"sum_logits": -1.9441635608673096, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -1.9441635608673096, "logits_per_char": -0.6480545202891032, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 663, "native_id": 2266, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.45223456621170044, "incorrect_loss_raw": 1.113527774810791, "correct_loss_per_char": 0.11305864155292511, "incorrect_loss_per_char": 0.37117592493693036, "correct_loss_per_token": 0.45223456621170044, "incorrect_loss_per_token": 1.113527774810791, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.45223456621170044, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": true, "logits_per_token": -0.45223456621170044, "logits_per_char": -0.11305864155292511, "num_chars": 4}, {"sum_logits": -1.113527774810791, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": false, "logits_per_token": -1.113527774810791, "logits_per_char": -0.37117592493693036, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 664, "native_id": 67, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7137595415115356, "incorrect_loss_raw": 0.26177287101745605, "correct_loss_per_char": 0.5712531805038452, "incorrect_loss_per_char": 0.06544321775436401, "correct_loss_per_token": 1.7137595415115356, "incorrect_loss_per_token": 0.26177287101745605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26177287101745605, "num_tokens": 1, "num_tokens_all": 855, "is_greedy": true, "logits_per_token": -0.26177287101745605, "logits_per_char": -0.06544321775436401, "num_chars": 4}, {"sum_logits": -1.7137595415115356, "num_tokens": 1, "num_tokens_all": 855, "is_greedy": false, "logits_per_token": -1.7137595415115356, "logits_per_char": -0.5712531805038452, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 665, "native_id": 2848, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.548930823802948, "incorrect_loss_raw": 1.0138449668884277, "correct_loss_per_char": 0.137232705950737, "incorrect_loss_per_char": 0.3379483222961426, "correct_loss_per_token": 0.548930823802948, "incorrect_loss_per_token": 1.0138449668884277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.548930823802948, "num_tokens": 1, "num_tokens_all": 880, "is_greedy": true, "logits_per_token": -0.548930823802948, "logits_per_char": -0.137232705950737, "num_chars": 4}, {"sum_logits": -1.0138449668884277, "num_tokens": 1, "num_tokens_all": 880, "is_greedy": false, "logits_per_token": -1.0138449668884277, "logits_per_char": -0.3379483222961426, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 666, "native_id": 1487, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26079079508781433, "incorrect_loss_raw": 1.6362897157669067, "correct_loss_per_char": 0.06519769877195358, "incorrect_loss_per_char": 0.5454299052556356, "correct_loss_per_token": 0.26079079508781433, "incorrect_loss_per_token": 1.6362897157669067, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.26079079508781433, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.26079079508781433, "logits_per_char": -0.06519769877195358, "num_chars": 4}, {"sum_logits": -1.6362897157669067, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.6362897157669067, "logits_per_char": -0.5454299052556356, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 667, "native_id": 1803, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.15882940590381622, "incorrect_loss_raw": 2.142646074295044, "correct_loss_per_char": 0.039707351475954056, "incorrect_loss_per_char": 0.714215358098348, "correct_loss_per_token": 0.15882940590381622, "incorrect_loss_per_token": 2.142646074295044, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.15882940590381622, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": true, "logits_per_token": -0.15882940590381622, "logits_per_char": -0.039707351475954056, "num_chars": 4}, {"sum_logits": -2.142646074295044, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -2.142646074295044, "logits_per_char": -0.714215358098348, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 668, "native_id": 968, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2647498846054077, "incorrect_loss_raw": 1.667344570159912, "correct_loss_per_char": 0.06618747115135193, "incorrect_loss_per_char": 0.5557815233866373, "correct_loss_per_token": 0.2647498846054077, "incorrect_loss_per_token": 1.667344570159912, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2647498846054077, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.2647498846054077, "logits_per_char": -0.06618747115135193, "num_chars": 4}, {"sum_logits": -1.667344570159912, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.667344570159912, "logits_per_char": -0.5557815233866373, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 669, "native_id": 45, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9753349423408508, "incorrect_loss_raw": 0.5420148372650146, "correct_loss_per_char": 0.2438337355852127, "incorrect_loss_per_char": 0.18067161242167154, "correct_loss_per_token": 0.9753349423408508, "incorrect_loss_per_token": 0.5420148372650146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9753349423408508, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -0.9753349423408508, "logits_per_char": -0.2438337355852127, "num_chars": 4}, {"sum_logits": -0.5420148372650146, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.5420148372650146, "logits_per_char": -0.18067161242167154, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 670, "native_id": 1697, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0171263217926025, "incorrect_loss_raw": 0.5387485027313232, "correct_loss_per_char": 0.33904210726420086, "incorrect_loss_per_char": 0.1346871256828308, "correct_loss_per_token": 1.0171263217926025, "incorrect_loss_per_token": 0.5387485027313232, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5387485027313232, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.5387485027313232, "logits_per_char": -0.1346871256828308, "num_chars": 4}, {"sum_logits": -1.0171263217926025, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -1.0171263217926025, "logits_per_char": -0.33904210726420086, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 671, "native_id": 1729, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34788525104522705, "incorrect_loss_raw": 1.4317666292190552, "correct_loss_per_char": 0.08697131276130676, "incorrect_loss_per_char": 0.4772555430730184, "correct_loss_per_token": 0.34788525104522705, "incorrect_loss_per_token": 1.4317666292190552, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34788525104522705, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": true, "logits_per_token": -0.34788525104522705, "logits_per_char": -0.08697131276130676, "num_chars": 4}, {"sum_logits": -1.4317666292190552, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": false, "logits_per_token": -1.4317666292190552, "logits_per_char": -0.4772555430730184, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 672, "native_id": 2034, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.47997134923934937, "incorrect_loss_raw": 1.1618859767913818, "correct_loss_per_char": 0.11999283730983734, "incorrect_loss_per_char": 0.38729532559712726, "correct_loss_per_token": 0.47997134923934937, "incorrect_loss_per_token": 1.1618859767913818, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.47997134923934937, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.47997134923934937, "logits_per_char": -0.11999283730983734, "num_chars": 4}, {"sum_logits": -1.1618859767913818, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.1618859767913818, "logits_per_char": -0.38729532559712726, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 673, "native_id": 1727, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29346421360969543, "incorrect_loss_raw": 1.6360951662063599, "correct_loss_per_char": 0.07336605340242386, "incorrect_loss_per_char": 0.54536505540212, "correct_loss_per_token": 0.29346421360969543, "incorrect_loss_per_token": 1.6360951662063599, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29346421360969543, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.29346421360969543, "logits_per_char": -0.07336605340242386, "num_chars": 4}, {"sum_logits": -1.6360951662063599, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.6360951662063599, "logits_per_char": -0.54536505540212, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 674, "native_id": 2981, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2062431573867798, "incorrect_loss_raw": 0.48776739835739136, "correct_loss_per_char": 0.30156078934669495, "incorrect_loss_per_char": 0.16258913278579712, "correct_loss_per_token": 1.2062431573867798, "incorrect_loss_per_token": 0.48776739835739136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2062431573867798, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -1.2062431573867798, "logits_per_char": -0.30156078934669495, "num_chars": 4}, {"sum_logits": -0.48776739835739136, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.48776739835739136, "logits_per_char": -0.16258913278579712, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 675, "native_id": 3164, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7096404433250427, "incorrect_loss_raw": 0.7626479864120483, "correct_loss_per_char": 0.17741011083126068, "incorrect_loss_per_char": 0.2542159954706828, "correct_loss_per_token": 0.7096404433250427, "incorrect_loss_per_token": 0.7626479864120483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7096404433250427, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -0.7096404433250427, "logits_per_char": -0.17741011083126068, "num_chars": 4}, {"sum_logits": -0.7626479864120483, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -0.7626479864120483, "logits_per_char": -0.2542159954706828, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 676, "native_id": 2610, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19231823086738586, "incorrect_loss_raw": 1.88265860080719, "correct_loss_per_char": 0.048079557716846466, "incorrect_loss_per_char": 0.62755286693573, "correct_loss_per_token": 0.19231823086738586, "incorrect_loss_per_token": 1.88265860080719, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19231823086738586, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.19231823086738586, "logits_per_char": -0.048079557716846466, "num_chars": 4}, {"sum_logits": -1.88265860080719, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.88265860080719, "logits_per_char": -0.62755286693573, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 677, "native_id": 1021, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19390271604061127, "incorrect_loss_raw": 1.8233745098114014, "correct_loss_per_char": 0.04847567901015282, "incorrect_loss_per_char": 0.6077915032704672, "correct_loss_per_token": 0.19390271604061127, "incorrect_loss_per_token": 1.8233745098114014, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19390271604061127, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": true, "logits_per_token": -0.19390271604061127, "logits_per_char": -0.04847567901015282, "num_chars": 4}, {"sum_logits": -1.8233745098114014, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": false, "logits_per_token": -1.8233745098114014, "logits_per_char": -0.6077915032704672, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 678, "native_id": 2403, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6512230038642883, "incorrect_loss_raw": 0.8956917524337769, "correct_loss_per_char": 0.16280575096607208, "incorrect_loss_per_char": 0.2985639174779256, "correct_loss_per_token": 0.6512230038642883, "incorrect_loss_per_token": 0.8956917524337769, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6512230038642883, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.6512230038642883, "logits_per_char": -0.16280575096607208, "num_chars": 4}, {"sum_logits": -0.8956917524337769, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -0.8956917524337769, "logits_per_char": -0.2985639174779256, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 679, "native_id": 3216, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.482454538345337, "incorrect_loss_raw": 0.3003498911857605, "correct_loss_per_char": 0.494151512781779, "incorrect_loss_per_char": 0.07508747279644012, "correct_loss_per_token": 1.482454538345337, "incorrect_loss_per_token": 0.3003498911857605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3003498911857605, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.3003498911857605, "logits_per_char": -0.07508747279644012, "num_chars": 4}, {"sum_logits": -1.482454538345337, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.482454538345337, "logits_per_char": -0.494151512781779, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 680, "native_id": 2308, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7185626029968262, "incorrect_loss_raw": 0.23980532586574554, "correct_loss_per_char": 0.572854200998942, "incorrect_loss_per_char": 0.059951331466436386, "correct_loss_per_token": 1.7185626029968262, "incorrect_loss_per_token": 0.23980532586574554, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23980532586574554, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.23980532586574554, "logits_per_char": -0.059951331466436386, "num_chars": 4}, {"sum_logits": -1.7185626029968262, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.7185626029968262, "logits_per_char": -0.572854200998942, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 681, "native_id": 1985, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2421655654907227, "incorrect_loss_raw": 0.40553751587867737, "correct_loss_per_char": 0.41405518849690753, "incorrect_loss_per_char": 0.10138437896966934, "correct_loss_per_token": 1.2421655654907227, "incorrect_loss_per_token": 0.40553751587867737, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.40553751587867737, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.40553751587867737, "logits_per_char": -0.10138437896966934, "num_chars": 4}, {"sum_logits": -1.2421655654907227, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.2421655654907227, "logits_per_char": -0.41405518849690753, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 682, "native_id": 3114, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2768107056617737, "incorrect_loss_raw": 1.5850660800933838, "correct_loss_per_char": 0.06920267641544342, "incorrect_loss_per_char": 0.5283553600311279, "correct_loss_per_token": 0.2768107056617737, "incorrect_loss_per_token": 1.5850660800933838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2768107056617737, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.2768107056617737, "logits_per_char": -0.06920267641544342, "num_chars": 4}, {"sum_logits": -1.5850660800933838, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.5850660800933838, "logits_per_char": -0.5283553600311279, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 683, "native_id": 1920, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21319393813610077, "incorrect_loss_raw": 1.7914729118347168, "correct_loss_per_char": 0.05329848453402519, "incorrect_loss_per_char": 0.5971576372782389, "correct_loss_per_token": 0.21319393813610077, "incorrect_loss_per_token": 1.7914729118347168, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21319393813610077, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -0.21319393813610077, "logits_per_char": -0.05329848453402519, "num_chars": 4}, {"sum_logits": -1.7914729118347168, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.7914729118347168, "logits_per_char": -0.5971576372782389, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 684, "native_id": 2419, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.39272797107696533, "incorrect_loss_raw": 1.3776768445968628, "correct_loss_per_char": 0.09818199276924133, "incorrect_loss_per_char": 0.4592256148656209, "correct_loss_per_token": 0.39272797107696533, "incorrect_loss_per_token": 1.3776768445968628, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.39272797107696533, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.39272797107696533, "logits_per_char": -0.09818199276924133, "num_chars": 4}, {"sum_logits": -1.3776768445968628, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -1.3776768445968628, "logits_per_char": -0.4592256148656209, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 685, "native_id": 69, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.33383217453956604, "incorrect_loss_raw": 1.4162601232528687, "correct_loss_per_char": 0.08345804363489151, "incorrect_loss_per_char": 0.47208670775095624, "correct_loss_per_token": 0.33383217453956604, "incorrect_loss_per_token": 1.4162601232528687, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.33383217453956604, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": true, "logits_per_token": -0.33383217453956604, "logits_per_char": -0.08345804363489151, "num_chars": 4}, {"sum_logits": -1.4162601232528687, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": false, "logits_per_token": -1.4162601232528687, "logits_per_char": -0.47208670775095624, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 686, "native_id": 82, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7815189957618713, "incorrect_loss_raw": 0.6873894929885864, "correct_loss_per_char": 0.2605063319206238, "incorrect_loss_per_char": 0.1718473732471466, "correct_loss_per_token": 0.7815189957618713, "incorrect_loss_per_token": 0.6873894929885864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6873894929885864, "num_tokens": 1, "num_tokens_all": 1017, "is_greedy": true, "logits_per_token": -0.6873894929885864, "logits_per_char": -0.1718473732471466, "num_chars": 4}, {"sum_logits": -0.7815189957618713, "num_tokens": 1, "num_tokens_all": 1017, "is_greedy": false, "logits_per_token": -0.7815189957618713, "logits_per_char": -0.2605063319206238, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 687, "native_id": 1196, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.11612502485513687, "incorrect_loss_raw": 2.5527310371398926, "correct_loss_per_char": 0.029031256213784218, "incorrect_loss_per_char": 0.8509103457132975, "correct_loss_per_token": 0.11612502485513687, "incorrect_loss_per_token": 2.5527310371398926, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.11612502485513687, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.11612502485513687, "logits_per_char": -0.029031256213784218, "num_chars": 4}, {"sum_logits": -2.5527310371398926, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -2.5527310371398926, "logits_per_char": -0.8509103457132975, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 688, "native_id": 2321, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9820719957351685, "incorrect_loss_raw": 0.5445979833602905, "correct_loss_per_char": 0.32735733191172284, "incorrect_loss_per_char": 0.13614949584007263, "correct_loss_per_token": 0.9820719957351685, "incorrect_loss_per_token": 0.5445979833602905, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5445979833602905, "num_tokens": 1, "num_tokens_all": 1019, "is_greedy": true, "logits_per_token": -0.5445979833602905, "logits_per_char": -0.13614949584007263, "num_chars": 4}, {"sum_logits": -0.9820719957351685, "num_tokens": 1, "num_tokens_all": 1019, "is_greedy": false, "logits_per_token": -0.9820719957351685, "logits_per_char": -0.32735733191172284, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 689, "native_id": 505, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.10857436805963516, "incorrect_loss_raw": 2.4026031494140625, "correct_loss_per_char": 0.02714359201490879, "incorrect_loss_per_char": 0.8008677164713541, "correct_loss_per_token": 0.10857436805963516, "incorrect_loss_per_token": 2.4026031494140625, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.10857436805963516, "num_tokens": 1, "num_tokens_all": 1094, "is_greedy": true, "logits_per_token": -0.10857436805963516, "logits_per_char": -0.02714359201490879, "num_chars": 4}, {"sum_logits": -2.4026031494140625, "num_tokens": 1, "num_tokens_all": 1094, "is_greedy": false, "logits_per_token": -2.4026031494140625, "logits_per_char": -0.8008677164713541, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 690, "native_id": 1852, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5616682767868042, "incorrect_loss_raw": 0.9194369316101074, "correct_loss_per_char": 0.14041706919670105, "incorrect_loss_per_char": 0.30647897720336914, "correct_loss_per_token": 0.5616682767868042, "incorrect_loss_per_token": 0.9194369316101074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5616682767868042, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.5616682767868042, "logits_per_char": -0.14041706919670105, "num_chars": 4}, {"sum_logits": -0.9194369316101074, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -0.9194369316101074, "logits_per_char": -0.30647897720336914, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 691, "native_id": 2342, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4306777715682983, "incorrect_loss_raw": 0.2976550757884979, "correct_loss_per_char": 0.4768925905227661, "incorrect_loss_per_char": 0.07441376894712448, "correct_loss_per_token": 1.4306777715682983, "incorrect_loss_per_token": 0.2976550757884979, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2976550757884979, "num_tokens": 1, "num_tokens_all": 1194, "is_greedy": true, "logits_per_token": -0.2976550757884979, "logits_per_char": -0.07441376894712448, "num_chars": 4}, {"sum_logits": -1.4306777715682983, "num_tokens": 1, "num_tokens_all": 1194, "is_greedy": false, "logits_per_token": -1.4306777715682983, "logits_per_char": -0.4768925905227661, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 692, "native_id": 1003, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.47845450043678284, "incorrect_loss_raw": 1.0846915245056152, "correct_loss_per_char": 0.11961362510919571, "incorrect_loss_per_char": 0.36156384150187176, "correct_loss_per_token": 0.47845450043678284, "incorrect_loss_per_token": 1.0846915245056152, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.47845450043678284, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.47845450043678284, "logits_per_char": -0.11961362510919571, "num_chars": 4}, {"sum_logits": -1.0846915245056152, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.0846915245056152, "logits_per_char": -0.36156384150187176, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 693, "native_id": 3124, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2207442820072174, "incorrect_loss_raw": 1.7132748365402222, "correct_loss_per_char": 0.05518607050180435, "incorrect_loss_per_char": 0.5710916121800741, "correct_loss_per_token": 0.2207442820072174, "incorrect_loss_per_token": 1.7132748365402222, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2207442820072174, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.2207442820072174, "logits_per_char": -0.05518607050180435, "num_chars": 4}, {"sum_logits": -1.7132748365402222, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.7132748365402222, "logits_per_char": -0.5710916121800741, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 694, "native_id": 1716, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.536872386932373, "incorrect_loss_raw": 1.0032849311828613, "correct_loss_per_char": 0.13421809673309326, "incorrect_loss_per_char": 0.3344283103942871, "correct_loss_per_token": 0.536872386932373, "incorrect_loss_per_token": 1.0032849311828613, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.536872386932373, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": true, "logits_per_token": -0.536872386932373, "logits_per_char": -0.13421809673309326, "num_chars": 4}, {"sum_logits": -1.0032849311828613, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": false, "logits_per_token": -1.0032849311828613, "logits_per_char": -0.3344283103942871, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 695, "native_id": 857, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3522435128688812, "incorrect_loss_raw": 1.4182895421981812, "correct_loss_per_char": 0.0880608782172203, "incorrect_loss_per_char": 0.47276318073272705, "correct_loss_per_token": 0.3522435128688812, "incorrect_loss_per_token": 1.4182895421981812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3522435128688812, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.3522435128688812, "logits_per_char": -0.0880608782172203, "num_chars": 4}, {"sum_logits": -1.4182895421981812, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -1.4182895421981812, "logits_per_char": -0.47276318073272705, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 696, "native_id": 172, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6545125246047974, "incorrect_loss_raw": 0.8963625431060791, "correct_loss_per_char": 0.16362813115119934, "incorrect_loss_per_char": 0.29878751436869305, "correct_loss_per_token": 0.6545125246047974, "incorrect_loss_per_token": 0.8963625431060791, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6545125246047974, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.6545125246047974, "logits_per_char": -0.16362813115119934, "num_chars": 4}, {"sum_logits": -0.8963625431060791, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -0.8963625431060791, "logits_per_char": -0.29878751436869305, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 697, "native_id": 1766, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.262372761964798, "incorrect_loss_raw": 1.6428920030593872, "correct_loss_per_char": 0.0655931904911995, "incorrect_loss_per_char": 0.5476306676864624, "correct_loss_per_token": 0.262372761964798, "incorrect_loss_per_token": 1.6428920030593872, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.262372761964798, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.262372761964798, "logits_per_char": -0.0655931904911995, "num_chars": 4}, {"sum_logits": -1.6428920030593872, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.6428920030593872, "logits_per_char": -0.5476306676864624, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 698, "native_id": 2697, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.15049812197685242, "incorrect_loss_raw": 2.379838466644287, "correct_loss_per_char": 0.037624530494213104, "incorrect_loss_per_char": 0.7932794888814291, "correct_loss_per_token": 0.15049812197685242, "incorrect_loss_per_token": 2.379838466644287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.15049812197685242, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.15049812197685242, "logits_per_char": -0.037624530494213104, "num_chars": 4}, {"sum_logits": -2.379838466644287, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -2.379838466644287, "logits_per_char": -0.7932794888814291, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 699, "native_id": 456, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4833972454071045, "incorrect_loss_raw": 0.2903473377227783, "correct_loss_per_char": 0.49446574846903485, "incorrect_loss_per_char": 0.07258683443069458, "correct_loss_per_token": 1.4833972454071045, "incorrect_loss_per_token": 0.2903473377227783, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2903473377227783, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.2903473377227783, "logits_per_char": -0.07258683443069458, "num_chars": 4}, {"sum_logits": -1.4833972454071045, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.4833972454071045, "logits_per_char": -0.49446574846903485, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 700, "native_id": 1690, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.79586261510849, "incorrect_loss_raw": 0.6832188963890076, "correct_loss_per_char": 0.26528753836949664, "incorrect_loss_per_char": 0.1708047240972519, "correct_loss_per_token": 0.79586261510849, "incorrect_loss_per_token": 0.6832188963890076, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6832188963890076, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.6832188963890076, "logits_per_char": -0.1708047240972519, "num_chars": 4}, {"sum_logits": -0.79586261510849, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -0.79586261510849, "logits_per_char": -0.26528753836949664, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 701, "native_id": 729, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4258030652999878, "incorrect_loss_raw": 1.139783501625061, "correct_loss_per_char": 0.10645076632499695, "incorrect_loss_per_char": 0.3799278338750203, "correct_loss_per_token": 0.4258030652999878, "incorrect_loss_per_token": 1.139783501625061, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4258030652999878, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.4258030652999878, "logits_per_char": -0.10645076632499695, "num_chars": 4}, {"sum_logits": -1.139783501625061, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.139783501625061, "logits_per_char": -0.3799278338750203, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 702, "native_id": 2794, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.15182049572467804, "incorrect_loss_raw": 2.1779391765594482, "correct_loss_per_char": 0.03795512393116951, "incorrect_loss_per_char": 0.725979725519816, "correct_loss_per_token": 0.15182049572467804, "incorrect_loss_per_token": 2.1779391765594482, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.15182049572467804, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": true, "logits_per_token": -0.15182049572467804, "logits_per_char": -0.03795512393116951, "num_chars": 4}, {"sum_logits": -2.1779391765594482, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": false, "logits_per_token": -2.1779391765594482, "logits_per_char": -0.725979725519816, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 703, "native_id": 2711, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2234289646148682, "incorrect_loss_raw": 0.4105914533138275, "correct_loss_per_char": 0.40780965487162274, "incorrect_loss_per_char": 0.10264786332845688, "correct_loss_per_token": 1.2234289646148682, "incorrect_loss_per_token": 0.4105914533138275, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4105914533138275, "num_tokens": 1, "num_tokens_all": 834, "is_greedy": true, "logits_per_token": -0.4105914533138275, "logits_per_char": -0.10264786332845688, "num_chars": 4}, {"sum_logits": -1.2234289646148682, "num_tokens": 1, "num_tokens_all": 834, "is_greedy": false, "logits_per_token": -1.2234289646148682, "logits_per_char": -0.40780965487162274, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 704, "native_id": 2967, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29454490542411804, "incorrect_loss_raw": 1.4785152673721313, "correct_loss_per_char": 0.07363622635602951, "incorrect_loss_per_char": 0.49283842245737713, "correct_loss_per_token": 0.29454490542411804, "incorrect_loss_per_token": 1.4785152673721313, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29454490542411804, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.29454490542411804, "logits_per_char": -0.07363622635602951, "num_chars": 4}, {"sum_logits": -1.4785152673721313, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.4785152673721313, "logits_per_char": -0.49283842245737713, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 705, "native_id": 1509, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.37333282828330994, "incorrect_loss_raw": 1.3696486949920654, "correct_loss_per_char": 0.09333320707082748, "incorrect_loss_per_char": 0.45654956499735516, "correct_loss_per_token": 0.37333282828330994, "incorrect_loss_per_token": 1.3696486949920654, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.37333282828330994, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.37333282828330994, "logits_per_char": -0.09333320707082748, "num_chars": 4}, {"sum_logits": -1.3696486949920654, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.3696486949920654, "logits_per_char": -0.45654956499735516, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 706, "native_id": 698, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7270066142082214, "incorrect_loss_raw": 0.7610803842544556, "correct_loss_per_char": 0.24233553806940714, "incorrect_loss_per_char": 0.1902700960636139, "correct_loss_per_token": 0.7270066142082214, "incorrect_loss_per_token": 0.7610803842544556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7610803842544556, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -0.7610803842544556, "logits_per_char": -0.1902700960636139, "num_chars": 4}, {"sum_logits": -0.7270066142082214, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -0.7270066142082214, "logits_per_char": -0.24233553806940714, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 707, "native_id": 2917, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6117203235626221, "incorrect_loss_raw": 0.8722735643386841, "correct_loss_per_char": 0.15293008089065552, "incorrect_loss_per_char": 0.29075785477956134, "correct_loss_per_token": 0.6117203235626221, "incorrect_loss_per_token": 0.8722735643386841, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6117203235626221, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.6117203235626221, "logits_per_char": -0.15293008089065552, "num_chars": 4}, {"sum_logits": -0.8722735643386841, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -0.8722735643386841, "logits_per_char": -0.29075785477956134, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 708, "native_id": 259, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0350306034088135, "incorrect_loss_raw": 0.5020009875297546, "correct_loss_per_char": 0.3450102011362712, "incorrect_loss_per_char": 0.12550024688243866, "correct_loss_per_token": 1.0350306034088135, "incorrect_loss_per_token": 0.5020009875297546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5020009875297546, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": true, "logits_per_token": -0.5020009875297546, "logits_per_char": -0.12550024688243866, "num_chars": 4}, {"sum_logits": -1.0350306034088135, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": false, "logits_per_token": -1.0350306034088135, "logits_per_char": -0.3450102011362712, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 709, "native_id": 2099, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.453116238117218, "incorrect_loss_raw": 1.2162983417510986, "correct_loss_per_char": 0.1132790595293045, "incorrect_loss_per_char": 0.4054327805836995, "correct_loss_per_token": 0.453116238117218, "incorrect_loss_per_token": 1.2162983417510986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.453116238117218, "num_tokens": 1, "num_tokens_all": 1022, "is_greedy": true, "logits_per_token": -0.453116238117218, "logits_per_char": -0.1132790595293045, "num_chars": 4}, {"sum_logits": -1.2162983417510986, "num_tokens": 1, "num_tokens_all": 1022, "is_greedy": false, "logits_per_token": -1.2162983417510986, "logits_per_char": -0.4054327805836995, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 710, "native_id": 1556, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.805103063583374, "incorrect_loss_raw": 0.6526235342025757, "correct_loss_per_char": 0.2683676878611247, "incorrect_loss_per_char": 0.16315588355064392, "correct_loss_per_token": 0.805103063583374, "incorrect_loss_per_token": 0.6526235342025757, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6526235342025757, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.6526235342025757, "logits_per_char": -0.16315588355064392, "num_chars": 4}, {"sum_logits": -0.805103063583374, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -0.805103063583374, "logits_per_char": -0.2683676878611247, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 711, "native_id": 135, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22384712100028992, "incorrect_loss_raw": 1.6948260068893433, "correct_loss_per_char": 0.05596178025007248, "incorrect_loss_per_char": 0.5649420022964478, "correct_loss_per_token": 0.22384712100028992, "incorrect_loss_per_token": 1.6948260068893433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22384712100028992, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.22384712100028992, "logits_per_char": -0.05596178025007248, "num_chars": 4}, {"sum_logits": -1.6948260068893433, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -1.6948260068893433, "logits_per_char": -0.5649420022964478, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 712, "native_id": 2775, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2586241662502289, "incorrect_loss_raw": 1.6262516975402832, "correct_loss_per_char": 0.06465604156255722, "incorrect_loss_per_char": 0.5420838991800944, "correct_loss_per_token": 0.2586241662502289, "incorrect_loss_per_token": 1.6262516975402832, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2586241662502289, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.2586241662502289, "logits_per_char": -0.06465604156255722, "num_chars": 4}, {"sum_logits": -1.6262516975402832, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -1.6262516975402832, "logits_per_char": -0.5420838991800944, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 713, "native_id": 1098, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5028746724128723, "incorrect_loss_raw": 1.0173717737197876, "correct_loss_per_char": 0.12571866810321808, "incorrect_loss_per_char": 0.3391239245732625, "correct_loss_per_token": 0.5028746724128723, "incorrect_loss_per_token": 1.0173717737197876, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5028746724128723, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.5028746724128723, "logits_per_char": -0.12571866810321808, "num_chars": 4}, {"sum_logits": -1.0173717737197876, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.0173717737197876, "logits_per_char": -0.3391239245732625, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 714, "native_id": 2993, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.757256031036377, "incorrect_loss_raw": 0.2175338864326477, "correct_loss_per_char": 0.585752010345459, "incorrect_loss_per_char": 0.054383471608161926, "correct_loss_per_token": 1.757256031036377, "incorrect_loss_per_token": 0.2175338864326477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2175338864326477, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -0.2175338864326477, "logits_per_char": -0.054383471608161926, "num_chars": 4}, {"sum_logits": -1.757256031036377, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.757256031036377, "logits_per_char": -0.585752010345459, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 715, "native_id": 117, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5066737532615662, "incorrect_loss_raw": 0.9865170121192932, "correct_loss_per_char": 0.12666843831539154, "incorrect_loss_per_char": 0.3288390040397644, "correct_loss_per_token": 0.5066737532615662, "incorrect_loss_per_token": 0.9865170121192932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5066737532615662, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": true, "logits_per_token": -0.5066737532615662, "logits_per_char": -0.12666843831539154, "num_chars": 4}, {"sum_logits": -0.9865170121192932, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -0.9865170121192932, "logits_per_char": -0.3288390040397644, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 716, "native_id": 1413, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.13054774701595306, "incorrect_loss_raw": 2.3432507514953613, "correct_loss_per_char": 0.032636936753988266, "incorrect_loss_per_char": 0.7810835838317871, "correct_loss_per_token": 0.13054774701595306, "incorrect_loss_per_token": 2.3432507514953613, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.13054774701595306, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.13054774701595306, "logits_per_char": -0.032636936753988266, "num_chars": 4}, {"sum_logits": -2.3432507514953613, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -2.3432507514953613, "logits_per_char": -0.7810835838317871, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 717, "native_id": 2082, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30713069438934326, "incorrect_loss_raw": 1.5074715614318848, "correct_loss_per_char": 0.07678267359733582, "incorrect_loss_per_char": 0.5024905204772949, "correct_loss_per_token": 0.30713069438934326, "incorrect_loss_per_token": 1.5074715614318848, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30713069438934326, "num_tokens": 1, "num_tokens_all": 846, "is_greedy": true, "logits_per_token": -0.30713069438934326, "logits_per_char": -0.07678267359733582, "num_chars": 4}, {"sum_logits": -1.5074715614318848, "num_tokens": 1, "num_tokens_all": 846, "is_greedy": false, "logits_per_token": -1.5074715614318848, "logits_per_char": -0.5024905204772949, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 718, "native_id": 245, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.45131033658981323, "incorrect_loss_raw": 1.1365516185760498, "correct_loss_per_char": 0.11282758414745331, "incorrect_loss_per_char": 0.3788505395253499, "correct_loss_per_token": 0.45131033658981323, "incorrect_loss_per_token": 1.1365516185760498, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.45131033658981323, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.45131033658981323, "logits_per_char": -0.11282758414745331, "num_chars": 4}, {"sum_logits": -1.1365516185760498, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.1365516185760498, "logits_per_char": -0.3788505395253499, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 719, "native_id": 1125, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9487113952636719, "incorrect_loss_raw": 0.21615834534168243, "correct_loss_per_char": 0.6495704650878906, "incorrect_loss_per_char": 0.05403958633542061, "correct_loss_per_token": 1.9487113952636719, "incorrect_loss_per_token": 0.21615834534168243, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21615834534168243, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.21615834534168243, "logits_per_char": -0.05403958633542061, "num_chars": 4}, {"sum_logits": -1.9487113952636719, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.9487113952636719, "logits_per_char": -0.6495704650878906, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 720, "native_id": 2120, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7420360445976257, "incorrect_loss_raw": 0.7900900840759277, "correct_loss_per_char": 0.2473453481992086, "incorrect_loss_per_char": 0.19752252101898193, "correct_loss_per_token": 0.7420360445976257, "incorrect_loss_per_token": 0.7900900840759277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7900900840759277, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -0.7900900840759277, "logits_per_char": -0.19752252101898193, "num_chars": 4}, {"sum_logits": -0.7420360445976257, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.7420360445976257, "logits_per_char": -0.2473453481992086, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 721, "native_id": 2604, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46848273277282715, "incorrect_loss_raw": 1.129425287246704, "correct_loss_per_char": 0.11712068319320679, "incorrect_loss_per_char": 0.37647509574890137, "correct_loss_per_token": 0.46848273277282715, "incorrect_loss_per_token": 1.129425287246704, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46848273277282715, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": true, "logits_per_token": -0.46848273277282715, "logits_per_char": -0.11712068319320679, "num_chars": 4}, {"sum_logits": -1.129425287246704, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": false, "logits_per_token": -1.129425287246704, "logits_per_char": -0.37647509574890137, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 722, "native_id": 2940, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22572962939739227, "incorrect_loss_raw": 1.9094223976135254, "correct_loss_per_char": 0.05643240734934807, "incorrect_loss_per_char": 0.6364741325378418, "correct_loss_per_token": 0.22572962939739227, "incorrect_loss_per_token": 1.9094223976135254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22572962939739227, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.22572962939739227, "logits_per_char": -0.05643240734934807, "num_chars": 4}, {"sum_logits": -1.9094223976135254, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.9094223976135254, "logits_per_char": -0.6364741325378418, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 723, "native_id": 1685, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3488365411758423, "incorrect_loss_raw": 0.3607562482357025, "correct_loss_per_char": 0.44961218039194745, "incorrect_loss_per_char": 0.09018906205892563, "correct_loss_per_token": 1.3488365411758423, "incorrect_loss_per_token": 0.3607562482357025, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3607562482357025, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": true, "logits_per_token": -0.3607562482357025, "logits_per_char": -0.09018906205892563, "num_chars": 4}, {"sum_logits": -1.3488365411758423, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -1.3488365411758423, "logits_per_char": -0.44961218039194745, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 724, "native_id": 1971, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.015254259109497, "incorrect_loss_raw": 0.5291602611541748, "correct_loss_per_char": 0.33841808636983234, "incorrect_loss_per_char": 0.1322900652885437, "correct_loss_per_token": 1.015254259109497, "incorrect_loss_per_token": 0.5291602611541748, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5291602611541748, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": true, "logits_per_token": -0.5291602611541748, "logits_per_char": -0.1322900652885437, "num_chars": 4}, {"sum_logits": -1.015254259109497, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.015254259109497, "logits_per_char": -0.33841808636983234, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 725, "native_id": 124, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2900872826576233, "incorrect_loss_raw": 1.8418467044830322, "correct_loss_per_char": 0.07252182066440582, "incorrect_loss_per_char": 0.6139489014943441, "correct_loss_per_token": 0.2900872826576233, "incorrect_loss_per_token": 1.8418467044830322, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2900872826576233, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.2900872826576233, "logits_per_char": -0.07252182066440582, "num_chars": 4}, {"sum_logits": -1.8418467044830322, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.8418467044830322, "logits_per_char": -0.6139489014943441, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 726, "native_id": 2830, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2893838584423065, "incorrect_loss_raw": 1.5541919469833374, "correct_loss_per_char": 0.07234596461057663, "incorrect_loss_per_char": 0.5180639823277792, "correct_loss_per_token": 0.2893838584423065, "incorrect_loss_per_token": 1.5541919469833374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2893838584423065, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.2893838584423065, "logits_per_char": -0.07234596461057663, "num_chars": 4}, {"sum_logits": -1.5541919469833374, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.5541919469833374, "logits_per_char": -0.5180639823277792, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 727, "native_id": 747, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.873378336429596, "incorrect_loss_raw": 0.6972050666809082, "correct_loss_per_char": 0.29112611214319867, "incorrect_loss_per_char": 0.17430126667022705, "correct_loss_per_token": 0.873378336429596, "incorrect_loss_per_token": 0.6972050666809082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6972050666809082, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.6972050666809082, "logits_per_char": -0.17430126667022705, "num_chars": 4}, {"sum_logits": -0.873378336429596, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -0.873378336429596, "logits_per_char": -0.29112611214319867, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 728, "native_id": 944, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18865258991718292, "incorrect_loss_raw": 1.8626497983932495, "correct_loss_per_char": 0.04716314747929573, "incorrect_loss_per_char": 0.6208832661310831, "correct_loss_per_token": 0.18865258991718292, "incorrect_loss_per_token": 1.8626497983932495, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18865258991718292, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.18865258991718292, "logits_per_char": -0.04716314747929573, "num_chars": 4}, {"sum_logits": -1.8626497983932495, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -1.8626497983932495, "logits_per_char": -0.6208832661310831, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 729, "native_id": 2006, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17876233160495758, "incorrect_loss_raw": 1.9078130722045898, "correct_loss_per_char": 0.044690582901239395, "incorrect_loss_per_char": 0.6359376907348633, "correct_loss_per_token": 0.17876233160495758, "incorrect_loss_per_token": 1.9078130722045898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17876233160495758, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": true, "logits_per_token": -0.17876233160495758, "logits_per_char": -0.044690582901239395, "num_chars": 4}, {"sum_logits": -1.9078130722045898, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": false, "logits_per_token": -1.9078130722045898, "logits_per_char": -0.6359376907348633, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 730, "native_id": 2359, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7933979034423828, "incorrect_loss_raw": 0.2264985740184784, "correct_loss_per_char": 0.5977993011474609, "incorrect_loss_per_char": 0.0566246435046196, "correct_loss_per_token": 1.7933979034423828, "incorrect_loss_per_token": 0.2264985740184784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2264985740184784, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": true, "logits_per_token": -0.2264985740184784, "logits_per_char": -0.0566246435046196, "num_chars": 4}, {"sum_logits": -1.7933979034423828, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": false, "logits_per_token": -1.7933979034423828, "logits_per_char": -0.5977993011474609, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 731, "native_id": 253, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5549547076225281, "incorrect_loss_raw": 0.9714612364768982, "correct_loss_per_char": 0.13873867690563202, "incorrect_loss_per_char": 0.32382041215896606, "correct_loss_per_token": 0.5549547076225281, "incorrect_loss_per_token": 0.9714612364768982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5549547076225281, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.5549547076225281, "logits_per_char": -0.13873867690563202, "num_chars": 4}, {"sum_logits": -0.9714612364768982, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -0.9714612364768982, "logits_per_char": -0.32382041215896606, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 732, "native_id": 1206, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.43439754843711853, "incorrect_loss_raw": 1.2456929683685303, "correct_loss_per_char": 0.10859938710927963, "incorrect_loss_per_char": 0.41523098945617676, "correct_loss_per_token": 0.43439754843711853, "incorrect_loss_per_token": 1.2456929683685303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.43439754843711853, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": true, "logits_per_token": -0.43439754843711853, "logits_per_char": -0.10859938710927963, "num_chars": 4}, {"sum_logits": -1.2456929683685303, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": false, "logits_per_token": -1.2456929683685303, "logits_per_char": -0.41523098945617676, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 733, "native_id": 2904, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6042996644973755, "incorrect_loss_raw": 0.30677875876426697, "correct_loss_per_char": 0.5347665548324585, "incorrect_loss_per_char": 0.07669468969106674, "correct_loss_per_token": 1.6042996644973755, "incorrect_loss_per_token": 0.30677875876426697, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30677875876426697, "num_tokens": 1, "num_tokens_all": 1356, "is_greedy": true, "logits_per_token": -0.30677875876426697, "logits_per_char": -0.07669468969106674, "num_chars": 4}, {"sum_logits": -1.6042996644973755, "num_tokens": 1, "num_tokens_all": 1356, "is_greedy": false, "logits_per_token": -1.6042996644973755, "logits_per_char": -0.5347665548324585, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 734, "native_id": 1825, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5098681449890137, "incorrect_loss_raw": 1.1614584922790527, "correct_loss_per_char": 0.12746703624725342, "incorrect_loss_per_char": 0.38715283075968426, "correct_loss_per_token": 0.5098681449890137, "incorrect_loss_per_token": 1.1614584922790527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5098681449890137, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.5098681449890137, "logits_per_char": -0.12746703624725342, "num_chars": 4}, {"sum_logits": -1.1614584922790527, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.1614584922790527, "logits_per_char": -0.38715283075968426, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 735, "native_id": 1879, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2688870429992676, "incorrect_loss_raw": 1.6416561603546143, "correct_loss_per_char": 0.0672217607498169, "incorrect_loss_per_char": 0.5472187201182047, "correct_loss_per_token": 0.2688870429992676, "incorrect_loss_per_token": 1.6416561603546143, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2688870429992676, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.2688870429992676, "logits_per_char": -0.0672217607498169, "num_chars": 4}, {"sum_logits": -1.6416561603546143, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -1.6416561603546143, "logits_per_char": -0.5472187201182047, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 736, "native_id": 717, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3707258999347687, "incorrect_loss_raw": 1.3445237874984741, "correct_loss_per_char": 0.09268147498369217, "incorrect_loss_per_char": 0.4481745958328247, "correct_loss_per_token": 0.3707258999347687, "incorrect_loss_per_token": 1.3445237874984741, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3707258999347687, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": true, "logits_per_token": -0.3707258999347687, "logits_per_char": -0.09268147498369217, "num_chars": 4}, {"sum_logits": -1.3445237874984741, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": false, "logits_per_token": -1.3445237874984741, "logits_per_char": -0.4481745958328247, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 737, "native_id": 1078, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7605240941047668, "incorrect_loss_raw": 0.7782734036445618, "correct_loss_per_char": 0.2535080313682556, "incorrect_loss_per_char": 0.19456835091114044, "correct_loss_per_token": 0.7605240941047668, "incorrect_loss_per_token": 0.7782734036445618, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7782734036445618, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -0.7782734036445618, "logits_per_char": -0.19456835091114044, "num_chars": 4}, {"sum_logits": -0.7605240941047668, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.7605240941047668, "logits_per_char": -0.2535080313682556, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 738, "native_id": 660, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46739545464515686, "incorrect_loss_raw": 1.1181491613388062, "correct_loss_per_char": 0.11684886366128922, "incorrect_loss_per_char": 0.37271638711293537, "correct_loss_per_token": 0.46739545464515686, "incorrect_loss_per_token": 1.1181491613388062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46739545464515686, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.46739545464515686, "logits_per_char": -0.11684886366128922, "num_chars": 4}, {"sum_logits": -1.1181491613388062, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.1181491613388062, "logits_per_char": -0.37271638711293537, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 739, "native_id": 1709, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18237385153770447, "incorrect_loss_raw": 2.124013900756836, "correct_loss_per_char": 0.04559346288442612, "incorrect_loss_per_char": 0.7080046335856119, "correct_loss_per_token": 0.18237385153770447, "incorrect_loss_per_token": 2.124013900756836, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18237385153770447, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.18237385153770447, "logits_per_char": -0.04559346288442612, "num_chars": 4}, {"sum_logits": -2.124013900756836, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -2.124013900756836, "logits_per_char": -0.7080046335856119, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 740, "native_id": 1276, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3513461649417877, "incorrect_loss_raw": 1.319365382194519, "correct_loss_per_char": 0.08783654123544693, "incorrect_loss_per_char": 0.43978846073150635, "correct_loss_per_token": 0.3513461649417877, "incorrect_loss_per_token": 1.319365382194519, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3513461649417877, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.3513461649417877, "logits_per_char": -0.08783654123544693, "num_chars": 4}, {"sum_logits": -1.319365382194519, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.319365382194519, "logits_per_char": -0.43978846073150635, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 741, "native_id": 2617, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7692980766296387, "incorrect_loss_raw": 0.7593697905540466, "correct_loss_per_char": 0.19232451915740967, "incorrect_loss_per_char": 0.25312326351801556, "correct_loss_per_token": 0.7692980766296387, "incorrect_loss_per_token": 0.7593697905540466, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7692980766296387, "num_tokens": 1, "num_tokens_all": 1017, "is_greedy": false, "logits_per_token": -0.7692980766296387, "logits_per_char": -0.19232451915740967, "num_chars": 4}, {"sum_logits": -0.7593697905540466, "num_tokens": 1, "num_tokens_all": 1017, "is_greedy": true, "logits_per_token": -0.7593697905540466, "logits_per_char": -0.25312326351801556, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 742, "native_id": 39, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5650444030761719, "incorrect_loss_raw": 0.9983711838722229, "correct_loss_per_char": 0.18834813435872397, "incorrect_loss_per_char": 0.24959279596805573, "correct_loss_per_token": 0.5650444030761719, "incorrect_loss_per_token": 0.9983711838722229, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9983711838722229, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": false, "logits_per_token": -0.9983711838722229, "logits_per_char": -0.24959279596805573, "num_chars": 4}, {"sum_logits": -0.5650444030761719, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": true, "logits_per_token": -0.5650444030761719, "logits_per_char": -0.18834813435872397, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 743, "native_id": 2582, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1919894814491272, "incorrect_loss_raw": 2.0083718299865723, "correct_loss_per_char": 0.0479973703622818, "incorrect_loss_per_char": 0.6694572766621908, "correct_loss_per_token": 0.1919894814491272, "incorrect_loss_per_token": 2.0083718299865723, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1919894814491272, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.1919894814491272, "logits_per_char": -0.0479973703622818, "num_chars": 4}, {"sum_logits": -2.0083718299865723, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -2.0083718299865723, "logits_per_char": -0.6694572766621908, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 744, "native_id": 1335, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.14726945757865906, "incorrect_loss_raw": 2.3192079067230225, "correct_loss_per_char": 0.036817364394664764, "incorrect_loss_per_char": 0.7730693022410074, "correct_loss_per_token": 0.14726945757865906, "incorrect_loss_per_token": 2.3192079067230225, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.14726945757865906, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": true, "logits_per_token": -0.14726945757865906, "logits_per_char": -0.036817364394664764, "num_chars": 4}, {"sum_logits": -2.3192079067230225, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -2.3192079067230225, "logits_per_char": -0.7730693022410074, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 745, "native_id": 3159, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.39849862456321716, "incorrect_loss_raw": 1.1915907859802246, "correct_loss_per_char": 0.09962465614080429, "incorrect_loss_per_char": 0.3971969286600749, "correct_loss_per_token": 0.39849862456321716, "incorrect_loss_per_token": 1.1915907859802246, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.39849862456321716, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.39849862456321716, "logits_per_char": -0.09962465614080429, "num_chars": 4}, {"sum_logits": -1.1915907859802246, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.1915907859802246, "logits_per_char": -0.3971969286600749, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 746, "native_id": 3097, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22364982962608337, "incorrect_loss_raw": 1.6709824800491333, "correct_loss_per_char": 0.055912457406520844, "incorrect_loss_per_char": 0.5569941600163778, "correct_loss_per_token": 0.22364982962608337, "incorrect_loss_per_token": 1.6709824800491333, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22364982962608337, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.22364982962608337, "logits_per_char": -0.055912457406520844, "num_chars": 4}, {"sum_logits": -1.6709824800491333, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.6709824800491333, "logits_per_char": -0.5569941600163778, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 747, "native_id": 759, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5683176517486572, "incorrect_loss_raw": 0.9822851419448853, "correct_loss_per_char": 0.1420794129371643, "incorrect_loss_per_char": 0.3274283806482951, "correct_loss_per_token": 0.5683176517486572, "incorrect_loss_per_token": 0.9822851419448853, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5683176517486572, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.5683176517486572, "logits_per_char": -0.1420794129371643, "num_chars": 4}, {"sum_logits": -0.9822851419448853, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -0.9822851419448853, "logits_per_char": -0.3274283806482951, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 748, "native_id": 133, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.915740966796875, "incorrect_loss_raw": 0.5893411636352539, "correct_loss_per_char": 0.3052469889322917, "incorrect_loss_per_char": 0.14733529090881348, "correct_loss_per_token": 0.915740966796875, "incorrect_loss_per_token": 0.5893411636352539, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5893411636352539, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.5893411636352539, "logits_per_char": -0.14733529090881348, "num_chars": 4}, {"sum_logits": -0.915740966796875, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -0.915740966796875, "logits_per_char": -0.3052469889322917, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 749, "native_id": 1258, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22909675538539886, "incorrect_loss_raw": 1.8978627920150757, "correct_loss_per_char": 0.057274188846349716, "incorrect_loss_per_char": 0.6326209306716919, "correct_loss_per_token": 0.22909675538539886, "incorrect_loss_per_token": 1.8978627920150757, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22909675538539886, "num_tokens": 1, "num_tokens_all": 1171, "is_greedy": true, "logits_per_token": -0.22909675538539886, "logits_per_char": -0.057274188846349716, "num_chars": 4}, {"sum_logits": -1.8978627920150757, "num_tokens": 1, "num_tokens_all": 1171, "is_greedy": false, "logits_per_token": -1.8978627920150757, "logits_per_char": -0.6326209306716919, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 750, "native_id": 2482, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6533253192901611, "incorrect_loss_raw": 0.8864790201187134, "correct_loss_per_char": 0.16333132982254028, "incorrect_loss_per_char": 0.2954930067062378, "correct_loss_per_token": 0.6533253192901611, "incorrect_loss_per_token": 0.8864790201187134, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6533253192901611, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": true, "logits_per_token": -0.6533253192901611, "logits_per_char": -0.16333132982254028, "num_chars": 4}, {"sum_logits": -0.8864790201187134, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": false, "logits_per_token": -0.8864790201187134, "logits_per_char": -0.2954930067062378, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 751, "native_id": 3085, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.393718957901001, "incorrect_loss_raw": 0.12896715104579926, "correct_loss_per_char": 0.7979063193003336, "incorrect_loss_per_char": 0.032241787761449814, "correct_loss_per_token": 2.393718957901001, "incorrect_loss_per_token": 0.12896715104579926, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.12896715104579926, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.12896715104579926, "logits_per_char": -0.032241787761449814, "num_chars": 4}, {"sum_logits": -2.393718957901001, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -2.393718957901001, "logits_per_char": -0.7979063193003336, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 752, "native_id": 73, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29212576150894165, "incorrect_loss_raw": 1.563185691833496, "correct_loss_per_char": 0.07303144037723541, "incorrect_loss_per_char": 0.521061897277832, "correct_loss_per_token": 0.29212576150894165, "incorrect_loss_per_token": 1.563185691833496, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29212576150894165, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.29212576150894165, "logits_per_char": -0.07303144037723541, "num_chars": 4}, {"sum_logits": -1.563185691833496, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -1.563185691833496, "logits_per_char": -0.521061897277832, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 753, "native_id": 1739, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22944678366184235, "incorrect_loss_raw": 1.7006503343582153, "correct_loss_per_char": 0.05736169591546059, "incorrect_loss_per_char": 0.5668834447860718, "correct_loss_per_token": 0.22944678366184235, "incorrect_loss_per_token": 1.7006503343582153, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22944678366184235, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.22944678366184235, "logits_per_char": -0.05736169591546059, "num_chars": 4}, {"sum_logits": -1.7006503343582153, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.7006503343582153, "logits_per_char": -0.5668834447860718, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 754, "native_id": 2916, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6181280612945557, "incorrect_loss_raw": 0.838905394077301, "correct_loss_per_char": 0.15453201532363892, "incorrect_loss_per_char": 0.27963513135910034, "correct_loss_per_token": 0.6181280612945557, "incorrect_loss_per_token": 0.838905394077301, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6181280612945557, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.6181280612945557, "logits_per_char": -0.15453201532363892, "num_chars": 4}, {"sum_logits": -0.838905394077301, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -0.838905394077301, "logits_per_char": -0.27963513135910034, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 755, "native_id": 1780, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.16493342816829681, "incorrect_loss_raw": 2.0750203132629395, "correct_loss_per_char": 0.041233357042074203, "incorrect_loss_per_char": 0.6916734377543131, "correct_loss_per_token": 0.16493342816829681, "incorrect_loss_per_token": 2.0750203132629395, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16493342816829681, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.16493342816829681, "logits_per_char": -0.041233357042074203, "num_chars": 4}, {"sum_logits": -2.0750203132629395, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -2.0750203132629395, "logits_per_char": -0.6916734377543131, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 756, "native_id": 1388, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2524415254592896, "incorrect_loss_raw": 0.36944475769996643, "correct_loss_per_char": 0.41748050848642987, "incorrect_loss_per_char": 0.09236118942499161, "correct_loss_per_token": 1.2524415254592896, "incorrect_loss_per_token": 0.36944475769996643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.36944475769996643, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.36944475769996643, "logits_per_char": -0.09236118942499161, "num_chars": 4}, {"sum_logits": -1.2524415254592896, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.2524415254592896, "logits_per_char": -0.41748050848642987, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 757, "native_id": 1174, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.974207878112793, "incorrect_loss_raw": 0.19487400352954865, "correct_loss_per_char": 0.6580692927042643, "incorrect_loss_per_char": 0.04871850088238716, "correct_loss_per_token": 1.974207878112793, "incorrect_loss_per_token": 0.19487400352954865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19487400352954865, "num_tokens": 1, "num_tokens_all": 900, "is_greedy": true, "logits_per_token": -0.19487400352954865, "logits_per_char": -0.04871850088238716, "num_chars": 4}, {"sum_logits": -1.974207878112793, "num_tokens": 1, "num_tokens_all": 900, "is_greedy": false, "logits_per_token": -1.974207878112793, "logits_per_char": -0.6580692927042643, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 758, "native_id": 385, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.14427947998046875, "incorrect_loss_raw": 2.2174315452575684, "correct_loss_per_char": 0.03606986999511719, "incorrect_loss_per_char": 0.7391438484191895, "correct_loss_per_token": 0.14427947998046875, "incorrect_loss_per_token": 2.2174315452575684, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.14427947998046875, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.14427947998046875, "logits_per_char": -0.03606986999511719, "num_chars": 4}, {"sum_logits": -2.2174315452575684, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -2.2174315452575684, "logits_per_char": -0.7391438484191895, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 759, "native_id": 327, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.081863284111023, "incorrect_loss_raw": 0.47918274998664856, "correct_loss_per_char": 0.3606210947036743, "incorrect_loss_per_char": 0.11979568749666214, "correct_loss_per_token": 1.081863284111023, "incorrect_loss_per_token": 0.47918274998664856, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.47918274998664856, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": true, "logits_per_token": -0.47918274998664856, "logits_per_char": -0.11979568749666214, "num_chars": 4}, {"sum_logits": -1.081863284111023, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": false, "logits_per_token": -1.081863284111023, "logits_per_char": -0.3606210947036743, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 760, "native_id": 2363, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.3663008213043213, "incorrect_loss_raw": 0.13259629905223846, "correct_loss_per_char": 0.7887669404347738, "incorrect_loss_per_char": 0.033149074763059616, "correct_loss_per_token": 2.3663008213043213, "incorrect_loss_per_token": 0.13259629905223846, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.13259629905223846, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.13259629905223846, "logits_per_char": -0.033149074763059616, "num_chars": 4}, {"sum_logits": -2.3663008213043213, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -2.3663008213043213, "logits_per_char": -0.7887669404347738, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 761, "native_id": 2575, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7290032505989075, "incorrect_loss_raw": 0.8413227200508118, "correct_loss_per_char": 0.24300108353296915, "incorrect_loss_per_char": 0.21033068001270294, "correct_loss_per_token": 0.7290032505989075, "incorrect_loss_per_token": 0.8413227200508118, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8413227200508118, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -0.8413227200508118, "logits_per_char": -0.21033068001270294, "num_chars": 4}, {"sum_logits": -0.7290032505989075, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.7290032505989075, "logits_per_char": -0.24300108353296915, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 762, "native_id": 2334, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5037766695022583, "incorrect_loss_raw": 1.2103509902954102, "correct_loss_per_char": 0.16792555650075278, "incorrect_loss_per_char": 0.30258774757385254, "correct_loss_per_token": 0.5037766695022583, "incorrect_loss_per_token": 1.2103509902954102, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2103509902954102, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.2103509902954102, "logits_per_char": -0.30258774757385254, "num_chars": 4}, {"sum_logits": -0.5037766695022583, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.5037766695022583, "logits_per_char": -0.16792555650075278, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 763, "native_id": 2779, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.257350206375122, "incorrect_loss_raw": 0.3873905837535858, "correct_loss_per_char": 0.419116735458374, "incorrect_loss_per_char": 0.09684764593839645, "correct_loss_per_token": 1.257350206375122, "incorrect_loss_per_token": 0.3873905837535858, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3873905837535858, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.3873905837535858, "logits_per_char": -0.09684764593839645, "num_chars": 4}, {"sum_logits": -1.257350206375122, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.257350206375122, "logits_per_char": -0.419116735458374, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 764, "native_id": 2648, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9130793809890747, "incorrect_loss_raw": 0.19180843234062195, "correct_loss_per_char": 0.6376931269963583, "incorrect_loss_per_char": 0.04795210808515549, "correct_loss_per_token": 1.9130793809890747, "incorrect_loss_per_token": 0.19180843234062195, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19180843234062195, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.19180843234062195, "logits_per_char": -0.04795210808515549, "num_chars": 4}, {"sum_logits": -1.9130793809890747, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.9130793809890747, "logits_per_char": -0.6376931269963583, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 765, "native_id": 2464, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.40155407786369324, "incorrect_loss_raw": 1.2177866697311401, "correct_loss_per_char": 0.10038851946592331, "incorrect_loss_per_char": 0.40592888991038006, "correct_loss_per_token": 0.40155407786369324, "incorrect_loss_per_token": 1.2177866697311401, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.40155407786369324, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.40155407786369324, "logits_per_char": -0.10038851946592331, "num_chars": 4}, {"sum_logits": -1.2177866697311401, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.2177866697311401, "logits_per_char": -0.40592888991038006, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 766, "native_id": 3120, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2808101177215576, "incorrect_loss_raw": 0.36586859822273254, "correct_loss_per_char": 0.42693670590718585, "incorrect_loss_per_char": 0.09146714955568314, "correct_loss_per_token": 1.2808101177215576, "incorrect_loss_per_token": 0.36586859822273254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.36586859822273254, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.36586859822273254, "logits_per_char": -0.09146714955568314, "num_chars": 4}, {"sum_logits": -1.2808101177215576, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -1.2808101177215576, "logits_per_char": -0.42693670590718585, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 767, "native_id": 2884, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5189366340637207, "incorrect_loss_raw": 0.29465845227241516, "correct_loss_per_char": 0.5063122113545736, "incorrect_loss_per_char": 0.07366461306810379, "correct_loss_per_token": 1.5189366340637207, "incorrect_loss_per_token": 0.29465845227241516, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29465845227241516, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.29465845227241516, "logits_per_char": -0.07366461306810379, "num_chars": 4}, {"sum_logits": -1.5189366340637207, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.5189366340637207, "logits_per_char": -0.5063122113545736, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 768, "native_id": 2630, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1782831847667694, "incorrect_loss_raw": 1.9912948608398438, "correct_loss_per_char": 0.04457079619169235, "incorrect_loss_per_char": 0.6637649536132812, "correct_loss_per_token": 0.1782831847667694, "incorrect_loss_per_token": 1.9912948608398438, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1782831847667694, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.1782831847667694, "logits_per_char": -0.04457079619169235, "num_chars": 4}, {"sum_logits": -1.9912948608398438, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.9912948608398438, "logits_per_char": -0.6637649536132812, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 769, "native_id": 2147, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8248761892318726, "incorrect_loss_raw": 0.7037386894226074, "correct_loss_per_char": 0.2749587297439575, "incorrect_loss_per_char": 0.17593467235565186, "correct_loss_per_token": 0.8248761892318726, "incorrect_loss_per_token": 0.7037386894226074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7037386894226074, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": true, "logits_per_token": -0.7037386894226074, "logits_per_char": -0.17593467235565186, "num_chars": 4}, {"sum_logits": -0.8248761892318726, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": false, "logits_per_token": -0.8248761892318726, "logits_per_char": -0.2749587297439575, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 770, "native_id": 748, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8239502906799316, "incorrect_loss_raw": 0.22448161244392395, "correct_loss_per_char": 0.6079834302266439, "incorrect_loss_per_char": 0.05612040311098099, "correct_loss_per_token": 1.8239502906799316, "incorrect_loss_per_token": 0.22448161244392395, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22448161244392395, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -0.22448161244392395, "logits_per_char": -0.05612040311098099, "num_chars": 4}, {"sum_logits": -1.8239502906799316, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -1.8239502906799316, "logits_per_char": -0.6079834302266439, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 771, "native_id": 1662, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2724286913871765, "incorrect_loss_raw": 1.6424779891967773, "correct_loss_per_char": 0.06810717284679413, "incorrect_loss_per_char": 0.5474926630655924, "correct_loss_per_token": 0.2724286913871765, "incorrect_loss_per_token": 1.6424779891967773, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2724286913871765, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.2724286913871765, "logits_per_char": -0.06810717284679413, "num_chars": 4}, {"sum_logits": -1.6424779891967773, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.6424779891967773, "logits_per_char": -0.5474926630655924, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 772, "native_id": 436, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1949925422668457, "incorrect_loss_raw": 0.42644649744033813, "correct_loss_per_char": 0.3983308474222819, "incorrect_loss_per_char": 0.10661162436008453, "correct_loss_per_token": 1.1949925422668457, "incorrect_loss_per_token": 0.42644649744033813, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.42644649744033813, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": true, "logits_per_token": -0.42644649744033813, "logits_per_char": -0.10661162436008453, "num_chars": 4}, {"sum_logits": -1.1949925422668457, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": false, "logits_per_token": -1.1949925422668457, "logits_per_char": -0.3983308474222819, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 773, "native_id": 2275, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6854275465011597, "incorrect_loss_raw": 0.2337566465139389, "correct_loss_per_char": 0.5618091821670532, "incorrect_loss_per_char": 0.058439161628484726, "correct_loss_per_token": 1.6854275465011597, "incorrect_loss_per_token": 0.2337566465139389, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2337566465139389, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": true, "logits_per_token": -0.2337566465139389, "logits_per_char": -0.058439161628484726, "num_chars": 4}, {"sum_logits": -1.6854275465011597, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -1.6854275465011597, "logits_per_char": -0.5618091821670532, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 774, "native_id": 2119, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5684984922409058, "incorrect_loss_raw": 0.2586026191711426, "correct_loss_per_char": 0.5228328307469686, "incorrect_loss_per_char": 0.06465065479278564, "correct_loss_per_token": 1.5684984922409058, "incorrect_loss_per_token": 0.2586026191711426, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2586026191711426, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": true, "logits_per_token": -0.2586026191711426, "logits_per_char": -0.06465065479278564, "num_chars": 4}, {"sum_logits": -1.5684984922409058, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": false, "logits_per_token": -1.5684984922409058, "logits_per_char": -0.5228328307469686, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 775, "native_id": 2919, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30507656931877136, "incorrect_loss_raw": 1.6238446235656738, "correct_loss_per_char": 0.07626914232969284, "incorrect_loss_per_char": 0.541281541188558, "correct_loss_per_token": 0.30507656931877136, "incorrect_loss_per_token": 1.6238446235656738, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30507656931877136, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": true, "logits_per_token": -0.30507656931877136, "logits_per_char": -0.07626914232969284, "num_chars": 4}, {"sum_logits": -1.6238446235656738, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": false, "logits_per_token": -1.6238446235656738, "logits_per_char": -0.541281541188558, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 776, "native_id": 3029, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.36142903566360474, "incorrect_loss_raw": 1.4468207359313965, "correct_loss_per_char": 0.09035725891590118, "incorrect_loss_per_char": 0.48227357864379883, "correct_loss_per_token": 0.36142903566360474, "incorrect_loss_per_token": 1.4468207359313965, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.36142903566360474, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": true, "logits_per_token": -0.36142903566360474, "logits_per_char": -0.09035725891590118, "num_chars": 4}, {"sum_logits": -1.4468207359313965, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": false, "logits_per_token": -1.4468207359313965, "logits_per_char": -0.48227357864379883, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 777, "native_id": 2122, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.528006911277771, "incorrect_loss_raw": 1.0238978862762451, "correct_loss_per_char": 0.13200172781944275, "incorrect_loss_per_char": 0.34129929542541504, "correct_loss_per_token": 0.528006911277771, "incorrect_loss_per_token": 1.0238978862762451, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.528006911277771, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": true, "logits_per_token": -0.528006911277771, "logits_per_char": -0.13200172781944275, "num_chars": 4}, {"sum_logits": -1.0238978862762451, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": false, "logits_per_token": -1.0238978862762451, "logits_per_char": -0.34129929542541504, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 778, "native_id": 2195, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6462703943252563, "incorrect_loss_raw": 0.25217974185943604, "correct_loss_per_char": 0.5487567981084188, "incorrect_loss_per_char": 0.06304493546485901, "correct_loss_per_token": 1.6462703943252563, "incorrect_loss_per_token": 0.25217974185943604, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25217974185943604, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.25217974185943604, "logits_per_char": -0.06304493546485901, "num_chars": 4}, {"sum_logits": -1.6462703943252563, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -1.6462703943252563, "logits_per_char": -0.5487567981084188, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 779, "native_id": 778, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32693588733673096, "incorrect_loss_raw": 1.4746631383895874, "correct_loss_per_char": 0.08173397183418274, "incorrect_loss_per_char": 0.4915543794631958, "correct_loss_per_token": 0.32693588733673096, "incorrect_loss_per_token": 1.4746631383895874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32693588733673096, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.32693588733673096, "logits_per_char": -0.08173397183418274, "num_chars": 4}, {"sum_logits": -1.4746631383895874, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.4746631383895874, "logits_per_char": -0.4915543794631958, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 780, "native_id": 2549, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6530088186264038, "incorrect_loss_raw": 0.828188955783844, "correct_loss_per_char": 0.16325220465660095, "incorrect_loss_per_char": 0.2760629852612813, "correct_loss_per_token": 0.6530088186264038, "incorrect_loss_per_token": 0.828188955783844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6530088186264038, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": true, "logits_per_token": -0.6530088186264038, "logits_per_char": -0.16325220465660095, "num_chars": 4}, {"sum_logits": -0.828188955783844, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": false, "logits_per_token": -0.828188955783844, "logits_per_char": -0.2760629852612813, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 781, "native_id": 410, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3239498734474182, "incorrect_loss_raw": 1.4130189418792725, "correct_loss_per_char": 0.08098746836185455, "incorrect_loss_per_char": 0.4710063139597575, "correct_loss_per_token": 0.3239498734474182, "incorrect_loss_per_token": 1.4130189418792725, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3239498734474182, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.3239498734474182, "logits_per_char": -0.08098746836185455, "num_chars": 4}, {"sum_logits": -1.4130189418792725, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.4130189418792725, "logits_per_char": -0.4710063139597575, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 782, "native_id": 1623, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.07745137065649033, "incorrect_loss_raw": 2.8757693767547607, "correct_loss_per_char": 0.01936284266412258, "incorrect_loss_per_char": 0.9585897922515869, "correct_loss_per_token": 0.07745137065649033, "incorrect_loss_per_token": 2.8757693767547607, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.07745137065649033, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.07745137065649033, "logits_per_char": -0.01936284266412258, "num_chars": 4}, {"sum_logits": -2.8757693767547607, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -2.8757693767547607, "logits_per_char": -0.9585897922515869, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 783, "native_id": 367, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1012415885925293, "incorrect_loss_raw": 0.17500324547290802, "correct_loss_per_char": 0.7004138628641764, "incorrect_loss_per_char": 0.043750811368227005, "correct_loss_per_token": 2.1012415885925293, "incorrect_loss_per_token": 0.17500324547290802, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17500324547290802, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": true, "logits_per_token": -0.17500324547290802, "logits_per_char": -0.043750811368227005, "num_chars": 4}, {"sum_logits": -2.1012415885925293, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": false, "logits_per_token": -2.1012415885925293, "logits_per_char": -0.7004138628641764, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 784, "native_id": 1302, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8643868565559387, "incorrect_loss_raw": 0.6433982253074646, "correct_loss_per_char": 0.2881289521853129, "incorrect_loss_per_char": 0.16084955632686615, "correct_loss_per_token": 0.8643868565559387, "incorrect_loss_per_token": 0.6433982253074646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6433982253074646, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.6433982253074646, "logits_per_char": -0.16084955632686615, "num_chars": 4}, {"sum_logits": -0.8643868565559387, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -0.8643868565559387, "logits_per_char": -0.2881289521853129, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 785, "native_id": 2100, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.48183485865592957, "incorrect_loss_raw": 1.0777475833892822, "correct_loss_per_char": 0.12045871466398239, "incorrect_loss_per_char": 0.35924919446309406, "correct_loss_per_token": 0.48183485865592957, "incorrect_loss_per_token": 1.0777475833892822, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.48183485865592957, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": true, "logits_per_token": -0.48183485865592957, "logits_per_char": -0.12045871466398239, "num_chars": 4}, {"sum_logits": -1.0777475833892822, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": false, "logits_per_token": -1.0777475833892822, "logits_per_char": -0.35924919446309406, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 786, "native_id": 513, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30330830812454224, "incorrect_loss_raw": 1.4638166427612305, "correct_loss_per_char": 0.07582707703113556, "incorrect_loss_per_char": 0.48793888092041016, "correct_loss_per_token": 0.30330830812454224, "incorrect_loss_per_token": 1.4638166427612305, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30330830812454224, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -0.30330830812454224, "logits_per_char": -0.07582707703113556, "num_chars": 4}, {"sum_logits": -1.4638166427612305, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -1.4638166427612305, "logits_per_char": -0.48793888092041016, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 787, "native_id": 2565, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1279258728027344, "incorrect_loss_raw": 0.5101175308227539, "correct_loss_per_char": 0.3759752909342448, "incorrect_loss_per_char": 0.12752938270568848, "correct_loss_per_token": 1.1279258728027344, "incorrect_loss_per_token": 0.5101175308227539, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5101175308227539, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": true, "logits_per_token": -0.5101175308227539, "logits_per_char": -0.12752938270568848, "num_chars": 4}, {"sum_logits": -1.1279258728027344, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": false, "logits_per_token": -1.1279258728027344, "logits_per_char": -0.3759752909342448, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 788, "native_id": 1353, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3576311469078064, "incorrect_loss_raw": 1.443112850189209, "correct_loss_per_char": 0.11921038230260213, "incorrect_loss_per_char": 0.36077821254730225, "correct_loss_per_token": 0.3576311469078064, "incorrect_loss_per_token": 1.443112850189209, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.443112850189209, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.443112850189209, "logits_per_char": -0.36077821254730225, "num_chars": 4}, {"sum_logits": -0.3576311469078064, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.3576311469078064, "logits_per_char": -0.11921038230260213, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 789, "native_id": 1973, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.163923501968384, "incorrect_loss_raw": 0.14128074049949646, "correct_loss_per_char": 0.7213078339894613, "incorrect_loss_per_char": 0.035320185124874115, "correct_loss_per_token": 2.163923501968384, "incorrect_loss_per_token": 0.14128074049949646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.14128074049949646, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": true, "logits_per_token": -0.14128074049949646, "logits_per_char": -0.035320185124874115, "num_chars": 4}, {"sum_logits": -2.163923501968384, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": false, "logits_per_token": -2.163923501968384, "logits_per_char": -0.7213078339894613, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 790, "native_id": 1073, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3604254424571991, "incorrect_loss_raw": 1.3805890083312988, "correct_loss_per_char": 0.09010636061429977, "incorrect_loss_per_char": 0.4601963361104329, "correct_loss_per_token": 0.3604254424571991, "incorrect_loss_per_token": 1.3805890083312988, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3604254424571991, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": true, "logits_per_token": -0.3604254424571991, "logits_per_char": -0.09010636061429977, "num_chars": 4}, {"sum_logits": -1.3805890083312988, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": false, "logits_per_token": -1.3805890083312988, "logits_per_char": -0.4601963361104329, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 791, "native_id": 3199, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23632392287254333, "incorrect_loss_raw": 1.7597661018371582, "correct_loss_per_char": 0.059080980718135834, "incorrect_loss_per_char": 0.5865887006123861, "correct_loss_per_token": 0.23632392287254333, "incorrect_loss_per_token": 1.7597661018371582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23632392287254333, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.23632392287254333, "logits_per_char": -0.059080980718135834, "num_chars": 4}, {"sum_logits": -1.7597661018371582, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -1.7597661018371582, "logits_per_char": -0.5865887006123861, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 792, "native_id": 261, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32575178146362305, "incorrect_loss_raw": 1.4049603939056396, "correct_loss_per_char": 0.08143794536590576, "incorrect_loss_per_char": 0.4683201313018799, "correct_loss_per_token": 0.32575178146362305, "incorrect_loss_per_token": 1.4049603939056396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32575178146362305, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": true, "logits_per_token": -0.32575178146362305, "logits_per_char": -0.08143794536590576, "num_chars": 4}, {"sum_logits": -1.4049603939056396, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": false, "logits_per_token": -1.4049603939056396, "logits_per_char": -0.4683201313018799, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 793, "native_id": 2468, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46146827936172485, "incorrect_loss_raw": 1.1514711380004883, "correct_loss_per_char": 0.11536706984043121, "incorrect_loss_per_char": 0.3838237126668294, "correct_loss_per_token": 0.46146827936172485, "incorrect_loss_per_token": 1.1514711380004883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46146827936172485, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.46146827936172485, "logits_per_char": -0.11536706984043121, "num_chars": 4}, {"sum_logits": -1.1514711380004883, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.1514711380004883, "logits_per_char": -0.3838237126668294, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 794, "native_id": 1845, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4369893968105316, "incorrect_loss_raw": 1.230459213256836, "correct_loss_per_char": 0.1092473492026329, "incorrect_loss_per_char": 0.410153071085612, "correct_loss_per_token": 0.4369893968105316, "incorrect_loss_per_token": 1.230459213256836, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4369893968105316, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.4369893968105316, "logits_per_char": -0.1092473492026329, "num_chars": 4}, {"sum_logits": -1.230459213256836, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.230459213256836, "logits_per_char": -0.410153071085612, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 795, "native_id": 43, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6104415655136108, "incorrect_loss_raw": 0.2441903054714203, "correct_loss_per_char": 0.5368138551712036, "incorrect_loss_per_char": 0.06104757636785507, "correct_loss_per_token": 1.6104415655136108, "incorrect_loss_per_token": 0.2441903054714203, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2441903054714203, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": true, "logits_per_token": -0.2441903054714203, "logits_per_char": -0.06104757636785507, "num_chars": 4}, {"sum_logits": -1.6104415655136108, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": false, "logits_per_token": -1.6104415655136108, "logits_per_char": -0.5368138551712036, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 796, "native_id": 1445, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2223643809556961, "incorrect_loss_raw": 1.8086575269699097, "correct_loss_per_char": 0.055591095238924026, "incorrect_loss_per_char": 0.6028858423233032, "correct_loss_per_token": 0.2223643809556961, "incorrect_loss_per_token": 1.8086575269699097, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2223643809556961, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.2223643809556961, "logits_per_char": -0.055591095238924026, "num_chars": 4}, {"sum_logits": -1.8086575269699097, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.8086575269699097, "logits_per_char": -0.6028858423233032, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 797, "native_id": 148, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.56672602891922, "incorrect_loss_raw": 0.9694375991821289, "correct_loss_per_char": 0.18890867630640665, "incorrect_loss_per_char": 0.24235939979553223, "correct_loss_per_token": 0.56672602891922, "incorrect_loss_per_token": 0.9694375991821289, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9694375991821289, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": false, "logits_per_token": -0.9694375991821289, "logits_per_char": -0.24235939979553223, "num_chars": 4}, {"sum_logits": -0.56672602891922, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": true, "logits_per_token": -0.56672602891922, "logits_per_char": -0.18890867630640665, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 798, "native_id": 2427, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.15352194011211395, "incorrect_loss_raw": 2.1845955848693848, "correct_loss_per_char": 0.03838048502802849, "incorrect_loss_per_char": 0.7281985282897949, "correct_loss_per_token": 0.15352194011211395, "incorrect_loss_per_token": 2.1845955848693848, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.15352194011211395, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.15352194011211395, "logits_per_char": -0.03838048502802849, "num_chars": 4}, {"sum_logits": -2.1845955848693848, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -2.1845955848693848, "logits_per_char": -0.7281985282897949, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 799, "native_id": 885, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.701011061668396, "incorrect_loss_raw": 0.7787340879440308, "correct_loss_per_char": 0.23367035388946533, "incorrect_loss_per_char": 0.1946835219860077, "correct_loss_per_token": 0.701011061668396, "incorrect_loss_per_token": 0.7787340879440308, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7787340879440308, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -0.7787340879440308, "logits_per_char": -0.1946835219860077, "num_chars": 4}, {"sum_logits": -0.701011061668396, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.701011061668396, "logits_per_char": -0.23367035388946533, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 800, "native_id": 442, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.11814303696155548, "incorrect_loss_raw": 2.313157320022583, "correct_loss_per_char": 0.02953575924038887, "incorrect_loss_per_char": 0.7710524400075277, "correct_loss_per_token": 0.11814303696155548, "incorrect_loss_per_token": 2.313157320022583, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.11814303696155548, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.11814303696155548, "logits_per_char": -0.02953575924038887, "num_chars": 4}, {"sum_logits": -2.313157320022583, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -2.313157320022583, "logits_per_char": -0.7710524400075277, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 801, "native_id": 1826, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.169863224029541, "incorrect_loss_raw": 0.45021378993988037, "correct_loss_per_char": 0.389954408009847, "incorrect_loss_per_char": 0.11255344748497009, "correct_loss_per_token": 1.169863224029541, "incorrect_loss_per_token": 0.45021378993988037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.45021378993988037, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": true, "logits_per_token": -0.45021378993988037, "logits_per_char": -0.11255344748497009, "num_chars": 4}, {"sum_logits": -1.169863224029541, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": false, "logits_per_token": -1.169863224029541, "logits_per_char": -0.389954408009847, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 802, "native_id": 2259, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.102063849568367, "incorrect_loss_raw": 2.829211473464966, "correct_loss_per_char": 0.02551596239209175, "incorrect_loss_per_char": 0.9430704911549886, "correct_loss_per_token": 0.102063849568367, "incorrect_loss_per_token": 2.829211473464966, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.102063849568367, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": true, "logits_per_token": -0.102063849568367, "logits_per_char": -0.02551596239209175, "num_chars": 4}, {"sum_logits": -2.829211473464966, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": false, "logits_per_token": -2.829211473464966, "logits_per_char": -0.9430704911549886, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 803, "native_id": 733, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24274970591068268, "incorrect_loss_raw": 1.9103436470031738, "correct_loss_per_char": 0.06068742647767067, "incorrect_loss_per_char": 0.6367812156677246, "correct_loss_per_token": 0.24274970591068268, "incorrect_loss_per_token": 1.9103436470031738, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24274970591068268, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": true, "logits_per_token": -0.24274970591068268, "logits_per_char": -0.06068742647767067, "num_chars": 4}, {"sum_logits": -1.9103436470031738, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": false, "logits_per_token": -1.9103436470031738, "logits_per_char": -0.6367812156677246, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 804, "native_id": 2348, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2825269401073456, "incorrect_loss_raw": 1.5703812837600708, "correct_loss_per_char": 0.0706317350268364, "incorrect_loss_per_char": 0.5234604279200236, "correct_loss_per_token": 0.2825269401073456, "incorrect_loss_per_token": 1.5703812837600708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2825269401073456, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.2825269401073456, "logits_per_char": -0.0706317350268364, "num_chars": 4}, {"sum_logits": -1.5703812837600708, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.5703812837600708, "logits_per_char": -0.5234604279200236, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 805, "native_id": 169, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5375176668167114, "incorrect_loss_raw": 0.9811053276062012, "correct_loss_per_char": 0.17917255560557047, "incorrect_loss_per_char": 0.2452763319015503, "correct_loss_per_token": 0.5375176668167114, "incorrect_loss_per_token": 0.9811053276062012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9811053276062012, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -0.9811053276062012, "logits_per_char": -0.2452763319015503, "num_chars": 4}, {"sum_logits": -0.5375176668167114, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.5375176668167114, "logits_per_char": -0.17917255560557047, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 806, "native_id": 2627, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.013643503189087, "incorrect_loss_raw": 0.5458694100379944, "correct_loss_per_char": 0.3378811677296956, "incorrect_loss_per_char": 0.1364673525094986, "correct_loss_per_token": 1.013643503189087, "incorrect_loss_per_token": 0.5458694100379944, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5458694100379944, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.5458694100379944, "logits_per_char": -0.1364673525094986, "num_chars": 4}, {"sum_logits": -1.013643503189087, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -1.013643503189087, "logits_per_char": -0.3378811677296956, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 807, "native_id": 2057, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2951018512248993, "incorrect_loss_raw": 1.554709792137146, "correct_loss_per_char": 0.07377546280622482, "incorrect_loss_per_char": 0.5182365973790487, "correct_loss_per_token": 0.2951018512248993, "incorrect_loss_per_token": 1.554709792137146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2951018512248993, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": true, "logits_per_token": -0.2951018512248993, "logits_per_char": -0.07377546280622482, "num_chars": 4}, {"sum_logits": -1.554709792137146, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": false, "logits_per_token": -1.554709792137146, "logits_per_char": -0.5182365973790487, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 808, "native_id": 2373, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2978176772594452, "incorrect_loss_raw": 1.4997549057006836, "correct_loss_per_char": 0.0744544193148613, "incorrect_loss_per_char": 0.49991830190022785, "correct_loss_per_token": 0.2978176772594452, "incorrect_loss_per_token": 1.4997549057006836, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2978176772594452, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.2978176772594452, "logits_per_char": -0.0744544193148613, "num_chars": 4}, {"sum_logits": -1.4997549057006836, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.4997549057006836, "logits_per_char": -0.49991830190022785, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 809, "native_id": 3040, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5057744979858398, "incorrect_loss_raw": 0.29207393527030945, "correct_loss_per_char": 0.5019248326619467, "incorrect_loss_per_char": 0.07301848381757736, "correct_loss_per_token": 1.5057744979858398, "incorrect_loss_per_token": 0.29207393527030945, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29207393527030945, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": true, "logits_per_token": -0.29207393527030945, "logits_per_char": -0.07301848381757736, "num_chars": 4}, {"sum_logits": -1.5057744979858398, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": false, "logits_per_token": -1.5057744979858398, "logits_per_char": -0.5019248326619467, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 810, "native_id": 1271, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28037703037261963, "incorrect_loss_raw": 1.4935673475265503, "correct_loss_per_char": 0.07009425759315491, "incorrect_loss_per_char": 0.4978557825088501, "correct_loss_per_token": 0.28037703037261963, "incorrect_loss_per_token": 1.4935673475265503, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28037703037261963, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.28037703037261963, "logits_per_char": -0.07009425759315491, "num_chars": 4}, {"sum_logits": -1.4935673475265503, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.4935673475265503, "logits_per_char": -0.4978557825088501, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 811, "native_id": 2368, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23068568110466003, "incorrect_loss_raw": 1.840132713317871, "correct_loss_per_char": 0.05767142027616501, "incorrect_loss_per_char": 0.613377571105957, "correct_loss_per_token": 0.23068568110466003, "incorrect_loss_per_token": 1.840132713317871, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23068568110466003, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": true, "logits_per_token": -0.23068568110466003, "logits_per_char": -0.05767142027616501, "num_chars": 4}, {"sum_logits": -1.840132713317871, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": false, "logits_per_token": -1.840132713317871, "logits_per_char": -0.613377571105957, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 812, "native_id": 132, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6542811989784241, "incorrect_loss_raw": 0.9155306220054626, "correct_loss_per_char": 0.21809373299280801, "incorrect_loss_per_char": 0.22888265550136566, "correct_loss_per_token": 0.6542811989784241, "incorrect_loss_per_token": 0.9155306220054626, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9155306220054626, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": false, "logits_per_token": -0.9155306220054626, "logits_per_char": -0.22888265550136566, "num_chars": 4}, {"sum_logits": -0.6542811989784241, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": true, "logits_per_token": -0.6542811989784241, "logits_per_char": -0.21809373299280801, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 813, "native_id": 2346, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3280603587627411, "incorrect_loss_raw": 1.3653990030288696, "correct_loss_per_char": 0.08201508969068527, "incorrect_loss_per_char": 0.45513300100962323, "correct_loss_per_token": 0.3280603587627411, "incorrect_loss_per_token": 1.3653990030288696, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3280603587627411, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.3280603587627411, "logits_per_char": -0.08201508969068527, "num_chars": 4}, {"sum_logits": -1.3653990030288696, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.3653990030288696, "logits_per_char": -0.45513300100962323, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 814, "native_id": 1382, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.760886311531067, "incorrect_loss_raw": 0.2607370615005493, "correct_loss_per_char": 0.586962103843689, "incorrect_loss_per_char": 0.06518426537513733, "correct_loss_per_token": 1.760886311531067, "incorrect_loss_per_token": 0.2607370615005493, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2607370615005493, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": true, "logits_per_token": -0.2607370615005493, "logits_per_char": -0.06518426537513733, "num_chars": 4}, {"sum_logits": -1.760886311531067, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": false, "logits_per_token": -1.760886311531067, "logits_per_char": -0.586962103843689, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 815, "native_id": 2222, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2392541468143463, "incorrect_loss_raw": 1.8148142099380493, "correct_loss_per_char": 0.05981353670358658, "incorrect_loss_per_char": 0.6049380699793497, "correct_loss_per_token": 0.2392541468143463, "incorrect_loss_per_token": 1.8148142099380493, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2392541468143463, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.2392541468143463, "logits_per_char": -0.05981353670358658, "num_chars": 4}, {"sum_logits": -1.8148142099380493, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.8148142099380493, "logits_per_char": -0.6049380699793497, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 816, "native_id": 3066, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3052254319190979, "incorrect_loss_raw": 1.5713281631469727, "correct_loss_per_char": 0.07630635797977448, "incorrect_loss_per_char": 0.5237760543823242, "correct_loss_per_token": 0.3052254319190979, "incorrect_loss_per_token": 1.5713281631469727, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3052254319190979, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.3052254319190979, "logits_per_char": -0.07630635797977448, "num_chars": 4}, {"sum_logits": -1.5713281631469727, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -1.5713281631469727, "logits_per_char": -0.5237760543823242, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 817, "native_id": 870, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2763511836528778, "incorrect_loss_raw": 1.695784330368042, "correct_loss_per_char": 0.06908779591321945, "incorrect_loss_per_char": 0.565261443456014, "correct_loss_per_token": 0.2763511836528778, "incorrect_loss_per_token": 1.695784330368042, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2763511836528778, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.2763511836528778, "logits_per_char": -0.06908779591321945, "num_chars": 4}, {"sum_logits": -1.695784330368042, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -1.695784330368042, "logits_per_char": -0.565261443456014, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 818, "native_id": 3117, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5857834219932556, "incorrect_loss_raw": 0.9076223373413086, "correct_loss_per_char": 0.1464458554983139, "incorrect_loss_per_char": 0.30254077911376953, "correct_loss_per_token": 0.5857834219932556, "incorrect_loss_per_token": 0.9076223373413086, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5857834219932556, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.5857834219932556, "logits_per_char": -0.1464458554983139, "num_chars": 4}, {"sum_logits": -0.9076223373413086, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -0.9076223373413086, "logits_per_char": -0.30254077911376953, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 819, "native_id": 2124, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22488564252853394, "incorrect_loss_raw": 2.077831268310547, "correct_loss_per_char": 0.056221410632133484, "incorrect_loss_per_char": 0.6926104227701823, "correct_loss_per_token": 0.22488564252853394, "incorrect_loss_per_token": 2.077831268310547, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22488564252853394, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.22488564252853394, "logits_per_char": -0.056221410632133484, "num_chars": 4}, {"sum_logits": -2.077831268310547, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -2.077831268310547, "logits_per_char": -0.6926104227701823, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 820, "native_id": 998, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7579573392868042, "incorrect_loss_raw": 0.7258524298667908, "correct_loss_per_char": 0.25265244642893475, "incorrect_loss_per_char": 0.1814631074666977, "correct_loss_per_token": 0.7579573392868042, "incorrect_loss_per_token": 0.7258524298667908, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7258524298667908, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.7258524298667908, "logits_per_char": -0.1814631074666977, "num_chars": 4}, {"sum_logits": -0.7579573392868042, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -0.7579573392868042, "logits_per_char": -0.25265244642893475, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 821, "native_id": 3259, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4680577516555786, "incorrect_loss_raw": 0.32674846053123474, "correct_loss_per_char": 0.48935258388519287, "incorrect_loss_per_char": 0.08168711513280869, "correct_loss_per_token": 1.4680577516555786, "incorrect_loss_per_token": 0.32674846053123474, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32674846053123474, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.32674846053123474, "logits_per_char": -0.08168711513280869, "num_chars": 4}, {"sum_logits": -1.4680577516555786, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -1.4680577516555786, "logits_per_char": -0.48935258388519287, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 822, "native_id": 1227, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28496095538139343, "incorrect_loss_raw": 1.5155004262924194, "correct_loss_per_char": 0.07124023884534836, "incorrect_loss_per_char": 0.5051668087641398, "correct_loss_per_token": 0.28496095538139343, "incorrect_loss_per_token": 1.5155004262924194, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28496095538139343, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": true, "logits_per_token": -0.28496095538139343, "logits_per_char": -0.07124023884534836, "num_chars": 4}, {"sum_logits": -1.5155004262924194, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": false, "logits_per_token": -1.5155004262924194, "logits_per_char": -0.5051668087641398, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 823, "native_id": 68, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6056885719299316, "incorrect_loss_raw": 0.9260449409484863, "correct_loss_per_char": 0.20189619064331055, "incorrect_loss_per_char": 0.23151123523712158, "correct_loss_per_token": 0.6056885719299316, "incorrect_loss_per_token": 0.9260449409484863, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9260449409484863, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -0.9260449409484863, "logits_per_char": -0.23151123523712158, "num_chars": 4}, {"sum_logits": -0.6056885719299316, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.6056885719299316, "logits_per_char": -0.20189619064331055, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 824, "native_id": 2907, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9996484518051147, "incorrect_loss_raw": 0.5480103492736816, "correct_loss_per_char": 0.3332161506017049, "incorrect_loss_per_char": 0.1370025873184204, "correct_loss_per_token": 0.9996484518051147, "incorrect_loss_per_token": 0.5480103492736816, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5480103492736816, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.5480103492736816, "logits_per_char": -0.1370025873184204, "num_chars": 4}, {"sum_logits": -0.9996484518051147, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -0.9996484518051147, "logits_per_char": -0.3332161506017049, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 825, "native_id": 344, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5042648911476135, "incorrect_loss_raw": 1.1802024841308594, "correct_loss_per_char": 0.12606622278690338, "incorrect_loss_per_char": 0.3934008280436198, "correct_loss_per_token": 0.5042648911476135, "incorrect_loss_per_token": 1.1802024841308594, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5042648911476135, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": true, "logits_per_token": -0.5042648911476135, "logits_per_char": -0.12606622278690338, "num_chars": 4}, {"sum_logits": -1.1802024841308594, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": false, "logits_per_token": -1.1802024841308594, "logits_per_char": -0.3934008280436198, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 826, "native_id": 114, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21119925379753113, "incorrect_loss_raw": 1.8424878120422363, "correct_loss_per_char": 0.05279981344938278, "incorrect_loss_per_char": 0.6141626040140787, "correct_loss_per_token": 0.21119925379753113, "incorrect_loss_per_token": 1.8424878120422363, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21119925379753113, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.21119925379753113, "logits_per_char": -0.05279981344938278, "num_chars": 4}, {"sum_logits": -1.8424878120422363, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.8424878120422363, "logits_per_char": -0.6141626040140787, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 827, "native_id": 3031, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32907387614250183, "incorrect_loss_raw": 1.3727384805679321, "correct_loss_per_char": 0.08226846903562546, "incorrect_loss_per_char": 0.45757949352264404, "correct_loss_per_token": 0.32907387614250183, "incorrect_loss_per_token": 1.3727384805679321, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32907387614250183, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.32907387614250183, "logits_per_char": -0.08226846903562546, "num_chars": 4}, {"sum_logits": -1.3727384805679321, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.3727384805679321, "logits_per_char": -0.45757949352264404, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 828, "native_id": 2283, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46482184529304504, "incorrect_loss_raw": 1.139316201210022, "correct_loss_per_char": 0.15494061509768167, "incorrect_loss_per_char": 0.2848290503025055, "correct_loss_per_token": 0.46482184529304504, "incorrect_loss_per_token": 1.139316201210022, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.139316201210022, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -1.139316201210022, "logits_per_char": -0.2848290503025055, "num_chars": 4}, {"sum_logits": -0.46482184529304504, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.46482184529304504, "logits_per_char": -0.15494061509768167, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 829, "native_id": 3138, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.45382559299469, "incorrect_loss_raw": 0.34423497319221497, "correct_loss_per_char": 0.48460853099823, "incorrect_loss_per_char": 0.08605874329805374, "correct_loss_per_token": 1.45382559299469, "incorrect_loss_per_token": 0.34423497319221497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34423497319221497, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.34423497319221497, "logits_per_char": -0.08605874329805374, "num_chars": 4}, {"sum_logits": -1.45382559299469, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -1.45382559299469, "logits_per_char": -0.48460853099823, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 830, "native_id": 2572, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9634029865264893, "incorrect_loss_raw": 0.6572052240371704, "correct_loss_per_char": 0.3211343288421631, "incorrect_loss_per_char": 0.1643013060092926, "correct_loss_per_token": 0.9634029865264893, "incorrect_loss_per_token": 0.6572052240371704, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6572052240371704, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -0.6572052240371704, "logits_per_char": -0.1643013060092926, "num_chars": 4}, {"sum_logits": -0.9634029865264893, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -0.9634029865264893, "logits_per_char": -0.3211343288421631, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 831, "native_id": 2517, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3310108184814453, "incorrect_loss_raw": 1.4406907558441162, "correct_loss_per_char": 0.08275270462036133, "incorrect_loss_per_char": 0.48023025194803876, "correct_loss_per_token": 0.3310108184814453, "incorrect_loss_per_token": 1.4406907558441162, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3310108184814453, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": true, "logits_per_token": -0.3310108184814453, "logits_per_char": -0.08275270462036133, "num_chars": 4}, {"sum_logits": -1.4406907558441162, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": false, "logits_per_token": -1.4406907558441162, "logits_per_char": -0.48023025194803876, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 832, "native_id": 1601, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.39096009731292725, "incorrect_loss_raw": 1.3629415035247803, "correct_loss_per_char": 0.09774002432823181, "incorrect_loss_per_char": 0.4543138345082601, "correct_loss_per_token": 0.39096009731292725, "incorrect_loss_per_token": 1.3629415035247803, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.39096009731292725, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.39096009731292725, "logits_per_char": -0.09774002432823181, "num_chars": 4}, {"sum_logits": -1.3629415035247803, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.3629415035247803, "logits_per_char": -0.4543138345082601, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 833, "native_id": 1866, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8316305875778198, "incorrect_loss_raw": 0.23199139535427094, "correct_loss_per_char": 0.6105435291926066, "incorrect_loss_per_char": 0.057997848838567734, "correct_loss_per_token": 1.8316305875778198, "incorrect_loss_per_token": 0.23199139535427094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23199139535427094, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.23199139535427094, "logits_per_char": -0.057997848838567734, "num_chars": 4}, {"sum_logits": -1.8316305875778198, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -1.8316305875778198, "logits_per_char": -0.6105435291926066, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 834, "native_id": 3065, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.37038421630859375, "incorrect_loss_raw": 1.3369685411453247, "correct_loss_per_char": 0.09259605407714844, "incorrect_loss_per_char": 0.4456561803817749, "correct_loss_per_token": 0.37038421630859375, "incorrect_loss_per_token": 1.3369685411453247, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.37038421630859375, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": true, "logits_per_token": -0.37038421630859375, "logits_per_char": -0.09259605407714844, "num_chars": 4}, {"sum_logits": -1.3369685411453247, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.3369685411453247, "logits_per_char": -0.4456561803817749, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 835, "native_id": 893, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3111131191253662, "incorrect_loss_raw": 1.481659173965454, "correct_loss_per_char": 0.07777827978134155, "incorrect_loss_per_char": 0.49388639132181805, "correct_loss_per_token": 0.3111131191253662, "incorrect_loss_per_token": 1.481659173965454, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3111131191253662, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.3111131191253662, "logits_per_char": -0.07777827978134155, "num_chars": 4}, {"sum_logits": -1.481659173965454, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.481659173965454, "logits_per_char": -0.49388639132181805, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 836, "native_id": 322, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19587139785289764, "incorrect_loss_raw": 1.8314036130905151, "correct_loss_per_char": 0.04896784946322441, "incorrect_loss_per_char": 0.6104678710301717, "correct_loss_per_token": 0.19587139785289764, "incorrect_loss_per_token": 1.8314036130905151, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19587139785289764, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.19587139785289764, "logits_per_char": -0.04896784946322441, "num_chars": 4}, {"sum_logits": -1.8314036130905151, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.8314036130905151, "logits_per_char": -0.6104678710301717, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 837, "native_id": 1427, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3425769805908203, "incorrect_loss_raw": 0.35461732745170593, "correct_loss_per_char": 0.4475256601969401, "incorrect_loss_per_char": 0.08865433186292648, "correct_loss_per_token": 1.3425769805908203, "incorrect_loss_per_token": 0.35461732745170593, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.35461732745170593, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.35461732745170593, "logits_per_char": -0.08865433186292648, "num_chars": 4}, {"sum_logits": -1.3425769805908203, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.3425769805908203, "logits_per_char": -0.4475256601969401, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 838, "native_id": 1370, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9699710607528687, "incorrect_loss_raw": 0.5768683552742004, "correct_loss_per_char": 0.32332368691762287, "incorrect_loss_per_char": 0.1442170888185501, "correct_loss_per_token": 0.9699710607528687, "incorrect_loss_per_token": 0.5768683552742004, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5768683552742004, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": true, "logits_per_token": -0.5768683552742004, "logits_per_char": -0.1442170888185501, "num_chars": 4}, {"sum_logits": -0.9699710607528687, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": false, "logits_per_token": -0.9699710607528687, "logits_per_char": -0.32332368691762287, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 839, "native_id": 1444, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18988555669784546, "incorrect_loss_raw": 2.028391122817993, "correct_loss_per_char": 0.047471389174461365, "incorrect_loss_per_char": 0.6761303742726644, "correct_loss_per_token": 0.18988555669784546, "incorrect_loss_per_token": 2.028391122817993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18988555669784546, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.18988555669784546, "logits_per_char": -0.047471389174461365, "num_chars": 4}, {"sum_logits": -2.028391122817993, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -2.028391122817993, "logits_per_char": -0.6761303742726644, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 840, "native_id": 1590, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27916425466537476, "incorrect_loss_raw": 1.5607330799102783, "correct_loss_per_char": 0.06979106366634369, "incorrect_loss_per_char": 0.5202443599700928, "correct_loss_per_token": 0.27916425466537476, "incorrect_loss_per_token": 1.5607330799102783, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27916425466537476, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": true, "logits_per_token": -0.27916425466537476, "logits_per_char": -0.06979106366634369, "num_chars": 4}, {"sum_logits": -1.5607330799102783, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": false, "logits_per_token": -1.5607330799102783, "logits_per_char": -0.5202443599700928, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 841, "native_id": 1454, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3612295091152191, "incorrect_loss_raw": 1.2846661806106567, "correct_loss_per_char": 0.09030737727880478, "incorrect_loss_per_char": 0.42822206020355225, "correct_loss_per_token": 0.3612295091152191, "incorrect_loss_per_token": 1.2846661806106567, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3612295091152191, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.3612295091152191, "logits_per_char": -0.09030737727880478, "num_chars": 4}, {"sum_logits": -1.2846661806106567, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.2846661806106567, "logits_per_char": -0.42822206020355225, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 842, "native_id": 389, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.33378735184669495, "incorrect_loss_raw": 1.3585083484649658, "correct_loss_per_char": 0.08344683796167374, "incorrect_loss_per_char": 0.4528361161549886, "correct_loss_per_token": 0.33378735184669495, "incorrect_loss_per_token": 1.3585083484649658, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.33378735184669495, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.33378735184669495, "logits_per_char": -0.08344683796167374, "num_chars": 4}, {"sum_logits": -1.3585083484649658, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.3585083484649658, "logits_per_char": -0.4528361161549886, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 843, "native_id": 127, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7345883846282959, "incorrect_loss_raw": 0.727647066116333, "correct_loss_per_char": 0.24486279487609863, "incorrect_loss_per_char": 0.18191176652908325, "correct_loss_per_token": 0.7345883846282959, "incorrect_loss_per_token": 0.727647066116333, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.727647066116333, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -0.727647066116333, "logits_per_char": -0.18191176652908325, "num_chars": 4}, {"sum_logits": -0.7345883846282959, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -0.7345883846282959, "logits_per_char": -0.24486279487609863, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 844, "native_id": 529, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2562916576862335, "incorrect_loss_raw": 1.8011717796325684, "correct_loss_per_char": 0.06407291442155838, "incorrect_loss_per_char": 0.6003905932108561, "correct_loss_per_token": 0.2562916576862335, "incorrect_loss_per_token": 1.8011717796325684, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2562916576862335, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.2562916576862335, "logits_per_char": -0.06407291442155838, "num_chars": 4}, {"sum_logits": -1.8011717796325684, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.8011717796325684, "logits_per_char": -0.6003905932108561, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 845, "native_id": 3222, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5513324737548828, "incorrect_loss_raw": 1.0188416242599487, "correct_loss_per_char": 0.1378331184387207, "incorrect_loss_per_char": 0.3396138747533162, "correct_loss_per_token": 0.5513324737548828, "incorrect_loss_per_token": 1.0188416242599487, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5513324737548828, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.5513324737548828, "logits_per_char": -0.1378331184387207, "num_chars": 4}, {"sum_logits": -1.0188416242599487, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.0188416242599487, "logits_per_char": -0.3396138747533162, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 846, "native_id": 1847, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22845777869224548, "incorrect_loss_raw": 1.913697361946106, "correct_loss_per_char": 0.05711444467306137, "incorrect_loss_per_char": 0.637899120648702, "correct_loss_per_token": 0.22845777869224548, "incorrect_loss_per_token": 1.913697361946106, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22845777869224548, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.22845777869224548, "logits_per_char": -0.05711444467306137, "num_chars": 4}, {"sum_logits": -1.913697361946106, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -1.913697361946106, "logits_per_char": -0.637899120648702, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 847, "native_id": 1467, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1823415756225586, "incorrect_loss_raw": 0.1386340856552124, "correct_loss_per_char": 0.7274471918741862, "incorrect_loss_per_char": 0.0346585214138031, "correct_loss_per_token": 2.1823415756225586, "incorrect_loss_per_token": 0.1386340856552124, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1386340856552124, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.1386340856552124, "logits_per_char": -0.0346585214138031, "num_chars": 4}, {"sum_logits": -2.1823415756225586, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -2.1823415756225586, "logits_per_char": -0.7274471918741862, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 848, "native_id": 515, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3795630931854248, "incorrect_loss_raw": 0.35126882791519165, "correct_loss_per_char": 0.4598543643951416, "incorrect_loss_per_char": 0.08781720697879791, "correct_loss_per_token": 1.3795630931854248, "incorrect_loss_per_token": 0.35126882791519165, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.35126882791519165, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.35126882791519165, "logits_per_char": -0.08781720697879791, "num_chars": 4}, {"sum_logits": -1.3795630931854248, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.3795630931854248, "logits_per_char": -0.4598543643951416, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 849, "native_id": 394, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4994925260543823, "incorrect_loss_raw": 0.4011821448802948, "correct_loss_per_char": 0.49983084201812744, "incorrect_loss_per_char": 0.1002955362200737, "correct_loss_per_token": 1.4994925260543823, "incorrect_loss_per_token": 0.4011821448802948, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4011821448802948, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.4011821448802948, "logits_per_char": -0.1002955362200737, "num_chars": 4}, {"sum_logits": -1.4994925260543823, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.4994925260543823, "logits_per_char": -0.49983084201812744, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 850, "native_id": 252, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.15524275600910187, "incorrect_loss_raw": 2.159379005432129, "correct_loss_per_char": 0.03881068900227547, "incorrect_loss_per_char": 0.7197930018107096, "correct_loss_per_token": 0.15524275600910187, "incorrect_loss_per_token": 2.159379005432129, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.15524275600910187, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": true, "logits_per_token": -0.15524275600910187, "logits_per_char": -0.03881068900227547, "num_chars": 4}, {"sum_logits": -2.159379005432129, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -2.159379005432129, "logits_per_char": -0.7197930018107096, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 851, "native_id": 1090, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29700222611427307, "incorrect_loss_raw": 1.5028663873672485, "correct_loss_per_char": 0.07425055652856827, "incorrect_loss_per_char": 0.5009554624557495, "correct_loss_per_token": 0.29700222611427307, "incorrect_loss_per_token": 1.5028663873672485, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29700222611427307, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.29700222611427307, "logits_per_char": -0.07425055652856827, "num_chars": 4}, {"sum_logits": -1.5028663873672485, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.5028663873672485, "logits_per_char": -0.5009554624557495, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 852, "native_id": 2329, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3595036268234253, "incorrect_loss_raw": 1.3134952783584595, "correct_loss_per_char": 0.08987590670585632, "incorrect_loss_per_char": 0.4378317594528198, "correct_loss_per_token": 0.3595036268234253, "incorrect_loss_per_token": 1.3134952783584595, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3595036268234253, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.3595036268234253, "logits_per_char": -0.08987590670585632, "num_chars": 4}, {"sum_logits": -1.3134952783584595, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.3134952783584595, "logits_per_char": -0.4378317594528198, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 853, "native_id": 649, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28471118211746216, "incorrect_loss_raw": 1.5217506885528564, "correct_loss_per_char": 0.07117779552936554, "incorrect_loss_per_char": 0.5072502295176188, "correct_loss_per_token": 0.28471118211746216, "incorrect_loss_per_token": 1.5217506885528564, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28471118211746216, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.28471118211746216, "logits_per_char": -0.07117779552936554, "num_chars": 4}, {"sum_logits": -1.5217506885528564, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.5217506885528564, "logits_per_char": -0.5072502295176188, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 854, "native_id": 129, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1620216369628906, "incorrect_loss_raw": 0.4552837014198303, "correct_loss_per_char": 0.3873405456542969, "incorrect_loss_per_char": 0.11382092535495758, "correct_loss_per_token": 1.1620216369628906, "incorrect_loss_per_token": 0.4552837014198303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4552837014198303, "num_tokens": 1, "num_tokens_all": 1169, "is_greedy": true, "logits_per_token": -0.4552837014198303, "logits_per_char": -0.11382092535495758, "num_chars": 4}, {"sum_logits": -1.1620216369628906, "num_tokens": 1, "num_tokens_all": 1169, "is_greedy": false, "logits_per_token": -1.1620216369628906, "logits_per_char": -0.3873405456542969, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 855, "native_id": 2962, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6433517336845398, "incorrect_loss_raw": 0.8465878963470459, "correct_loss_per_char": 0.2144505778948466, "incorrect_loss_per_char": 0.21164697408676147, "correct_loss_per_token": 0.6433517336845398, "incorrect_loss_per_token": 0.8465878963470459, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8465878963470459, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -0.8465878963470459, "logits_per_char": -0.21164697408676147, "num_chars": 4}, {"sum_logits": -0.6433517336845398, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -0.6433517336845398, "logits_per_char": -0.2144505778948466, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 856, "native_id": 2294, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5146263241767883, "incorrect_loss_raw": 1.0736061334609985, "correct_loss_per_char": 0.12865658104419708, "incorrect_loss_per_char": 0.3578687111536662, "correct_loss_per_token": 0.5146263241767883, "incorrect_loss_per_token": 1.0736061334609985, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5146263241767883, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.5146263241767883, "logits_per_char": -0.12865658104419708, "num_chars": 4}, {"sum_logits": -1.0736061334609985, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.0736061334609985, "logits_per_char": -0.3578687111536662, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 857, "native_id": 2022, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0340251922607422, "incorrect_loss_raw": 0.5689591765403748, "correct_loss_per_char": 0.34467506408691406, "incorrect_loss_per_char": 0.1422397941350937, "correct_loss_per_token": 1.0340251922607422, "incorrect_loss_per_token": 0.5689591765403748, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5689591765403748, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.5689591765403748, "logits_per_char": -0.1422397941350937, "num_chars": 4}, {"sum_logits": -1.0340251922607422, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -1.0340251922607422, "logits_per_char": -0.34467506408691406, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 858, "native_id": 336, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3975597620010376, "incorrect_loss_raw": 1.2805781364440918, "correct_loss_per_char": 0.0993899405002594, "incorrect_loss_per_char": 0.42685937881469727, "correct_loss_per_token": 0.3975597620010376, "incorrect_loss_per_token": 1.2805781364440918, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3975597620010376, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": true, "logits_per_token": -0.3975597620010376, "logits_per_char": -0.0993899405002594, "num_chars": 4}, {"sum_logits": -1.2805781364440918, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.2805781364440918, "logits_per_char": -0.42685937881469727, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 859, "native_id": 3239, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.09409287571907043, "incorrect_loss_raw": 2.7028844356536865, "correct_loss_per_char": 0.02352321892976761, "incorrect_loss_per_char": 0.9009614785512289, "correct_loss_per_token": 0.09409287571907043, "incorrect_loss_per_token": 2.7028844356536865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.09409287571907043, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.09409287571907043, "logits_per_char": -0.02352321892976761, "num_chars": 4}, {"sum_logits": -2.7028844356536865, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -2.7028844356536865, "logits_per_char": -0.9009614785512289, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 860, "native_id": 1783, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.2475829124450684, "incorrect_loss_raw": 0.12737083435058594, "correct_loss_per_char": 0.7491943041483561, "incorrect_loss_per_char": 0.031842708587646484, "correct_loss_per_token": 2.2475829124450684, "incorrect_loss_per_token": 0.12737083435058594, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.12737083435058594, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.12737083435058594, "logits_per_char": -0.031842708587646484, "num_chars": 4}, {"sum_logits": -2.2475829124450684, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -2.2475829124450684, "logits_per_char": -0.7491943041483561, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 861, "native_id": 1474, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6776241064071655, "incorrect_loss_raw": 0.2712254822254181, "correct_loss_per_char": 0.5592080354690552, "incorrect_loss_per_char": 0.06780637055635452, "correct_loss_per_token": 1.6776241064071655, "incorrect_loss_per_token": 0.2712254822254181, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2712254822254181, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.2712254822254181, "logits_per_char": -0.06780637055635452, "num_chars": 4}, {"sum_logits": -1.6776241064071655, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.6776241064071655, "logits_per_char": -0.5592080354690552, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 862, "native_id": 2438, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6500089168548584, "incorrect_loss_raw": 0.24257826805114746, "correct_loss_per_char": 0.5500029722849528, "incorrect_loss_per_char": 0.060644567012786865, "correct_loss_per_token": 1.6500089168548584, "incorrect_loss_per_token": 0.24257826805114746, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.24257826805114746, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.24257826805114746, "logits_per_char": -0.060644567012786865, "num_chars": 4}, {"sum_logits": -1.6500089168548584, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.6500089168548584, "logits_per_char": -0.5500029722849528, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 863, "native_id": 1722, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2053158283233643, "incorrect_loss_raw": 0.42575642466545105, "correct_loss_per_char": 0.40177194277445477, "incorrect_loss_per_char": 0.10643910616636276, "correct_loss_per_token": 1.2053158283233643, "incorrect_loss_per_token": 0.42575642466545105, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.42575642466545105, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.42575642466545105, "logits_per_char": -0.10643910616636276, "num_chars": 4}, {"sum_logits": -1.2053158283233643, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.2053158283233643, "logits_per_char": -0.40177194277445477, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 864, "native_id": 1289, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.281078815460205, "incorrect_loss_raw": 0.39566418528556824, "correct_loss_per_char": 0.42702627182006836, "incorrect_loss_per_char": 0.09891604632139206, "correct_loss_per_token": 1.281078815460205, "incorrect_loss_per_token": 0.39566418528556824, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.39566418528556824, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.39566418528556824, "logits_per_char": -0.09891604632139206, "num_chars": 4}, {"sum_logits": -1.281078815460205, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.281078815460205, "logits_per_char": -0.42702627182006836, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 865, "native_id": 786, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4766428470611572, "incorrect_loss_raw": 0.33170390129089355, "correct_loss_per_char": 0.49221428235371906, "incorrect_loss_per_char": 0.08292597532272339, "correct_loss_per_token": 1.4766428470611572, "incorrect_loss_per_token": 0.33170390129089355, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.33170390129089355, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.33170390129089355, "logits_per_char": -0.08292597532272339, "num_chars": 4}, {"sum_logits": -1.4766428470611572, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -1.4766428470611572, "logits_per_char": -0.49221428235371906, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 866, "native_id": 2218, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5251985192298889, "incorrect_loss_raw": 0.9873195290565491, "correct_loss_per_char": 0.13129962980747223, "incorrect_loss_per_char": 0.32910650968551636, "correct_loss_per_token": 0.5251985192298889, "incorrect_loss_per_token": 0.9873195290565491, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5251985192298889, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.5251985192298889, "logits_per_char": -0.13129962980747223, "num_chars": 4}, {"sum_logits": -0.9873195290565491, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -0.9873195290565491, "logits_per_char": -0.32910650968551636, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 867, "native_id": 679, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1143022775650024, "incorrect_loss_raw": 0.489386647939682, "correct_loss_per_char": 0.3714340925216675, "incorrect_loss_per_char": 0.1223466619849205, "correct_loss_per_token": 1.1143022775650024, "incorrect_loss_per_token": 0.489386647939682, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.489386647939682, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.489386647939682, "logits_per_char": -0.1223466619849205, "num_chars": 4}, {"sum_logits": -1.1143022775650024, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.1143022775650024, "logits_per_char": -0.3714340925216675, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 868, "native_id": 2353, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.743990182876587, "incorrect_loss_raw": 0.2278221845626831, "correct_loss_per_char": 0.5813300609588623, "incorrect_loss_per_char": 0.056955546140670776, "correct_loss_per_token": 1.743990182876587, "incorrect_loss_per_token": 0.2278221845626831, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2278221845626831, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.2278221845626831, "logits_per_char": -0.056955546140670776, "num_chars": 4}, {"sum_logits": -1.743990182876587, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -1.743990182876587, "logits_per_char": -0.5813300609588623, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 869, "native_id": 939, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3644879162311554, "incorrect_loss_raw": 1.361198902130127, "correct_loss_per_char": 0.09112197905778885, "incorrect_loss_per_char": 0.453732967376709, "correct_loss_per_token": 0.3644879162311554, "incorrect_loss_per_token": 1.361198902130127, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3644879162311554, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -0.3644879162311554, "logits_per_char": -0.09112197905778885, "num_chars": 4}, {"sum_logits": -1.361198902130127, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.361198902130127, "logits_per_char": -0.453732967376709, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 870, "native_id": 1734, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32501518726348877, "incorrect_loss_raw": 1.4474259614944458, "correct_loss_per_char": 0.08125379681587219, "incorrect_loss_per_char": 0.4824753204981486, "correct_loss_per_token": 0.32501518726348877, "incorrect_loss_per_token": 1.4474259614944458, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.32501518726348877, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.32501518726348877, "logits_per_char": -0.08125379681587219, "num_chars": 4}, {"sum_logits": -1.4474259614944458, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.4474259614944458, "logits_per_char": -0.4824753204981486, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 871, "native_id": 701, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3385467827320099, "incorrect_loss_raw": 1.3969860076904297, "correct_loss_per_char": 0.08463669568300247, "incorrect_loss_per_char": 0.46566200256347656, "correct_loss_per_token": 0.3385467827320099, "incorrect_loss_per_token": 1.3969860076904297, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3385467827320099, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.3385467827320099, "logits_per_char": -0.08463669568300247, "num_chars": 4}, {"sum_logits": -1.3969860076904297, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.3969860076904297, "logits_per_char": -0.46566200256347656, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 872, "native_id": 1771, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.37735792994499207, "incorrect_loss_raw": 1.3003979921340942, "correct_loss_per_char": 0.09433948248624802, "incorrect_loss_per_char": 0.43346599737803143, "correct_loss_per_token": 0.37735792994499207, "incorrect_loss_per_token": 1.3003979921340942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.37735792994499207, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.37735792994499207, "logits_per_char": -0.09433948248624802, "num_chars": 4}, {"sum_logits": -1.3003979921340942, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.3003979921340942, "logits_per_char": -0.43346599737803143, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 873, "native_id": 2518, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.7098910808563232, "incorrect_loss_raw": 0.09809776395559311, "correct_loss_per_char": 0.9032970269521078, "incorrect_loss_per_char": 0.024524440988898277, "correct_loss_per_token": 2.7098910808563232, "incorrect_loss_per_token": 0.09809776395559311, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.09809776395559311, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": true, "logits_per_token": -0.09809776395559311, "logits_per_char": -0.024524440988898277, "num_chars": 4}, {"sum_logits": -2.7098910808563232, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": false, "logits_per_token": -2.7098910808563232, "logits_per_char": -0.9032970269521078, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 874, "native_id": 572, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.306837260723114, "incorrect_loss_raw": 1.5856270790100098, "correct_loss_per_char": 0.0767093151807785, "incorrect_loss_per_char": 0.5285423596700033, "correct_loss_per_token": 0.306837260723114, "incorrect_loss_per_token": 1.5856270790100098, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.306837260723114, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.306837260723114, "logits_per_char": -0.0767093151807785, "num_chars": 4}, {"sum_logits": -1.5856270790100098, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -1.5856270790100098, "logits_per_char": -0.5285423596700033, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 875, "native_id": 1553, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1642359495162964, "incorrect_loss_raw": 0.4431331753730774, "correct_loss_per_char": 0.38807864983876544, "incorrect_loss_per_char": 0.11078329384326935, "correct_loss_per_token": 1.1642359495162964, "incorrect_loss_per_token": 0.4431331753730774, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4431331753730774, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.4431331753730774, "logits_per_char": -0.11078329384326935, "num_chars": 4}, {"sum_logits": -1.1642359495162964, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.1642359495162964, "logits_per_char": -0.38807864983876544, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 876, "native_id": 2051, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.31701594591140747, "incorrect_loss_raw": 1.5768780708312988, "correct_loss_per_char": 0.07925398647785187, "incorrect_loss_per_char": 0.525626023610433, "correct_loss_per_token": 0.31701594591140747, "incorrect_loss_per_token": 1.5768780708312988, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.31701594591140747, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.31701594591140747, "logits_per_char": -0.07925398647785187, "num_chars": 4}, {"sum_logits": -1.5768780708312988, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -1.5768780708312988, "logits_per_char": -0.525626023610433, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 877, "native_id": 3162, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6777176260948181, "incorrect_loss_raw": 0.874350905418396, "correct_loss_per_char": 0.16942940652370453, "incorrect_loss_per_char": 0.291450301806132, "correct_loss_per_token": 0.6777176260948181, "incorrect_loss_per_token": 0.874350905418396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6777176260948181, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": true, "logits_per_token": -0.6777176260948181, "logits_per_char": -0.16942940652370453, "num_chars": 4}, {"sum_logits": -0.874350905418396, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -0.874350905418396, "logits_per_char": -0.291450301806132, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 878, "native_id": 2358, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2744028568267822, "incorrect_loss_raw": 1.5251879692077637, "correct_loss_per_char": 0.06860071420669556, "incorrect_loss_per_char": 0.5083959897359213, "correct_loss_per_token": 0.2744028568267822, "incorrect_loss_per_token": 1.5251879692077637, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2744028568267822, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -0.2744028568267822, "logits_per_char": -0.06860071420669556, "num_chars": 4}, {"sum_logits": -1.5251879692077637, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.5251879692077637, "logits_per_char": -0.5083959897359213, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 879, "native_id": 1579, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7891782522201538, "incorrect_loss_raw": 0.22344429790973663, "correct_loss_per_char": 0.5963927507400513, "incorrect_loss_per_char": 0.05586107447743416, "correct_loss_per_token": 1.7891782522201538, "incorrect_loss_per_token": 0.22344429790973663, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.22344429790973663, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -0.22344429790973663, "logits_per_char": -0.05586107447743416, "num_chars": 4}, {"sum_logits": -1.7891782522201538, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.7891782522201538, "logits_per_char": -0.5963927507400513, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 880, "native_id": 3184, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4836243689060211, "incorrect_loss_raw": 1.0763218402862549, "correct_loss_per_char": 0.12090609222650528, "incorrect_loss_per_char": 0.35877394676208496, "correct_loss_per_token": 0.4836243689060211, "incorrect_loss_per_token": 1.0763218402862549, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4836243689060211, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.4836243689060211, "logits_per_char": -0.12090609222650528, "num_chars": 4}, {"sum_logits": -1.0763218402862549, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.0763218402862549, "logits_per_char": -0.35877394676208496, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 881, "native_id": 2507, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9903507232666016, "incorrect_loss_raw": 0.18373064696788788, "correct_loss_per_char": 0.6634502410888672, "incorrect_loss_per_char": 0.04593266174197197, "correct_loss_per_token": 1.9903507232666016, "incorrect_loss_per_token": 0.18373064696788788, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18373064696788788, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.18373064696788788, "logits_per_char": -0.04593266174197197, "num_chars": 4}, {"sum_logits": -1.9903507232666016, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.9903507232666016, "logits_per_char": -0.6634502410888672, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 882, "native_id": 1134, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8407889604568481, "incorrect_loss_raw": 0.6433858871459961, "correct_loss_per_char": 0.2802629868189494, "incorrect_loss_per_char": 0.16084647178649902, "correct_loss_per_token": 0.8407889604568481, "incorrect_loss_per_token": 0.6433858871459961, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6433858871459961, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -0.6433858871459961, "logits_per_char": -0.16084647178649902, "num_chars": 4}, {"sum_logits": -0.8407889604568481, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -0.8407889604568481, "logits_per_char": -0.2802629868189494, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 883, "native_id": 2696, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2301231175661087, "incorrect_loss_raw": 1.7842501401901245, "correct_loss_per_char": 0.057530779391527176, "incorrect_loss_per_char": 0.5947500467300415, "correct_loss_per_token": 0.2301231175661087, "incorrect_loss_per_token": 1.7842501401901245, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2301231175661087, "num_tokens": 1, "num_tokens_all": 847, "is_greedy": true, "logits_per_token": -0.2301231175661087, "logits_per_char": -0.057530779391527176, "num_chars": 4}, {"sum_logits": -1.7842501401901245, "num_tokens": 1, "num_tokens_all": 847, "is_greedy": false, "logits_per_token": -1.7842501401901245, "logits_per_char": -0.5947500467300415, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 884, "native_id": 585, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5143371820449829, "incorrect_loss_raw": 1.0506718158721924, "correct_loss_per_char": 0.17144572734832764, "incorrect_loss_per_char": 0.2626679539680481, "correct_loss_per_token": 0.5143371820449829, "incorrect_loss_per_token": 1.0506718158721924, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0506718158721924, "num_tokens": 1, "num_tokens_all": 843, "is_greedy": false, "logits_per_token": -1.0506718158721924, "logits_per_char": -0.2626679539680481, "num_chars": 4}, {"sum_logits": -0.5143371820449829, "num_tokens": 1, "num_tokens_all": 843, "is_greedy": true, "logits_per_token": -0.5143371820449829, "logits_per_char": -0.17144572734832764, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 885, "native_id": 1465, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4813607335090637, "incorrect_loss_raw": 1.1678180694580078, "correct_loss_per_char": 0.12034018337726593, "incorrect_loss_per_char": 0.38927268981933594, "correct_loss_per_token": 0.4813607335090637, "incorrect_loss_per_token": 1.1678180694580078, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4813607335090637, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.4813607335090637, "logits_per_char": -0.12034018337726593, "num_chars": 4}, {"sum_logits": -1.1678180694580078, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.1678180694580078, "logits_per_char": -0.38927268981933594, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 886, "native_id": 538, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.39647483825683594, "incorrect_loss_raw": 1.2891240119934082, "correct_loss_per_char": 0.09911870956420898, "incorrect_loss_per_char": 0.42970800399780273, "correct_loss_per_token": 0.39647483825683594, "incorrect_loss_per_token": 1.2891240119934082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.39647483825683594, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -0.39647483825683594, "logits_per_char": -0.09911870956420898, "num_chars": 4}, {"sum_logits": -1.2891240119934082, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.2891240119934082, "logits_per_char": -0.42970800399780273, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 887, "native_id": 1069, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4206210970878601, "incorrect_loss_raw": 1.2741570472717285, "correct_loss_per_char": 0.10515527427196503, "incorrect_loss_per_char": 0.42471901575724286, "correct_loss_per_token": 0.4206210970878601, "incorrect_loss_per_token": 1.2741570472717285, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4206210970878601, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.4206210970878601, "logits_per_char": -0.10515527427196503, "num_chars": 4}, {"sum_logits": -1.2741570472717285, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.2741570472717285, "logits_per_char": -0.42471901575724286, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 888, "native_id": 1275, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7798509001731873, "incorrect_loss_raw": 0.7147789001464844, "correct_loss_per_char": 0.25995030005772907, "incorrect_loss_per_char": 0.1786947250366211, "correct_loss_per_token": 0.7798509001731873, "incorrect_loss_per_token": 0.7147789001464844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7147789001464844, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.7147789001464844, "logits_per_char": -0.1786947250366211, "num_chars": 4}, {"sum_logits": -0.7798509001731873, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -0.7798509001731873, "logits_per_char": -0.25995030005772907, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 889, "native_id": 2734, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.15837103128433228, "incorrect_loss_raw": 2.1218550205230713, "correct_loss_per_char": 0.03959275782108307, "incorrect_loss_per_char": 0.7072850068410238, "correct_loss_per_token": 0.15837103128433228, "incorrect_loss_per_token": 2.1218550205230713, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.15837103128433228, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.15837103128433228, "logits_per_char": -0.03959275782108307, "num_chars": 4}, {"sum_logits": -2.1218550205230713, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -2.1218550205230713, "logits_per_char": -0.7072850068410238, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 890, "native_id": 1209, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.1947193741798401, "incorrect_loss_raw": 2.010594606399536, "correct_loss_per_char": 0.04867984354496002, "incorrect_loss_per_char": 0.6701982021331787, "correct_loss_per_token": 0.1947193741798401, "incorrect_loss_per_token": 2.010594606399536, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1947193741798401, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.1947193741798401, "logits_per_char": -0.04867984354496002, "num_chars": 4}, {"sum_logits": -2.010594606399536, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -2.010594606399536, "logits_per_char": -0.6701982021331787, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 891, "native_id": 2634, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.087238073348999, "incorrect_loss_raw": 0.17150306701660156, "correct_loss_per_char": 0.6957460244496664, "incorrect_loss_per_char": 0.04287576675415039, "correct_loss_per_token": 2.087238073348999, "incorrect_loss_per_token": 0.17150306701660156, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17150306701660156, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.17150306701660156, "logits_per_char": -0.04287576675415039, "num_chars": 4}, {"sum_logits": -2.087238073348999, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -2.087238073348999, "logits_per_char": -0.6957460244496664, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 892, "native_id": 2939, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.16831129789352417, "incorrect_loss_raw": 2.0999011993408203, "correct_loss_per_char": 0.04207782447338104, "incorrect_loss_per_char": 0.6999670664469401, "correct_loss_per_token": 0.16831129789352417, "incorrect_loss_per_token": 2.0999011993408203, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16831129789352417, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": true, "logits_per_token": -0.16831129789352417, "logits_per_char": -0.04207782447338104, "num_chars": 4}, {"sum_logits": -2.0999011993408203, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": false, "logits_per_token": -2.0999011993408203, "logits_per_char": -0.6999670664469401, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 893, "native_id": 1865, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2088789939880371, "incorrect_loss_raw": 1.8064961433410645, "correct_loss_per_char": 0.05221974849700928, "incorrect_loss_per_char": 0.6021653811136881, "correct_loss_per_token": 0.2088789939880371, "incorrect_loss_per_token": 1.8064961433410645, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2088789939880371, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.2088789939880371, "logits_per_char": -0.05221974849700928, "num_chars": 4}, {"sum_logits": -1.8064961433410645, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.8064961433410645, "logits_per_char": -0.6021653811136881, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 894, "native_id": 239, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.864135980606079, "incorrect_loss_raw": 0.20709918439388275, "correct_loss_per_char": 0.6213786602020264, "incorrect_loss_per_char": 0.05177479609847069, "correct_loss_per_token": 1.864135980606079, "incorrect_loss_per_token": 0.20709918439388275, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20709918439388275, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": true, "logits_per_token": -0.20709918439388275, "logits_per_char": -0.05177479609847069, "num_chars": 4}, {"sum_logits": -1.864135980606079, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -1.864135980606079, "logits_per_char": -0.6213786602020264, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 895, "native_id": 2931, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4854111075401306, "incorrect_loss_raw": 1.1353061199188232, "correct_loss_per_char": 0.12135277688503265, "incorrect_loss_per_char": 0.3784353733062744, "correct_loss_per_token": 0.4854111075401306, "incorrect_loss_per_token": 1.1353061199188232, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4854111075401306, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": true, "logits_per_token": -0.4854111075401306, "logits_per_char": -0.12135277688503265, "num_chars": 4}, {"sum_logits": -1.1353061199188232, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -1.1353061199188232, "logits_per_char": -0.3784353733062744, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 896, "native_id": 1718, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1347507238388062, "incorrect_loss_raw": 0.4334903359413147, "correct_loss_per_char": 0.37825024127960205, "incorrect_loss_per_char": 0.10837258398532867, "correct_loss_per_token": 1.1347507238388062, "incorrect_loss_per_token": 0.4334903359413147, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4334903359413147, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": true, "logits_per_token": -0.4334903359413147, "logits_per_char": -0.10837258398532867, "num_chars": 4}, {"sum_logits": -1.1347507238388062, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": false, "logits_per_token": -1.1347507238388062, "logits_per_char": -0.37825024127960205, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 897, "native_id": 1510, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29703444242477417, "incorrect_loss_raw": 1.4640429019927979, "correct_loss_per_char": 0.07425861060619354, "incorrect_loss_per_char": 0.48801430066426593, "correct_loss_per_token": 0.29703444242477417, "incorrect_loss_per_token": 1.4640429019927979, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29703444242477417, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.29703444242477417, "logits_per_char": -0.07425861060619354, "num_chars": 4}, {"sum_logits": -1.4640429019927979, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.4640429019927979, "logits_per_char": -0.48801430066426593, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 898, "native_id": 203, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9293700456619263, "incorrect_loss_raw": 0.6253899335861206, "correct_loss_per_char": 0.3097900152206421, "incorrect_loss_per_char": 0.15634748339653015, "correct_loss_per_token": 0.9293700456619263, "incorrect_loss_per_token": 0.6253899335861206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6253899335861206, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.6253899335861206, "logits_per_char": -0.15634748339653015, "num_chars": 4}, {"sum_logits": -0.9293700456619263, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -0.9293700456619263, "logits_per_char": -0.3097900152206421, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 899, "native_id": 2926, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21920284628868103, "incorrect_loss_raw": 1.8842339515686035, "correct_loss_per_char": 0.05480071157217026, "incorrect_loss_per_char": 0.6280779838562012, "correct_loss_per_token": 0.21920284628868103, "incorrect_loss_per_token": 1.8842339515686035, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21920284628868103, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": true, "logits_per_token": -0.21920284628868103, "logits_per_char": -0.05480071157217026, "num_chars": 4}, {"sum_logits": -1.8842339515686035, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": false, "logits_per_token": -1.8842339515686035, "logits_per_char": -0.6280779838562012, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 900, "native_id": 2824, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27737218141555786, "incorrect_loss_raw": 1.6912856101989746, "correct_loss_per_char": 0.06934304535388947, "incorrect_loss_per_char": 0.5637618700663248, "correct_loss_per_token": 0.27737218141555786, "incorrect_loss_per_token": 1.6912856101989746, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.27737218141555786, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": true, "logits_per_token": -0.27737218141555786, "logits_per_char": -0.06934304535388947, "num_chars": 4}, {"sum_logits": -1.6912856101989746, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.6912856101989746, "logits_per_char": -0.5637618700663248, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 901, "native_id": 2076, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4891837239265442, "incorrect_loss_raw": 1.157841682434082, "correct_loss_per_char": 0.12229593098163605, "incorrect_loss_per_char": 0.38594722747802734, "correct_loss_per_token": 0.4891837239265442, "incorrect_loss_per_token": 1.157841682434082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4891837239265442, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.4891837239265442, "logits_per_char": -0.12229593098163605, "num_chars": 4}, {"sum_logits": -1.157841682434082, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.157841682434082, "logits_per_char": -0.38594722747802734, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 902, "native_id": 2944, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21269795298576355, "incorrect_loss_raw": 1.828444242477417, "correct_loss_per_char": 0.05317448824644089, "incorrect_loss_per_char": 0.609481414159139, "correct_loss_per_token": 0.21269795298576355, "incorrect_loss_per_token": 1.828444242477417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21269795298576355, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.21269795298576355, "logits_per_char": -0.05317448824644089, "num_chars": 4}, {"sum_logits": -1.828444242477417, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -1.828444242477417, "logits_per_char": -0.609481414159139, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 903, "native_id": 2745, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.11429116129875183, "incorrect_loss_raw": 2.433671236038208, "correct_loss_per_char": 0.028572790324687958, "incorrect_loss_per_char": 0.8112237453460693, "correct_loss_per_token": 0.11429116129875183, "incorrect_loss_per_token": 2.433671236038208, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.11429116129875183, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.11429116129875183, "logits_per_char": -0.028572790324687958, "num_chars": 4}, {"sum_logits": -2.433671236038208, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -2.433671236038208, "logits_per_char": -0.8112237453460693, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 904, "native_id": 1255, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.15754958987236023, "incorrect_loss_raw": 2.0505335330963135, "correct_loss_per_char": 0.03938739746809006, "incorrect_loss_per_char": 0.6835111776987711, "correct_loss_per_token": 0.15754958987236023, "incorrect_loss_per_token": 2.0505335330963135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.15754958987236023, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.15754958987236023, "logits_per_char": -0.03938739746809006, "num_chars": 4}, {"sum_logits": -2.0505335330963135, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -2.0505335330963135, "logits_per_char": -0.6835111776987711, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 905, "native_id": 776, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5130385160446167, "incorrect_loss_raw": 0.29814597964286804, "correct_loss_per_char": 0.5043461720148722, "incorrect_loss_per_char": 0.07453649491071701, "correct_loss_per_token": 1.5130385160446167, "incorrect_loss_per_token": 0.29814597964286804, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29814597964286804, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": true, "logits_per_token": -0.29814597964286804, "logits_per_char": -0.07453649491071701, "num_chars": 4}, {"sum_logits": -1.5130385160446167, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": false, "logits_per_token": -1.5130385160446167, "logits_per_char": -0.5043461720148722, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 906, "native_id": 2392, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5236672163009644, "incorrect_loss_raw": 1.0068156719207764, "correct_loss_per_char": 0.1309168040752411, "incorrect_loss_per_char": 0.3356052239735921, "correct_loss_per_token": 0.5236672163009644, "incorrect_loss_per_token": 1.0068156719207764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5236672163009644, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": true, "logits_per_token": -0.5236672163009644, "logits_per_char": -0.1309168040752411, "num_chars": 4}, {"sum_logits": -1.0068156719207764, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": false, "logits_per_token": -1.0068156719207764, "logits_per_char": -0.3356052239735921, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 907, "native_id": 1588, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0686148405075073, "incorrect_loss_raw": 0.48443853855133057, "correct_loss_per_char": 0.35620494683583576, "incorrect_loss_per_char": 0.12110963463783264, "correct_loss_per_token": 1.0686148405075073, "incorrect_loss_per_token": 0.48443853855133057, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.48443853855133057, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.48443853855133057, "logits_per_char": -0.12110963463783264, "num_chars": 4}, {"sum_logits": -1.0686148405075073, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -1.0686148405075073, "logits_per_char": -0.35620494683583576, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 908, "native_id": 1156, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19116191565990448, "incorrect_loss_raw": 2.0455846786499023, "correct_loss_per_char": 0.04779047891497612, "incorrect_loss_per_char": 0.6818615595499674, "correct_loss_per_token": 0.19116191565990448, "incorrect_loss_per_token": 2.0455846786499023, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19116191565990448, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.19116191565990448, "logits_per_char": -0.04779047891497612, "num_chars": 4}, {"sum_logits": -2.0455846786499023, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -2.0455846786499023, "logits_per_char": -0.6818615595499674, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 909, "native_id": 1295, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2519364655017853, "incorrect_loss_raw": 1.832893967628479, "correct_loss_per_char": 0.06298411637544632, "incorrect_loss_per_char": 0.6109646558761597, "correct_loss_per_token": 0.2519364655017853, "incorrect_loss_per_token": 1.832893967628479, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2519364655017853, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.2519364655017853, "logits_per_char": -0.06298411637544632, "num_chars": 4}, {"sum_logits": -1.832893967628479, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -1.832893967628479, "logits_per_char": -0.6109646558761597, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 910, "native_id": 2298, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2109568864107132, "incorrect_loss_raw": 1.9503377676010132, "correct_loss_per_char": 0.0527392216026783, "incorrect_loss_per_char": 0.6501125892003378, "correct_loss_per_token": 0.2109568864107132, "incorrect_loss_per_token": 1.9503377676010132, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2109568864107132, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.2109568864107132, "logits_per_char": -0.0527392216026783, "num_chars": 4}, {"sum_logits": -1.9503377676010132, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.9503377676010132, "logits_per_char": -0.6501125892003378, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 911, "native_id": 1574, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6028039455413818, "incorrect_loss_raw": 0.2757527530193329, "correct_loss_per_char": 0.5342679818471273, "incorrect_loss_per_char": 0.06893818825483322, "correct_loss_per_token": 1.6028039455413818, "incorrect_loss_per_token": 0.2757527530193329, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2757527530193329, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": true, "logits_per_token": -0.2757527530193329, "logits_per_char": -0.06893818825483322, "num_chars": 4}, {"sum_logits": -1.6028039455413818, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": false, "logits_per_token": -1.6028039455413818, "logits_per_char": -0.5342679818471273, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 912, "native_id": 1702, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20578178763389587, "incorrect_loss_raw": 1.8478878736495972, "correct_loss_per_char": 0.05144544690847397, "incorrect_loss_per_char": 0.6159626245498657, "correct_loss_per_token": 0.20578178763389587, "incorrect_loss_per_token": 1.8478878736495972, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20578178763389587, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.20578178763389587, "logits_per_char": -0.05144544690847397, "num_chars": 4}, {"sum_logits": -1.8478878736495972, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -1.8478878736495972, "logits_per_char": -0.6159626245498657, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 913, "native_id": 3048, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17773963510990143, "incorrect_loss_raw": 2.082547187805176, "correct_loss_per_char": 0.04443490877747536, "incorrect_loss_per_char": 0.6941823959350586, "correct_loss_per_token": 0.17773963510990143, "incorrect_loss_per_token": 2.082547187805176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17773963510990143, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.17773963510990143, "logits_per_char": -0.04443490877747536, "num_chars": 4}, {"sum_logits": -2.082547187805176, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -2.082547187805176, "logits_per_char": -0.6941823959350586, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 914, "native_id": 2535, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8433718085289001, "incorrect_loss_raw": 0.6264360547065735, "correct_loss_per_char": 0.28112393617630005, "incorrect_loss_per_char": 0.15660901367664337, "correct_loss_per_token": 0.8433718085289001, "incorrect_loss_per_token": 0.6264360547065735, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6264360547065735, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": true, "logits_per_token": -0.6264360547065735, "logits_per_char": -0.15660901367664337, "num_chars": 4}, {"sum_logits": -0.8433718085289001, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -0.8433718085289001, "logits_per_char": -0.28112393617630005, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 915, "native_id": 2998, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.588395118713379, "incorrect_loss_raw": 0.3016400933265686, "correct_loss_per_char": 0.5294650395711263, "incorrect_loss_per_char": 0.07541002333164215, "correct_loss_per_token": 1.588395118713379, "incorrect_loss_per_token": 0.3016400933265686, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3016400933265686, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.3016400933265686, "logits_per_char": -0.07541002333164215, "num_chars": 4}, {"sum_logits": -1.588395118713379, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.588395118713379, "logits_per_char": -0.5294650395711263, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 916, "native_id": 230, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.317286878824234, "incorrect_loss_raw": 1.4166419506072998, "correct_loss_per_char": 0.0793217197060585, "incorrect_loss_per_char": 0.4722139835357666, "correct_loss_per_token": 0.317286878824234, "incorrect_loss_per_token": 1.4166419506072998, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.317286878824234, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.317286878824234, "logits_per_char": -0.0793217197060585, "num_chars": 4}, {"sum_logits": -1.4166419506072998, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.4166419506072998, "logits_per_char": -0.4722139835357666, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 917, "native_id": 2813, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3355364799499512, "incorrect_loss_raw": 0.37886470556259155, "correct_loss_per_char": 0.4451788266499837, "incorrect_loss_per_char": 0.09471617639064789, "correct_loss_per_token": 1.3355364799499512, "incorrect_loss_per_token": 0.37886470556259155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.37886470556259155, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": true, "logits_per_token": -0.37886470556259155, "logits_per_char": -0.09471617639064789, "num_chars": 4}, {"sum_logits": -1.3355364799499512, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": false, "logits_per_token": -1.3355364799499512, "logits_per_char": -0.4451788266499837, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 918, "native_id": 1052, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.249800682067871, "incorrect_loss_raw": 0.39510029554367065, "correct_loss_per_char": 0.41660022735595703, "incorrect_loss_per_char": 0.09877507388591766, "correct_loss_per_token": 1.249800682067871, "incorrect_loss_per_token": 0.39510029554367065, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.39510029554367065, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.39510029554367065, "logits_per_char": -0.09877507388591766, "num_chars": 4}, {"sum_logits": -1.249800682067871, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.249800682067871, "logits_per_char": -0.41660022735595703, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 919, "native_id": 798, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2775070369243622, "incorrect_loss_raw": 1.5990557670593262, "correct_loss_per_char": 0.06937675923109055, "incorrect_loss_per_char": 0.5330185890197754, "correct_loss_per_token": 0.2775070369243622, "incorrect_loss_per_token": 1.5990557670593262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2775070369243622, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.2775070369243622, "logits_per_char": -0.06937675923109055, "num_chars": 4}, {"sum_logits": -1.5990557670593262, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.5990557670593262, "logits_per_char": -0.5330185890197754, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 920, "native_id": 1291, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3792722821235657, "incorrect_loss_raw": 1.3437663316726685, "correct_loss_per_char": 0.09481807053089142, "incorrect_loss_per_char": 0.44792211055755615, "correct_loss_per_token": 0.3792722821235657, "incorrect_loss_per_token": 1.3437663316726685, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3792722821235657, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.3792722821235657, "logits_per_char": -0.09481807053089142, "num_chars": 4}, {"sum_logits": -1.3437663316726685, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.3437663316726685, "logits_per_char": -0.44792211055755615, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 921, "native_id": 388, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5861530303955078, "incorrect_loss_raw": 0.9183236360549927, "correct_loss_per_char": 0.14653825759887695, "incorrect_loss_per_char": 0.30610787868499756, "correct_loss_per_token": 0.5861530303955078, "incorrect_loss_per_token": 0.9183236360549927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5861530303955078, "num_tokens": 1, "num_tokens_all": 855, "is_greedy": true, "logits_per_token": -0.5861530303955078, "logits_per_char": -0.14653825759887695, "num_chars": 4}, {"sum_logits": -0.9183236360549927, "num_tokens": 1, "num_tokens_all": 855, "is_greedy": false, "logits_per_token": -0.9183236360549927, "logits_per_char": -0.30610787868499756, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 922, "native_id": 1650, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1475613117218018, "incorrect_loss_raw": 0.45205792784690857, "correct_loss_per_char": 0.3825204372406006, "incorrect_loss_per_char": 0.11301448196172714, "correct_loss_per_token": 1.1475613117218018, "incorrect_loss_per_token": 0.45205792784690857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.45205792784690857, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.45205792784690857, "logits_per_char": -0.11301448196172714, "num_chars": 4}, {"sum_logits": -1.1475613117218018, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.1475613117218018, "logits_per_char": -0.3825204372406006, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 923, "native_id": 1495, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2285471260547638, "incorrect_loss_raw": 1.822185754776001, "correct_loss_per_char": 0.05713678151369095, "incorrect_loss_per_char": 0.6073952515920004, "correct_loss_per_token": 0.2285471260547638, "incorrect_loss_per_token": 1.822185754776001, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2285471260547638, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.2285471260547638, "logits_per_char": -0.05713678151369095, "num_chars": 4}, {"sum_logits": -1.822185754776001, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -1.822185754776001, "logits_per_char": -0.6073952515920004, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 924, "native_id": 1493, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.47206467390060425, "incorrect_loss_raw": 1.1288261413574219, "correct_loss_per_char": 0.11801616847515106, "incorrect_loss_per_char": 0.37627538045247394, "correct_loss_per_token": 0.47206467390060425, "incorrect_loss_per_token": 1.1288261413574219, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.47206467390060425, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.47206467390060425, "logits_per_char": -0.11801616847515106, "num_chars": 4}, {"sum_logits": -1.1288261413574219, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.1288261413574219, "logits_per_char": -0.37627538045247394, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 925, "native_id": 1749, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.564331293106079, "incorrect_loss_raw": 0.11314613372087479, "correct_loss_per_char": 0.8547770977020264, "incorrect_loss_per_char": 0.028286533430218697, "correct_loss_per_token": 2.564331293106079, "incorrect_loss_per_token": 0.11314613372087479, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.11314613372087479, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": true, "logits_per_token": -0.11314613372087479, "logits_per_char": -0.028286533430218697, "num_chars": 4}, {"sum_logits": -2.564331293106079, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": false, "logits_per_token": -2.564331293106079, "logits_per_char": -0.8547770977020264, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 926, "native_id": 1214, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2657339870929718, "incorrect_loss_raw": 1.6097171306610107, "correct_loss_per_char": 0.06643349677324295, "incorrect_loss_per_char": 0.5365723768870035, "correct_loss_per_token": 0.2657339870929718, "incorrect_loss_per_token": 1.6097171306610107, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2657339870929718, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.2657339870929718, "logits_per_char": -0.06643349677324295, "num_chars": 4}, {"sum_logits": -1.6097171306610107, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -1.6097171306610107, "logits_per_char": -0.5365723768870035, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 927, "native_id": 1592, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6559408903121948, "incorrect_loss_raw": 0.29801592230796814, "correct_loss_per_char": 0.5519802967707316, "incorrect_loss_per_char": 0.07450398057699203, "correct_loss_per_token": 1.6559408903121948, "incorrect_loss_per_token": 0.29801592230796814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.29801592230796814, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.29801592230796814, "logits_per_char": -0.07450398057699203, "num_chars": 4}, {"sum_logits": -1.6559408903121948, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.6559408903121948, "logits_per_char": -0.5519802967707316, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 928, "native_id": 2799, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1611394882202148, "incorrect_loss_raw": 0.46188271045684814, "correct_loss_per_char": 0.38704649607340497, "incorrect_loss_per_char": 0.11547067761421204, "correct_loss_per_token": 1.1611394882202148, "incorrect_loss_per_token": 0.46188271045684814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46188271045684814, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.46188271045684814, "logits_per_char": -0.11547067761421204, "num_chars": 4}, {"sum_logits": -1.1611394882202148, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -1.1611394882202148, "logits_per_char": -0.38704649607340497, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 929, "native_id": 1154, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2314244508743286, "incorrect_loss_raw": 1.7125200033187866, "correct_loss_per_char": 0.05785611271858215, "incorrect_loss_per_char": 0.5708400011062622, "correct_loss_per_token": 0.2314244508743286, "incorrect_loss_per_token": 1.7125200033187866, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2314244508743286, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.2314244508743286, "logits_per_char": -0.05785611271858215, "num_chars": 4}, {"sum_logits": -1.7125200033187866, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.7125200033187866, "logits_per_char": -0.5708400011062622, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 930, "native_id": 2351, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20463930070400238, "incorrect_loss_raw": 1.8240501880645752, "correct_loss_per_char": 0.051159825176000595, "incorrect_loss_per_char": 0.6080167293548584, "correct_loss_per_token": 0.20463930070400238, "incorrect_loss_per_token": 1.8240501880645752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.20463930070400238, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.20463930070400238, "logits_per_char": -0.051159825176000595, "num_chars": 4}, {"sum_logits": -1.8240501880645752, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -1.8240501880645752, "logits_per_char": -0.6080167293548584, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 931, "native_id": 694, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.31957685947418213, "incorrect_loss_raw": 1.4966977834701538, "correct_loss_per_char": 0.07989421486854553, "incorrect_loss_per_char": 0.49889926115671795, "correct_loss_per_token": 0.31957685947418213, "incorrect_loss_per_token": 1.4966977834701538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.31957685947418213, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.31957685947418213, "logits_per_char": -0.07989421486854553, "num_chars": 4}, {"sum_logits": -1.4966977834701538, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -1.4966977834701538, "logits_per_char": -0.49889926115671795, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 932, "native_id": 3183, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.40240412950515747, "incorrect_loss_raw": 1.2253079414367676, "correct_loss_per_char": 0.10060103237628937, "incorrect_loss_per_char": 0.40843598047892254, "correct_loss_per_token": 0.40240412950515747, "incorrect_loss_per_token": 1.2253079414367676, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.40240412950515747, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.40240412950515747, "logits_per_char": -0.10060103237628937, "num_chars": 4}, {"sum_logits": -1.2253079414367676, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.2253079414367676, "logits_per_char": -0.40843598047892254, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 933, "native_id": 2327, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2287920713424683, "incorrect_loss_raw": 0.41403496265411377, "correct_loss_per_char": 0.40959735711415607, "incorrect_loss_per_char": 0.10350874066352844, "correct_loss_per_token": 1.2287920713424683, "incorrect_loss_per_token": 0.41403496265411377, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.41403496265411377, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.41403496265411377, "logits_per_char": -0.10350874066352844, "num_chars": 4}, {"sum_logits": -1.2287920713424683, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -1.2287920713424683, "logits_per_char": -0.40959735711415607, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 934, "native_id": 1470, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3215101957321167, "incorrect_loss_raw": 0.38347017765045166, "correct_loss_per_char": 0.44050339857737225, "incorrect_loss_per_char": 0.09586754441261292, "correct_loss_per_token": 1.3215101957321167, "incorrect_loss_per_token": 0.38347017765045166, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.38347017765045166, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.38347017765045166, "logits_per_char": -0.09586754441261292, "num_chars": 4}, {"sum_logits": -1.3215101957321167, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.3215101957321167, "logits_per_char": -0.44050339857737225, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 935, "native_id": 822, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.514883041381836, "incorrect_loss_raw": 0.3180168569087982, "correct_loss_per_char": 0.5049610137939453, "incorrect_loss_per_char": 0.07950421422719955, "correct_loss_per_token": 1.514883041381836, "incorrect_loss_per_token": 0.3180168569087982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3180168569087982, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -0.3180168569087982, "logits_per_char": -0.07950421422719955, "num_chars": 4}, {"sum_logits": -1.514883041381836, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -1.514883041381836, "logits_per_char": -0.5049610137939453, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 936, "native_id": 3095, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1571626663208008, "incorrect_loss_raw": 0.4520469307899475, "correct_loss_per_char": 0.3857208887736003, "incorrect_loss_per_char": 0.11301173269748688, "correct_loss_per_token": 1.1571626663208008, "incorrect_loss_per_token": 0.4520469307899475, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4520469307899475, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": true, "logits_per_token": -0.4520469307899475, "logits_per_char": -0.11301173269748688, "num_chars": 4}, {"sum_logits": -1.1571626663208008, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": false, "logits_per_token": -1.1571626663208008, "logits_per_char": -0.3857208887736003, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 937, "native_id": 3243, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.373671293258667, "incorrect_loss_raw": 1.3270792961120605, "correct_loss_per_char": 0.09341782331466675, "incorrect_loss_per_char": 0.44235976537068683, "correct_loss_per_token": 0.373671293258667, "incorrect_loss_per_token": 1.3270792961120605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.373671293258667, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.373671293258667, "logits_per_char": -0.09341782331466675, "num_chars": 4}, {"sum_logits": -1.3270792961120605, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.3270792961120605, "logits_per_char": -0.44235976537068683, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 938, "native_id": 254, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5938417911529541, "incorrect_loss_raw": 0.9634969234466553, "correct_loss_per_char": 0.14846044778823853, "incorrect_loss_per_char": 0.3211656411488851, "correct_loss_per_token": 0.5938417911529541, "incorrect_loss_per_token": 0.9634969234466553, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5938417911529541, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -0.5938417911529541, "logits_per_char": -0.14846044778823853, "num_chars": 4}, {"sum_logits": -0.9634969234466553, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -0.9634969234466553, "logits_per_char": -0.3211656411488851, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 939, "native_id": 1544, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3364499807357788, "incorrect_loss_raw": 1.4519027471542358, "correct_loss_per_char": 0.0841124951839447, "incorrect_loss_per_char": 0.4839675823847453, "correct_loss_per_token": 0.3364499807357788, "incorrect_loss_per_token": 1.4519027471542358, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3364499807357788, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": true, "logits_per_token": -0.3364499807357788, "logits_per_char": -0.0841124951839447, "num_chars": 4}, {"sum_logits": -1.4519027471542358, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -1.4519027471542358, "logits_per_char": -0.4839675823847453, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 940, "native_id": 2997, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.57105952501297, "incorrect_loss_raw": 0.9383420944213867, "correct_loss_per_char": 0.1427648812532425, "incorrect_loss_per_char": 0.3127806981404622, "correct_loss_per_token": 0.57105952501297, "incorrect_loss_per_token": 0.9383420944213867, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.57105952501297, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": true, "logits_per_token": -0.57105952501297, "logits_per_char": -0.1427648812532425, "num_chars": 4}, {"sum_logits": -0.9383420944213867, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": false, "logits_per_token": -0.9383420944213867, "logits_per_char": -0.3127806981404622, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 941, "native_id": 2337, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25622254610061646, "incorrect_loss_raw": 1.7156541347503662, "correct_loss_per_char": 0.06405563652515411, "incorrect_loss_per_char": 0.5718847115834554, "correct_loss_per_token": 0.25622254610061646, "incorrect_loss_per_token": 1.7156541347503662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25622254610061646, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.25622254610061646, "logits_per_char": -0.06405563652515411, "num_chars": 4}, {"sum_logits": -1.7156541347503662, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.7156541347503662, "logits_per_char": -0.5718847115834554, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 942, "native_id": 543, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.818576693534851, "incorrect_loss_raw": 0.2184269279241562, "correct_loss_per_char": 0.6061922311782837, "incorrect_loss_per_char": 0.05460673198103905, "correct_loss_per_token": 1.818576693534851, "incorrect_loss_per_token": 0.2184269279241562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2184269279241562, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": true, "logits_per_token": -0.2184269279241562, "logits_per_char": -0.05460673198103905, "num_chars": 4}, {"sum_logits": -1.818576693534851, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": false, "logits_per_token": -1.818576693534851, "logits_per_char": -0.6061922311782837, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 943, "native_id": 970, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.382926344871521, "incorrect_loss_raw": 1.2790679931640625, "correct_loss_per_char": 0.09573158621788025, "incorrect_loss_per_char": 0.4263559977213542, "correct_loss_per_token": 0.382926344871521, "incorrect_loss_per_token": 1.2790679931640625, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.382926344871521, "num_tokens": 1, "num_tokens_all": 1227, "is_greedy": true, "logits_per_token": -0.382926344871521, "logits_per_char": -0.09573158621788025, "num_chars": 4}, {"sum_logits": -1.2790679931640625, "num_tokens": 1, "num_tokens_all": 1227, "is_greedy": false, "logits_per_token": -1.2790679931640625, "logits_per_char": -0.4263559977213542, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 944, "native_id": 1538, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19695737957954407, "incorrect_loss_raw": 2.063800096511841, "correct_loss_per_char": 0.04923934489488602, "incorrect_loss_per_char": 0.6879333655039469, "correct_loss_per_token": 0.19695737957954407, "incorrect_loss_per_token": 2.063800096511841, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19695737957954407, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.19695737957954407, "logits_per_char": -0.04923934489488602, "num_chars": 4}, {"sum_logits": -2.063800096511841, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -2.063800096511841, "logits_per_char": -0.6879333655039469, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 945, "native_id": 3051, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6482670903205872, "incorrect_loss_raw": 0.8433557748794556, "correct_loss_per_char": 0.2160890301068624, "incorrect_loss_per_char": 0.2108389437198639, "correct_loss_per_token": 0.6482670903205872, "incorrect_loss_per_token": 0.8433557748794556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8433557748794556, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -0.8433557748794556, "logits_per_char": -0.2108389437198639, "num_chars": 4}, {"sum_logits": -0.6482670903205872, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.6482670903205872, "logits_per_char": -0.2160890301068624, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 946, "native_id": 2948, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.735844075679779, "incorrect_loss_raw": 0.7621529698371887, "correct_loss_per_char": 0.18396101891994476, "incorrect_loss_per_char": 0.25405098994572956, "correct_loss_per_token": 0.735844075679779, "incorrect_loss_per_token": 0.7621529698371887, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.735844075679779, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.735844075679779, "logits_per_char": -0.18396101891994476, "num_chars": 4}, {"sum_logits": -0.7621529698371887, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -0.7621529698371887, "logits_per_char": -0.25405098994572956, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 947, "native_id": 1683, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0057570934295654, "incorrect_loss_raw": 0.5617397427558899, "correct_loss_per_char": 0.3352523644765218, "incorrect_loss_per_char": 0.14043493568897247, "correct_loss_per_token": 1.0057570934295654, "incorrect_loss_per_token": 0.5617397427558899, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5617397427558899, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.5617397427558899, "logits_per_char": -0.14043493568897247, "num_chars": 4}, {"sum_logits": -1.0057570934295654, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.0057570934295654, "logits_per_char": -0.3352523644765218, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 948, "native_id": 1040, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9636873006820679, "incorrect_loss_raw": 0.19121281802654266, "correct_loss_per_char": 0.6545624335606893, "incorrect_loss_per_char": 0.047803204506635666, "correct_loss_per_token": 1.9636873006820679, "incorrect_loss_per_token": 0.19121281802654266, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.19121281802654266, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.19121281802654266, "logits_per_char": -0.047803204506635666, "num_chars": 4}, {"sum_logits": -1.9636873006820679, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -1.9636873006820679, "logits_per_char": -0.6545624335606893, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 949, "native_id": 914, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3813599944114685, "incorrect_loss_raw": 1.2698805332183838, "correct_loss_per_char": 0.09533999860286713, "incorrect_loss_per_char": 0.4232935110727946, "correct_loss_per_token": 0.3813599944114685, "incorrect_loss_per_token": 1.2698805332183838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3813599944114685, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.3813599944114685, "logits_per_char": -0.09533999860286713, "num_chars": 4}, {"sum_logits": -1.2698805332183838, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.2698805332183838, "logits_per_char": -0.4232935110727946, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 950, "native_id": 2897, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.16292592883110046, "incorrect_loss_raw": 2.2533740997314453, "correct_loss_per_char": 0.040731482207775116, "incorrect_loss_per_char": 0.7511246999104818, "correct_loss_per_token": 0.16292592883110046, "incorrect_loss_per_token": 2.2533740997314453, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.16292592883110046, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.16292592883110046, "logits_per_char": -0.040731482207775116, "num_chars": 4}, {"sum_logits": -2.2533740997314453, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -2.2533740997314453, "logits_per_char": -0.7511246999104818, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 951, "native_id": 2274, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3930622637271881, "incorrect_loss_raw": 1.3477377891540527, "correct_loss_per_char": 0.09826556593179703, "incorrect_loss_per_char": 0.4492459297180176, "correct_loss_per_token": 0.3930622637271881, "incorrect_loss_per_token": 1.3477377891540527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3930622637271881, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": true, "logits_per_token": -0.3930622637271881, "logits_per_char": -0.09826556593179703, "num_chars": 4}, {"sum_logits": -1.3477377891540527, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": false, "logits_per_token": -1.3477377891540527, "logits_per_char": -0.4492459297180176, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 952, "native_id": 1810, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23412072658538818, "incorrect_loss_raw": 1.988308072090149, "correct_loss_per_char": 0.058530181646347046, "incorrect_loss_per_char": 0.6627693573633829, "correct_loss_per_token": 0.23412072658538818, "incorrect_loss_per_token": 1.988308072090149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23412072658538818, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": true, "logits_per_token": -0.23412072658538818, "logits_per_char": -0.058530181646347046, "num_chars": 4}, {"sum_logits": -1.988308072090149, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": false, "logits_per_token": -1.988308072090149, "logits_per_char": -0.6627693573633829, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 953, "native_id": 1285, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0766332149505615, "incorrect_loss_raw": 0.2223193347454071, "correct_loss_per_char": 0.6922110716501871, "incorrect_loss_per_char": 0.055579833686351776, "correct_loss_per_token": 2.0766332149505615, "incorrect_loss_per_token": 0.2223193347454071, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2223193347454071, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.2223193347454071, "logits_per_char": -0.055579833686351776, "num_chars": 4}, {"sum_logits": -2.0766332149505615, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -2.0766332149505615, "logits_per_char": -0.6922110716501871, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 954, "native_id": 3151, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4426717162132263, "incorrect_loss_raw": 1.1026313304901123, "correct_loss_per_char": 0.11066792905330658, "incorrect_loss_per_char": 0.3675437768300374, "correct_loss_per_token": 0.4426717162132263, "incorrect_loss_per_token": 1.1026313304901123, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4426717162132263, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.4426717162132263, "logits_per_char": -0.11066792905330658, "num_chars": 4}, {"sum_logits": -1.1026313304901123, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -1.1026313304901123, "logits_per_char": -0.3675437768300374, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 955, "native_id": 2402, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5605415105819702, "incorrect_loss_raw": 0.3176206648349762, "correct_loss_per_char": 0.5201805035273234, "incorrect_loss_per_char": 0.07940516620874405, "correct_loss_per_token": 1.5605415105819702, "incorrect_loss_per_token": 0.3176206648349762, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3176206648349762, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.3176206648349762, "logits_per_char": -0.07940516620874405, "num_chars": 4}, {"sum_logits": -1.5605415105819702, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.5605415105819702, "logits_per_char": -0.5201805035273234, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 956, "native_id": 2954, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.44304484128952026, "incorrect_loss_raw": 1.227757215499878, "correct_loss_per_char": 0.11076121032238007, "incorrect_loss_per_char": 0.409252405166626, "correct_loss_per_token": 0.44304484128952026, "incorrect_loss_per_token": 1.227757215499878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.44304484128952026, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.44304484128952026, "logits_per_char": -0.11076121032238007, "num_chars": 4}, {"sum_logits": -1.227757215499878, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -1.227757215499878, "logits_per_char": -0.409252405166626, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 957, "native_id": 1027, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.325219750404358, "incorrect_loss_raw": 0.36304065585136414, "correct_loss_per_char": 0.44173991680145264, "incorrect_loss_per_char": 0.09076016396284103, "correct_loss_per_token": 1.325219750404358, "incorrect_loss_per_token": 0.36304065585136414, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.36304065585136414, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.36304065585136414, "logits_per_char": -0.09076016396284103, "num_chars": 4}, {"sum_logits": -1.325219750404358, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.325219750404358, "logits_per_char": -0.44173991680145264, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 958, "native_id": 2804, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4922710061073303, "incorrect_loss_raw": 1.0688509941101074, "correct_loss_per_char": 0.12306775152683258, "incorrect_loss_per_char": 0.35628366470336914, "correct_loss_per_token": 0.4922710061073303, "incorrect_loss_per_token": 1.0688509941101074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4922710061073303, "num_tokens": 1, "num_tokens_all": 834, "is_greedy": true, "logits_per_token": -0.4922710061073303, "logits_per_char": -0.12306775152683258, "num_chars": 4}, {"sum_logits": -1.0688509941101074, "num_tokens": 1, "num_tokens_all": 834, "is_greedy": false, "logits_per_token": -1.0688509941101074, "logits_per_char": -0.35628366470336914, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 959, "native_id": 2674, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5037406086921692, "incorrect_loss_raw": 1.0332854986190796, "correct_loss_per_char": 0.1259351521730423, "incorrect_loss_per_char": 0.3444284995396932, "correct_loss_per_token": 0.5037406086921692, "incorrect_loss_per_token": 1.0332854986190796, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5037406086921692, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -0.5037406086921692, "logits_per_char": -0.1259351521730423, "num_chars": 4}, {"sum_logits": -1.0332854986190796, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.0332854986190796, "logits_per_char": -0.3444284995396932, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 960, "native_id": 1841, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.874518632888794, "incorrect_loss_raw": 0.2152833640575409, "correct_loss_per_char": 0.6248395442962646, "incorrect_loss_per_char": 0.05382084101438522, "correct_loss_per_token": 1.874518632888794, "incorrect_loss_per_token": 0.2152833640575409, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2152833640575409, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": true, "logits_per_token": -0.2152833640575409, "logits_per_char": -0.05382084101438522, "num_chars": 4}, {"sum_logits": -1.874518632888794, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -1.874518632888794, "logits_per_char": -0.6248395442962646, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 961, "native_id": 2728, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2225683182477951, "incorrect_loss_raw": 1.7389841079711914, "correct_loss_per_char": 0.055642079561948776, "incorrect_loss_per_char": 0.5796613693237305, "correct_loss_per_token": 0.2225683182477951, "incorrect_loss_per_token": 1.7389841079711914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2225683182477951, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.2225683182477951, "logits_per_char": -0.055642079561948776, "num_chars": 4}, {"sum_logits": -1.7389841079711914, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -1.7389841079711914, "logits_per_char": -0.5796613693237305, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 962, "native_id": 3038, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7398359775543213, "incorrect_loss_raw": 0.23598343133926392, "correct_loss_per_char": 0.5799453258514404, "incorrect_loss_per_char": 0.05899585783481598, "correct_loss_per_token": 1.7398359775543213, "incorrect_loss_per_token": 0.23598343133926392, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23598343133926392, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.23598343133926392, "logits_per_char": -0.05899585783481598, "num_chars": 4}, {"sum_logits": -1.7398359775543213, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.7398359775543213, "logits_per_char": -0.5799453258514404, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 963, "native_id": 2475, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2318800389766693, "incorrect_loss_raw": 1.7518233060836792, "correct_loss_per_char": 0.05797000974416733, "incorrect_loss_per_char": 0.5839411020278931, "correct_loss_per_token": 0.2318800389766693, "incorrect_loss_per_token": 1.7518233060836792, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2318800389766693, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.2318800389766693, "logits_per_char": -0.05797000974416733, "num_chars": 4}, {"sum_logits": -1.7518233060836792, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -1.7518233060836792, "logits_per_char": -0.5839411020278931, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 964, "native_id": 372, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3833593428134918, "incorrect_loss_raw": 1.301309585571289, "correct_loss_per_char": 0.09583983570337296, "incorrect_loss_per_char": 0.4337698618570964, "correct_loss_per_token": 0.3833593428134918, "incorrect_loss_per_token": 1.301309585571289, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3833593428134918, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.3833593428134918, "logits_per_char": -0.09583983570337296, "num_chars": 4}, {"sum_logits": -1.301309585571289, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.301309585571289, "logits_per_char": -0.4337698618570964, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 965, "native_id": 2902, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2475777566432953, "incorrect_loss_raw": 1.819651484489441, "correct_loss_per_char": 0.06189443916082382, "incorrect_loss_per_char": 0.6065504948298136, "correct_loss_per_token": 0.2475777566432953, "incorrect_loss_per_token": 1.819651484489441, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2475777566432953, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": true, "logits_per_token": -0.2475777566432953, "logits_per_char": -0.06189443916082382, "num_chars": 4}, {"sum_logits": -1.819651484489441, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": false, "logits_per_token": -1.819651484489441, "logits_per_char": -0.6065504948298136, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 966, "native_id": 2141, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7091832160949707, "incorrect_loss_raw": 0.21738924086093903, "correct_loss_per_char": 0.5697277386983236, "incorrect_loss_per_char": 0.054347310215234756, "correct_loss_per_token": 1.7091832160949707, "incorrect_loss_per_token": 0.21738924086093903, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.21738924086093903, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -0.21738924086093903, "logits_per_char": -0.054347310215234756, "num_chars": 4}, {"sum_logits": -1.7091832160949707, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.7091832160949707, "logits_per_char": -0.5697277386983236, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 967, "native_id": 2524, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2804335951805115, "incorrect_loss_raw": 1.5854980945587158, "correct_loss_per_char": 0.07010839879512787, "incorrect_loss_per_char": 0.5284993648529053, "correct_loss_per_token": 0.2804335951805115, "incorrect_loss_per_token": 1.5854980945587158, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2804335951805115, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.2804335951805115, "logits_per_char": -0.07010839879512787, "num_chars": 4}, {"sum_logits": -1.5854980945587158, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.5854980945587158, "logits_per_char": -0.5284993648529053, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 968, "native_id": 2008, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9750385284423828, "incorrect_loss_raw": 0.1744282841682434, "correct_loss_per_char": 0.6583461761474609, "incorrect_loss_per_char": 0.04360707104206085, "correct_loss_per_token": 1.9750385284423828, "incorrect_loss_per_token": 0.1744282841682434, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.1744282841682434, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.1744282841682434, "logits_per_char": -0.04360707104206085, "num_chars": 4}, {"sum_logits": -1.9750385284423828, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.9750385284423828, "logits_per_char": -0.6583461761474609, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 969, "native_id": 3122, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9829995036125183, "incorrect_loss_raw": 0.5168519616127014, "correct_loss_per_char": 0.3276665012041728, "incorrect_loss_per_char": 0.12921299040317535, "correct_loss_per_token": 0.9829995036125183, "incorrect_loss_per_token": 0.5168519616127014, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5168519616127014, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.5168519616127014, "logits_per_char": -0.12921299040317535, "num_chars": 4}, {"sum_logits": -0.9829995036125183, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -0.9829995036125183, "logits_per_char": -0.3276665012041728, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 970, "native_id": 237, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5124398469924927, "incorrect_loss_raw": 1.00834059715271, "correct_loss_per_char": 0.12810996174812317, "incorrect_loss_per_char": 0.33611353238423664, "correct_loss_per_token": 0.5124398469924927, "incorrect_loss_per_token": 1.00834059715271, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5124398469924927, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.5124398469924927, "logits_per_char": -0.12810996174812317, "num_chars": 4}, {"sum_logits": -1.00834059715271, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.00834059715271, "logits_per_char": -0.33611353238423664, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 971, "native_id": 1232, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5370608568191528, "incorrect_loss_raw": 0.9368391036987305, "correct_loss_per_char": 0.1342652142047882, "incorrect_loss_per_char": 0.31227970123291016, "correct_loss_per_token": 0.5370608568191528, "incorrect_loss_per_token": 0.9368391036987305, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5370608568191528, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.5370608568191528, "logits_per_char": -0.1342652142047882, "num_chars": 4}, {"sum_logits": -0.9368391036987305, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -0.9368391036987305, "logits_per_char": -0.31227970123291016, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 972, "native_id": 867, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1990249156951904, "incorrect_loss_raw": 0.42018741369247437, "correct_loss_per_char": 0.3996749718983968, "incorrect_loss_per_char": 0.10504685342311859, "correct_loss_per_token": 1.1990249156951904, "incorrect_loss_per_token": 0.42018741369247437, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.42018741369247437, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.42018741369247437, "logits_per_char": -0.10504685342311859, "num_chars": 4}, {"sum_logits": -1.1990249156951904, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.1990249156951904, "logits_per_char": -0.3996749718983968, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 973, "native_id": 1552, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.37225541472435, "incorrect_loss_raw": 1.2337491512298584, "correct_loss_per_char": 0.0930638536810875, "incorrect_loss_per_char": 0.41124971707661945, "correct_loss_per_token": 0.37225541472435, "incorrect_loss_per_token": 1.2337491512298584, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.37225541472435, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.37225541472435, "logits_per_char": -0.0930638536810875, "num_chars": 4}, {"sum_logits": -1.2337491512298584, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.2337491512298584, "logits_per_char": -0.41124971707661945, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 974, "native_id": 2336, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8799086809158325, "incorrect_loss_raw": 0.6534866690635681, "correct_loss_per_char": 0.29330289363861084, "incorrect_loss_per_char": 0.16337166726589203, "correct_loss_per_token": 0.8799086809158325, "incorrect_loss_per_token": 0.6534866690635681, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6534866690635681, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.6534866690635681, "logits_per_char": -0.16337166726589203, "num_chars": 4}, {"sum_logits": -0.8799086809158325, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -0.8799086809158325, "logits_per_char": -0.29330289363861084, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 975, "native_id": 1684, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.454649567604065, "incorrect_loss_raw": 0.3167417049407959, "correct_loss_per_char": 0.484883189201355, "incorrect_loss_per_char": 0.07918542623519897, "correct_loss_per_token": 1.454649567604065, "incorrect_loss_per_token": 0.3167417049407959, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3167417049407959, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": true, "logits_per_token": -0.3167417049407959, "logits_per_char": -0.07918542623519897, "num_chars": 4}, {"sum_logits": -1.454649567604065, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.454649567604065, "logits_per_char": -0.484883189201355, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 976, "native_id": 291, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8389295339584351, "incorrect_loss_raw": 0.758907675743103, "correct_loss_per_char": 0.279643177986145, "incorrect_loss_per_char": 0.18972691893577576, "correct_loss_per_token": 0.8389295339584351, "incorrect_loss_per_token": 0.758907675743103, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.758907675743103, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.758907675743103, "logits_per_char": -0.18972691893577576, "num_chars": 4}, {"sum_logits": -0.8389295339584351, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -0.8389295339584351, "logits_per_char": -0.279643177986145, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 977, "native_id": 775, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5230650901794434, "incorrect_loss_raw": 1.077928066253662, "correct_loss_per_char": 0.17435503005981445, "incorrect_loss_per_char": 0.2694820165634155, "correct_loss_per_token": 0.5230650901794434, "incorrect_loss_per_token": 1.077928066253662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.077928066253662, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.077928066253662, "logits_per_char": -0.2694820165634155, "num_chars": 4}, {"sum_logits": -0.5230650901794434, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.5230650901794434, "logits_per_char": -0.17435503005981445, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 978, "native_id": 625, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4634484052658081, "incorrect_loss_raw": 1.0943055152893066, "correct_loss_per_char": 0.11586210131645203, "incorrect_loss_per_char": 0.36476850509643555, "correct_loss_per_token": 0.4634484052658081, "incorrect_loss_per_token": 1.0943055152893066, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4634484052658081, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": true, "logits_per_token": -0.4634484052658081, "logits_per_char": -0.11586210131645203, "num_chars": 4}, {"sum_logits": -1.0943055152893066, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.0943055152893066, "logits_per_char": -0.36476850509643555, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 979, "native_id": 2979, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30142447352409363, "incorrect_loss_raw": 1.5937564373016357, "correct_loss_per_char": 0.07535611838102341, "incorrect_loss_per_char": 0.5312521457672119, "correct_loss_per_token": 0.30142447352409363, "incorrect_loss_per_token": 1.5937564373016357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.30142447352409363, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.30142447352409363, "logits_per_char": -0.07535611838102341, "num_chars": 4}, {"sum_logits": -1.5937564373016357, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.5937564373016357, "logits_per_char": -0.5312521457672119, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 980, "native_id": 2782, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9744671583175659, "incorrect_loss_raw": 0.5570510625839233, "correct_loss_per_char": 0.3248223861058553, "incorrect_loss_per_char": 0.13926276564598083, "correct_loss_per_token": 0.9744671583175659, "incorrect_loss_per_token": 0.5570510625839233, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5570510625839233, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.5570510625839233, "logits_per_char": -0.13926276564598083, "num_chars": 4}, {"sum_logits": -0.9744671583175659, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -0.9744671583175659, "logits_per_char": -0.3248223861058553, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 981, "native_id": 1193, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2464599758386612, "incorrect_loss_raw": 1.8136234283447266, "correct_loss_per_char": 0.0616149939596653, "incorrect_loss_per_char": 0.6045411427815756, "correct_loss_per_token": 0.2464599758386612, "incorrect_loss_per_token": 1.8136234283447266, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2464599758386612, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.2464599758386612, "logits_per_char": -0.0616149939596653, "num_chars": 4}, {"sum_logits": -1.8136234283447266, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.8136234283447266, "logits_per_char": -0.6045411427815756, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 982, "native_id": 740, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9244614243507385, "incorrect_loss_raw": 0.5965175628662109, "correct_loss_per_char": 0.30815380811691284, "incorrect_loss_per_char": 0.14912939071655273, "correct_loss_per_token": 0.9244614243507385, "incorrect_loss_per_token": 0.5965175628662109, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5965175628662109, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": true, "logits_per_token": -0.5965175628662109, "logits_per_char": -0.14912939071655273, "num_chars": 4}, {"sum_logits": -0.9244614243507385, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -0.9244614243507385, "logits_per_char": -0.30815380811691284, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 983, "native_id": 2206, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3011804521083832, "incorrect_loss_raw": 1.4729008674621582, "correct_loss_per_char": 0.0752951130270958, "incorrect_loss_per_char": 0.4909669558207194, "correct_loss_per_token": 0.3011804521083832, "incorrect_loss_per_token": 1.4729008674621582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3011804521083832, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.3011804521083832, "logits_per_char": -0.0752951130270958, "num_chars": 4}, {"sum_logits": -1.4729008674621582, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.4729008674621582, "logits_per_char": -0.4909669558207194, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 984, "native_id": 1784, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8498345017433167, "incorrect_loss_raw": 0.7425533533096313, "correct_loss_per_char": 0.21245862543582916, "incorrect_loss_per_char": 0.2475177844365438, "correct_loss_per_token": 0.8498345017433167, "incorrect_loss_per_token": 0.7425533533096313, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8498345017433167, "num_tokens": 1, "num_tokens_all": 843, "is_greedy": false, "logits_per_token": -0.8498345017433167, "logits_per_char": -0.21245862543582916, "num_chars": 4}, {"sum_logits": -0.7425533533096313, "num_tokens": 1, "num_tokens_all": 843, "is_greedy": true, "logits_per_token": -0.7425533533096313, "logits_per_char": -0.2475177844365438, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 985, "native_id": 1923, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3034795820713043, "incorrect_loss_raw": 1.459215521812439, "correct_loss_per_char": 0.07586989551782608, "incorrect_loss_per_char": 0.4864051739374797, "correct_loss_per_token": 0.3034795820713043, "incorrect_loss_per_token": 1.459215521812439, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3034795820713043, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": true, "logits_per_token": -0.3034795820713043, "logits_per_char": -0.07586989551782608, "num_chars": 4}, {"sum_logits": -1.459215521812439, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": false, "logits_per_token": -1.459215521812439, "logits_per_char": -0.4864051739374797, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 986, "native_id": 2869, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.935248613357544, "incorrect_loss_raw": 0.5789384841918945, "correct_loss_per_char": 0.31174953778584796, "incorrect_loss_per_char": 0.14473462104797363, "correct_loss_per_token": 0.935248613357544, "incorrect_loss_per_token": 0.5789384841918945, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5789384841918945, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": true, "logits_per_token": -0.5789384841918945, "logits_per_char": -0.14473462104797363, "num_chars": 4}, {"sum_logits": -0.935248613357544, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": false, "logits_per_token": -0.935248613357544, "logits_per_char": -0.31174953778584796, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 987, "native_id": 990, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23019935190677643, "incorrect_loss_raw": 1.769235372543335, "correct_loss_per_char": 0.05754983797669411, "incorrect_loss_per_char": 0.5897451241811117, "correct_loss_per_token": 0.23019935190677643, "incorrect_loss_per_token": 1.769235372543335, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.23019935190677643, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.23019935190677643, "logits_per_char": -0.05754983797669411, "num_chars": 4}, {"sum_logits": -1.769235372543335, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.769235372543335, "logits_per_char": -0.5897451241811117, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 988, "native_id": 1955, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3206077814102173, "incorrect_loss_raw": 0.3509543240070343, "correct_loss_per_char": 0.44020259380340576, "incorrect_loss_per_char": 0.08773858100175858, "correct_loss_per_token": 1.3206077814102173, "incorrect_loss_per_token": 0.3509543240070343, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3509543240070343, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": true, "logits_per_token": -0.3509543240070343, "logits_per_char": -0.08773858100175858, "num_chars": 4}, {"sum_logits": -1.3206077814102173, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.3206077814102173, "logits_per_char": -0.44020259380340576, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 989, "native_id": 2437, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28836870193481445, "incorrect_loss_raw": 1.5304322242736816, "correct_loss_per_char": 0.07209217548370361, "incorrect_loss_per_char": 0.5101440747578939, "correct_loss_per_token": 0.28836870193481445, "incorrect_loss_per_token": 1.5304322242736816, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28836870193481445, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.28836870193481445, "logits_per_char": -0.07209217548370361, "num_chars": 4}, {"sum_logits": -1.5304322242736816, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -1.5304322242736816, "logits_per_char": -0.5101440747578939, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 990, "native_id": 393, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6536839008331299, "incorrect_loss_raw": 0.8305402398109436, "correct_loss_per_char": 0.2178946336110433, "incorrect_loss_per_char": 0.2076350599527359, "correct_loss_per_token": 0.6536839008331299, "incorrect_loss_per_token": 0.8305402398109436, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8305402398109436, "num_tokens": 1, "num_tokens_all": 854, "is_greedy": false, "logits_per_token": -0.8305402398109436, "logits_per_char": -0.2076350599527359, "num_chars": 4}, {"sum_logits": -0.6536839008331299, "num_tokens": 1, "num_tokens_all": 854, "is_greedy": true, "logits_per_token": -0.6536839008331299, "logits_per_char": -0.2178946336110433, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 991, "native_id": 650, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4382152259349823, "incorrect_loss_raw": 1.198290467262268, "correct_loss_per_char": 0.10955380648374557, "incorrect_loss_per_char": 0.39943015575408936, "correct_loss_per_token": 0.4382152259349823, "incorrect_loss_per_token": 1.198290467262268, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4382152259349823, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.4382152259349823, "logits_per_char": -0.10955380648374557, "num_chars": 4}, {"sum_logits": -1.198290467262268, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.198290467262268, "logits_per_char": -0.39943015575408936, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 992, "native_id": 3200, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38156700134277344, "incorrect_loss_raw": 1.2228492498397827, "correct_loss_per_char": 0.09539175033569336, "incorrect_loss_per_char": 0.4076164166132609, "correct_loss_per_token": 0.38156700134277344, "incorrect_loss_per_token": 1.2228492498397827, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.38156700134277344, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.38156700134277344, "logits_per_char": -0.09539175033569336, "num_chars": 4}, {"sum_logits": -1.2228492498397827, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.2228492498397827, "logits_per_char": -0.4076164166132609, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 993, "native_id": 470, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34490782022476196, "incorrect_loss_raw": 1.4867448806762695, "correct_loss_per_char": 0.08622695505619049, "incorrect_loss_per_char": 0.49558162689208984, "correct_loss_per_token": 0.34490782022476196, "incorrect_loss_per_token": 1.4867448806762695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34490782022476196, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.34490782022476196, "logits_per_char": -0.08622695505619049, "num_chars": 4}, {"sum_logits": -1.4867448806762695, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.4867448806762695, "logits_per_char": -0.49558162689208984, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 994, "native_id": 399, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.33631032705307007, "incorrect_loss_raw": 1.3503482341766357, "correct_loss_per_char": 0.08407758176326752, "incorrect_loss_per_char": 0.4501160780588786, "correct_loss_per_token": 0.33631032705307007, "incorrect_loss_per_token": 1.3503482341766357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.33631032705307007, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.33631032705307007, "logits_per_char": -0.08407758176326752, "num_chars": 4}, {"sum_logits": -1.3503482341766357, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.3503482341766357, "logits_per_char": -0.4501160780588786, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 995, "native_id": 600, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.281010240316391, "incorrect_loss_raw": 1.60145103931427, "correct_loss_per_char": 0.07025256007909775, "incorrect_loss_per_char": 0.5338170131047567, "correct_loss_per_token": 0.281010240316391, "incorrect_loss_per_token": 1.60145103931427, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.281010240316391, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.281010240316391, "logits_per_char": -0.07025256007909775, "num_chars": 4}, {"sum_logits": -1.60145103931427, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -1.60145103931427, "logits_per_char": -0.5338170131047567, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 996, "native_id": 531, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.462838649749756, "incorrect_loss_raw": 0.17524980008602142, "correct_loss_per_char": 0.820946216583252, "incorrect_loss_per_char": 0.043812450021505356, "correct_loss_per_token": 2.462838649749756, "incorrect_loss_per_token": 0.17524980008602142, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.17524980008602142, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.17524980008602142, "logits_per_char": -0.043812450021505356, "num_chars": 4}, {"sum_logits": -2.462838649749756, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -2.462838649749756, "logits_per_char": -0.820946216583252, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 997, "native_id": 508, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9490025043487549, "incorrect_loss_raw": 0.18213213980197906, "correct_loss_per_char": 0.649667501449585, "incorrect_loss_per_char": 0.045533034950494766, "correct_loss_per_token": 1.9490025043487549, "incorrect_loss_per_token": 0.18213213980197906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18213213980197906, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.18213213980197906, "logits_per_char": -0.045533034950494766, "num_chars": 4}, {"sum_logits": -1.9490025043487549, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -1.9490025043487549, "logits_per_char": -0.649667501449585, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 998, "native_id": 1929, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25866585969924927, "incorrect_loss_raw": 1.659264087677002, "correct_loss_per_char": 0.06466646492481232, "incorrect_loss_per_char": 0.5530880292256674, "correct_loss_per_token": 0.25866585969924927, "incorrect_loss_per_token": 1.659264087677002, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25866585969924927, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": true, "logits_per_token": -0.25866585969924927, "logits_per_char": -0.06466646492481232, "num_chars": 4}, {"sum_logits": -1.659264087677002, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.659264087677002, "logits_per_char": -0.5530880292256674, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 999, "native_id": 1517, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7397709488868713, "incorrect_loss_raw": 0.7277787923812866, "correct_loss_per_char": 0.24659031629562378, "incorrect_loss_per_char": 0.18194469809532166, "correct_loss_per_token": 0.7397709488868713, "incorrect_loss_per_token": 0.7277787923812866, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7277787923812866, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.7277787923812866, "logits_per_char": -0.18194469809532166, "num_chars": 4}, {"sum_logits": -0.7397709488868713, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -0.7397709488868713, "logits_per_char": -0.24659031629562378, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"}