{"doc_id": 0, "native_id": "Mercury_7175875", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.068050384521484, "logits_per_token_corr": -3.438292912074498, "logits_per_char_corr": -0.6685569551255968, "bits_per_byte_corr": 0.9645238037122167}, "model_output": [{"sum_logits": -20.126062393188477, "num_tokens": 6, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -33.10586166381836, "logits_per_token": -3.3543437321980796, "logits_per_char": -0.609880678581469, "bits_per_byte": 0.8798718305241133, "num_chars": 33}, {"sum_logits": -25.25318145751953, "num_tokens": 7, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -36.89746856689453, "logits_per_token": -3.6075973510742188, "logits_per_char": -0.7014772627088759, "bits_per_byte": 1.012017768207187, "num_chars": 36}, {"sum_logits": -24.068050384521484, "num_tokens": 7, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -36.65596389770508, "logits_per_token": -3.438292912074498, "logits_per_char": -0.6685569551255968, "bits_per_byte": 0.9645238037122167, "num_chars": 36}, {"sum_logits": -21.559574127197266, "num_tokens": 7, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -36.000247955322266, "logits_per_token": -3.079939161028181, "logits_per_char": -0.5389893531799317, "bits_per_byte": 0.7775972669251962, "num_chars": 40}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1, "native_id": "Mercury_SC_409171", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.54654884338379, "logits_per_token_corr": -3.709309768676758, "logits_per_char_corr": -0.6395361670132341, "bits_per_byte_corr": 0.9226556566197917}, "model_output": [{"sum_logits": -18.16587257385254, "num_tokens": 5, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -28.608261108398438, "logits_per_token": -3.633174514770508, "logits_per_char": -0.5859958894791142, "bits_per_byte": 0.8454133637334424, "num_chars": 31}, {"sum_logits": -18.54654884338379, "num_tokens": 5, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -30.433300018310547, "logits_per_token": -3.709309768676758, "logits_per_char": -0.6395361670132341, "bits_per_byte": 0.9226556566197917, "num_chars": 29}, {"sum_logits": -25.319721221923828, "num_tokens": 5, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -37.27893829345703, "logits_per_token": -5.063944244384766, "logits_per_char": -0.7672642794522372, "bits_per_byte": 1.1069283710177815, "num_chars": 33}, {"sum_logits": -20.172136306762695, "num_tokens": 5, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -29.99762725830078, "logits_per_token": -4.034427261352539, "logits_per_char": -0.5763467516217913, "bits_per_byte": 0.8314926003978188, "num_chars": 35}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 2, "native_id": "Mercury_SC_408547", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.02289390563965, "logits_per_token_corr": -2.102289390563965, "logits_per_char_corr": -0.44729561501360954, "bits_per_byte_corr": 0.6453111655919771}, "model_output": [{"sum_logits": -28.84601402282715, "num_tokens": 8, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -38.25163269042969, "logits_per_token": -3.6057517528533936, "logits_per_char": -0.6137449792090882, "bits_per_byte": 0.8854468378760877, "num_chars": 47}, {"sum_logits": -12.951393127441406, "num_tokens": 8, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -29.284099578857422, "logits_per_token": -1.6189241409301758, "logits_per_char": -0.28155202450959577, "bits_per_byte": 0.40619370951253353, "num_chars": 46}, {"sum_logits": -21.02289390563965, "num_tokens": 10, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -32.90852355957031, "logits_per_token": -2.102289390563965, "logits_per_char": -0.44729561501360954, "bits_per_byte": 0.6453111655919771, "num_chars": 47}, {"sum_logits": -19.227962493896484, "num_tokens": 8, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -32.946624755859375, "logits_per_token": -2.4034953117370605, "logits_per_char": -0.4806990623474121, "bits_per_byte": 0.6935021534090843, "num_chars": 40}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 3, "native_id": "Mercury_407327", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -28.422128677368164, "logits_per_token_corr": -3.5527660846710205, "logits_per_char_corr": -0.7287725301889273, "bits_per_byte_corr": 1.0513965152404232}, "model_output": [{"sum_logits": -9.069866180419922, "num_tokens": 2, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -14.811290740966797, "logits_per_token": -4.534933090209961, "logits_per_char": -0.7558221817016602, "bits_per_byte": 1.0904209133356455, "num_chars": 12}, {"sum_logits": -17.257305145263672, "num_tokens": 3, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -22.246906280517578, "logits_per_token": -5.752435048421224, "logits_per_char": -0.6902922058105468, "bits_per_byte": 0.9958811420878951, "num_chars": 25}, {"sum_logits": -29.872962951660156, "num_tokens": 6, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -34.400146484375, "logits_per_token": -4.978827158610026, "logits_per_char": -0.8298045264350044, "bits_per_byte": 1.1971548751958556, "num_chars": 36}, {"sum_logits": -28.422128677368164, "num_tokens": 8, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -35.792842864990234, "logits_per_token": -3.5527660846710205, "logits_per_char": -0.7287725301889273, "bits_per_byte": 1.0513965152404232, "num_chars": 39}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 4, "native_id": "MCAS_2006_9_44", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -35.989906311035156, "logits_per_token_corr": -2.7684543316180887, "logits_per_char_corr": -0.5623422861099243, "bits_per_byte_corr": 0.8112884274535332}, "model_output": [{"sum_logits": -25.53935432434082, "num_tokens": 7, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -39.689544677734375, "logits_per_token": -3.6484791891915456, "logits_per_char": -0.7094265090094672, "bits_per_byte": 1.0234861064238632, "num_chars": 36}, {"sum_logits": -27.17875099182129, "num_tokens": 11, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -43.99378967285156, "logits_per_token": -2.4707955447110264, "logits_per_char": -0.4606567964715473, "bits_per_byte": 0.6645872758217752, "num_chars": 59}, {"sum_logits": -25.75200653076172, "num_tokens": 11, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -42.06632995605469, "logits_per_token": -2.34109150279652, "logits_per_char": -0.4517895882589775, "bits_per_byte": 0.6517945985069618, "num_chars": 57}, {"sum_logits": -35.989906311035156, "num_tokens": 13, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -53.584659576416016, "logits_per_token": -2.7684543316180887, "logits_per_char": -0.5623422861099243, "bits_per_byte": 0.8112884274535332, "num_chars": 64}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 5, "native_id": "Mercury_7270393", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -34.177978515625, "logits_per_token_corr": -4.882568359375, "logits_per_char_corr": -1.0052346622242647, "bits_per_byte_corr": 1.4502470621216808}, "model_output": [{"sum_logits": -37.76115036010742, "num_tokens": 10, "num_tokens_all": 246, "is_greedy": false, "sum_logits_uncond": -48.26201629638672, "logits_per_token": -3.7761150360107423, "logits_per_char": -1.0205716313542545, "bits_per_byte": 1.4723736314278002, "num_chars": 37}, {"sum_logits": -34.177978515625, "num_tokens": 7, "num_tokens_all": 243, "is_greedy": false, "sum_logits_uncond": -45.91436004638672, "logits_per_token": -4.882568359375, "logits_per_char": -1.0052346622242647, "bits_per_byte": 1.4502470621216808, "num_chars": 34}, {"sum_logits": -17.725730895996094, "num_tokens": 8, "num_tokens_all": 244, "is_greedy": false, "sum_logits_uncond": -29.71566390991211, "logits_per_token": -2.2157163619995117, "logits_per_char": -0.5213450263528263, "bits_per_byte": 0.7521418841118889, "num_chars": 34}, {"sum_logits": -13.589469909667969, "num_tokens": 5, "num_tokens_all": 241, "is_greedy": false, "sum_logits_uncond": -26.584720611572266, "logits_per_token": -2.7178939819335937, "logits_per_char": -0.4383699970860635, "bits_per_byte": 0.6324342208710275, "num_chars": 31}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 6, "native_id": "MCAS_2014_5_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.847463607788086, "logits_per_token_corr": -1.9861330552534624, "logits_per_char_corr": -0.3901332787105015, "bits_per_byte_corr": 0.5628433464817967}, "model_output": [{"sum_logits": -34.0494384765625, "num_tokens": 7, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -52.93547058105469, "logits_per_token": -4.864205496651786, "logits_per_char": -0.8960378546463815, "bits_per_byte": 1.2927093693480491, "num_chars": 38}, {"sum_logits": -14.768238067626953, "num_tokens": 8, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -29.639253616333008, "logits_per_token": -1.8460297584533691, "logits_per_char": -0.30767162640889484, "bits_per_byte": 0.4438763296426733, "num_chars": 48}, {"sum_logits": -21.847463607788086, "num_tokens": 11, "num_tokens_all": 235, "is_greedy": false, "sum_logits_uncond": -41.66649627685547, "logits_per_token": -1.9861330552534624, "logits_per_char": -0.3901332787105015, "bits_per_byte": 0.5628433464817967, "num_chars": 56}, {"sum_logits": -30.510700225830078, "num_tokens": 11, "num_tokens_all": 235, "is_greedy": false, "sum_logits_uncond": -57.98970031738281, "logits_per_token": -2.7737000205300073, "logits_per_char": -0.5260465556177599, "bits_per_byte": 0.7589247570670078, "num_chars": 58}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 7, "native_id": "Mercury_7086660", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.023048400878906, "logits_per_token_corr": -2.3358942667643228, "logits_per_char_corr": -0.48890810234602106, "bits_per_byte_corr": 0.7053452947055451}, "model_output": [{"sum_logits": -18.28241729736328, "num_tokens": 9, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -36.50001525878906, "logits_per_token": -2.0313796997070312, "logits_per_char": -0.4687799307016226, "bits_per_byte": 0.6763064812919888, "num_chars": 39}, {"sum_logits": -20.534156799316406, "num_tokens": 7, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -30.09766960144043, "logits_per_token": -2.9334509713309154, "logits_per_char": -0.7080743723902209, "bits_per_byte": 1.0215353856286706, "num_chars": 29}, {"sum_logits": -21.023048400878906, "num_tokens": 9, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -37.765464782714844, "logits_per_token": -2.3358942667643228, "logits_per_char": -0.48890810234602106, "bits_per_byte": 0.7053452947055451, "num_chars": 43}, {"sum_logits": -24.515487670898438, "num_tokens": 8, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -32.52372360229492, "logits_per_token": -3.0644359588623047, "logits_per_char": -0.66258074786212, "bits_per_byte": 0.9559019591298679, "num_chars": 37}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 8, "native_id": "Mercury_7168805", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.889169692993164, "logits_per_token_corr": -2.7654632992214627, "logits_per_char_corr": -0.5530926598442926, "bits_per_byte_corr": 0.7979440375100205}, "model_output": [{"sum_logits": -16.864795684814453, "num_tokens": 7, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -31.895221710205078, "logits_per_token": -2.4092565264020647, "logits_per_char": -0.4438104127582751, "bits_per_byte": 0.6402830815817074, "num_chars": 38}, {"sum_logits": -24.889169692993164, "num_tokens": 9, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -36.66880416870117, "logits_per_token": -2.7654632992214627, "logits_per_char": -0.5530926598442926, "bits_per_byte": 0.7979440375100205, "num_chars": 45}, {"sum_logits": -23.124149322509766, "num_tokens": 10, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -35.75786590576172, "logits_per_token": -2.3124149322509764, "logits_per_char": -0.5138699849446614, "bits_per_byte": 0.7413576789418819, "num_chars": 45}, {"sum_logits": -30.0497989654541, "num_tokens": 10, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -41.51976776123047, "logits_per_token": -3.00497989654541, "logits_per_char": -0.6132612033766143, "bits_per_byte": 0.884748896881675, "num_chars": 49}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 9, "native_id": "MCAS_2003_8_11", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.296817779541016, "logits_per_token_corr": -4.148408889770508, "logits_per_char_corr": -0.921868642171224, "bits_per_byte_corr": 1.3299753184124228}, "model_output": [{"sum_logits": -8.296817779541016, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -16.314077377319336, "logits_per_token": -4.148408889770508, "logits_per_char": -0.921868642171224, "bits_per_byte": 1.3299753184124228, "num_chars": 9}, {"sum_logits": -8.858509063720703, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -15.742084503173828, "logits_per_token": -4.429254531860352, "logits_per_char": -0.6814237741323618, "bits_per_byte": 0.9830866996853057, "num_chars": 13}, {"sum_logits": -11.552789688110352, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -16.691814422607422, "logits_per_token": -5.776394844055176, "logits_per_char": -0.9627324740091959, "bits_per_byte": 1.3889293659568276, "num_chars": 12}, {"sum_logits": -13.768543243408203, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -16.94261360168457, "logits_per_token": -6.884271621704102, "logits_per_char": -1.2516857494007458, "bits_per_byte": 1.805800823413139, "num_chars": 11}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 10, "native_id": "Mercury_7250058", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.07835865020752, "logits_per_token_corr": -6.03917932510376, "logits_per_char_corr": -0.7104916853063247, "bits_per_byte_corr": 1.025022830985013}, "model_output": [{"sum_logits": -10.997116088867188, "num_tokens": 2, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -21.210796356201172, "logits_per_token": -5.498558044433594, "logits_per_char": -0.7331410725911458, "bits_per_byte": 1.0576989897000215, "num_chars": 15}, {"sum_logits": -12.07835865020752, "num_tokens": 2, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -19.480403900146484, "logits_per_token": -6.03917932510376, "logits_per_char": -0.7104916853063247, "bits_per_byte": 1.025022830985013, "num_chars": 17}, {"sum_logits": -10.418801307678223, "num_tokens": 2, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -20.852466583251953, "logits_per_token": -5.209400653839111, "logits_per_char": -0.6128706651575425, "bits_per_byte": 0.8841854693297422, "num_chars": 17}, {"sum_logits": -13.596778869628906, "num_tokens": 2, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -20.258092880249023, "logits_per_token": -6.798389434814453, "logits_per_char": -0.7998105217428768, "bits_per_byte": 1.1538826733700918, "num_chars": 17}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 11, "native_id": "Mercury_7012740", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -20.710203170776367, "logits_per_token_corr": -2.588775396347046, "logits_per_char_corr": -0.4816326318785202, "bits_per_byte_corr": 0.69484900954194}, "model_output": [{"sum_logits": -20.710203170776367, "num_tokens": 8, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -42.977867126464844, "logits_per_token": -2.588775396347046, "logits_per_char": -0.4816326318785202, "bits_per_byte": 0.69484900954194, "num_chars": 43}, {"sum_logits": -19.553810119628906, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -35.91947937011719, "logits_per_token": -3.2589683532714844, "logits_per_char": -0.5925397005948153, "bits_per_byte": 0.8548540875785854, "num_chars": 33}, {"sum_logits": -24.48726463317871, "num_tokens": 12, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -45.53925323486328, "logits_per_token": -2.040605386098226, "logits_per_char": -0.445222993330522, "bits_per_byte": 0.6423210045681456, "num_chars": 55}, {"sum_logits": -19.527650833129883, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -30.62474822998047, "logits_per_token": -3.9055301666259767, "logits_per_char": -0.7232463271529587, "bits_per_byte": 1.04342388952548, "num_chars": 27}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 12, "native_id": "Mercury_LBS10610", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.683749198913574, "logits_per_token_corr": -3.5612497329711914, "logits_per_char_corr": -1.7806248664855957, "bits_per_byte_corr": 2.2019131410550177}, "model_output": [{"sum_logits": -8.506207466125488, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -20.006343841552734, "logits_per_token": -2.835402488708496, "logits_per_char": -1.417701244354248, "bits_per_byte": 1.7531233325943907, "num_chars": 6}, {"sum_logits": -6.503768444061279, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -19.70915985107422, "logits_per_token": -2.1679228146870932, "logits_per_char": -1.0839614073435466, "bits_per_byte": 1.3404220687634398, "num_chars": 6}, {"sum_logits": -10.683749198913574, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -19.870899200439453, "logits_per_token": -3.5612497329711914, "logits_per_char": -1.7806248664855957, "bits_per_byte": 2.2019131410550177, "num_chars": 6}, {"sum_logits": -12.673158645629883, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -20.663951873779297, "logits_per_token": -4.224386215209961, "logits_per_char": -2.1121931076049805, "bits_per_byte": 2.611929018637494, "num_chars": 6}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 13, "native_id": "Mercury_SC_407400", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.404638290405273, "logits_per_token_corr": -3.200662612915039, "logits_per_char_corr": -0.678928433042584, "bits_per_byte_corr": 0.9794866834697543}, "model_output": [{"sum_logits": -16.315486907958984, "num_tokens": 4, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -24.283519744873047, "logits_per_token": -4.078871726989746, "logits_per_char": -0.6275187272291917, "bits_per_byte": 0.9053181558391593, "num_chars": 26}, {"sum_logits": -16.973909378051758, "num_tokens": 7, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -23.876605987548828, "logits_per_token": -2.424844196864537, "logits_per_char": -0.5475454638081212, "bits_per_byte": 0.7899411252977914, "num_chars": 31}, {"sum_logits": -22.404638290405273, "num_tokens": 7, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -31.290645599365234, "logits_per_token": -3.200662612915039, "logits_per_char": -0.678928433042584, "bits_per_byte": 0.9794866834697543, "num_chars": 33}, {"sum_logits": -19.772369384765625, "num_tokens": 8, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -37.914798736572266, "logits_per_token": -2.471546173095703, "logits_per_char": -0.44937203147194604, "bits_per_byte": 0.6483068013192416, "num_chars": 44}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 14, "native_id": "Mercury_7212993", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.5161468982696533, "logits_per_token_corr": -1.2580734491348267, "logits_per_char_corr": -0.20967890818913779, "bits_per_byte_corr": 0.3025027210236987}, "model_output": [{"sum_logits": -4.858006477355957, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -14.968596458435059, "logits_per_token": -4.858006477355957, "logits_per_char": -0.8096677462259928, "bits_per_byte": 1.1681036422488227, "num_chars": 6}, {"sum_logits": -3.421196937561035, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -13.3920316696167, "logits_per_token": -3.421196937561035, "logits_per_char": -0.4276496171951294, "bits_per_byte": 0.61696798196592, "num_chars": 8}, {"sum_logits": -2.5161468982696533, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -15.916326522827148, "logits_per_token": -1.2580734491348267, "logits_per_char": -0.20967890818913779, "bits_per_byte": 0.3025027210236987, "num_chars": 12}, {"sum_logits": -1.718050479888916, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": true, "sum_logits_uncond": -14.251230239868164, "logits_per_token": -0.859025239944458, "logits_per_char": -0.11453669865926107, "bits_per_byte": 0.16524152715562823, "num_chars": 15}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 15, "native_id": "Mercury_SC_413240", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.078113079071045, "logits_per_token_corr": -0.815622615814209, "logits_per_char_corr": -0.22656183772616917, "bits_per_byte_corr": 0.3268596397424691}, "model_output": [{"sum_logits": -4.078113079071045, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -18.55349349975586, "logits_per_token": -0.815622615814209, "logits_per_char": -0.22656183772616917, "bits_per_byte": 0.3268596397424691, "num_chars": 18}, {"sum_logits": -6.254326820373535, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -19.482749938964844, "logits_per_token": -1.250865364074707, "logits_per_char": -0.32917509580913346, "bits_per_byte": 0.4748992783083274, "num_chars": 19}, {"sum_logits": -6.836759090423584, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -18.439041137695312, "logits_per_token": -1.3673518180847168, "logits_per_char": -0.3418379545211792, "bits_per_byte": 0.4931679217756866, "num_chars": 20}, {"sum_logits": -10.831106185913086, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -21.821352005004883, "logits_per_token": -2.1662212371826173, "logits_per_char": -0.5415553092956543, "bits_per_byte": 0.7812991590884906, "num_chars": 20}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 16, "native_id": "Mercury_7186358", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.944374084472656, "logits_per_token_corr": -2.849196297781808, "logits_per_char_corr": -0.4864481484017721, "bits_per_byte_corr": 0.7017963313493593}, "model_output": [{"sum_logits": -25.88640785217285, "num_tokens": 6, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -45.787357330322266, "logits_per_token": -4.314401308695476, "logits_per_char": -0.7396116529192244, "bits_per_byte": 1.0670340638510207, "num_chars": 35}, {"sum_logits": -23.363346099853516, "num_tokens": 7, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -35.30583190917969, "logits_per_token": -3.337620871407645, "logits_per_char": -0.5840836524963379, "bits_per_byte": 0.8426545889213847, "num_chars": 40}, {"sum_logits": -19.944374084472656, "num_tokens": 7, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -32.6190071105957, "logits_per_token": -2.849196297781808, "logits_per_char": -0.4864481484017721, "bits_per_byte": 0.7017963313493593, "num_chars": 41}, {"sum_logits": -42.602542877197266, "num_tokens": 9, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -56.96841049194336, "logits_per_token": -4.733615875244141, "logits_per_char": -0.7345266013309873, "bits_per_byte": 1.0596978851420014, "num_chars": 58}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 17, "native_id": "Mercury_7166425", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.975483894348145, "logits_per_token_corr": -1.6625806490580242, "logits_per_char_corr": -0.41564516226450604, "bits_per_byte_corr": 0.5996492143689222}, "model_output": [{"sum_logits": -10.28618049621582, "num_tokens": 6, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -31.773303985595703, "logits_per_token": -1.71436341603597, "logits_per_char": -0.4285908540089925, "bits_per_byte": 0.6183258996495834, "num_chars": 24}, {"sum_logits": -9.975483894348145, "num_tokens": 6, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -31.365093231201172, "logits_per_token": -1.6625806490580242, "logits_per_char": -0.41564516226450604, "bits_per_byte": 0.5996492143689222, "num_chars": 24}, {"sum_logits": -9.939043045043945, "num_tokens": 6, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -39.10597610473633, "logits_per_token": -1.656507174173991, "logits_per_char": -0.35496582303728375, "bits_per_byte": 0.5121074325813265, "num_chars": 28}, {"sum_logits": -12.30093002319336, "num_tokens": 6, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -34.81952667236328, "logits_per_token": -2.05015500386556, "logits_per_char": -0.43931892939976286, "bits_per_byte": 0.6338032408141419, "num_chars": 28}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 18, "native_id": "MDSA_2007_8_3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.524580955505371, "logits_per_token_corr": -1.9207634925842285, "logits_per_char_corr": -0.3389582633972168, "bits_per_byte_corr": 0.48901340567185103}, "model_output": [{"sum_logits": -11.524580955505371, "num_tokens": 6, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -29.857723236083984, "logits_per_token": -1.9207634925842285, "logits_per_char": -0.3389582633972168, "bits_per_byte": 0.48901340567185103, "num_chars": 34}, {"sum_logits": -13.122567176818848, "num_tokens": 6, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -31.824066162109375, "logits_per_token": -2.187094529469808, "logits_per_char": -0.38595785814173084, "bits_per_byte": 0.5568194879336011, "num_chars": 34}, {"sum_logits": -13.635043144226074, "num_tokens": 6, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -30.722074508666992, "logits_per_token": -2.2725071907043457, "logits_per_char": -0.3787511984507243, "bits_per_byte": 0.5464224757360041, "num_chars": 36}, {"sum_logits": -16.84088706970215, "num_tokens": 7, "num_tokens_all": 230, "is_greedy": false, "sum_logits_uncond": -35.459754943847656, "logits_per_token": -2.4058410099574496, "logits_per_char": -0.45515910999195, "bits_per_byte": 0.6566557908012922, "num_chars": 37}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 19, "native_id": "Mercury_7094290", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.448511123657227, "logits_per_token_corr": -4.816170374552409, "logits_per_char_corr": -0.5351300416169343, "bits_per_byte_corr": 0.7720294572720104}, "model_output": [{"sum_logits": -14.530427932739258, "num_tokens": 6, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -24.548311233520508, "logits_per_token": -2.4217379887898765, "logits_per_char": -0.5381639975088613, "bits_per_byte": 0.7764065303915725, "num_chars": 27}, {"sum_logits": -18.60297393798828, "num_tokens": 4, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -29.426883697509766, "logits_per_token": -4.65074348449707, "logits_per_char": -0.6643919263567243, "bits_per_byte": 0.9585149373622002, "num_chars": 28}, {"sum_logits": -14.448511123657227, "num_tokens": 3, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -24.188600540161133, "logits_per_token": -4.816170374552409, "logits_per_char": -0.5351300416169343, "bits_per_byte": 0.7720294572720104, "num_chars": 27}, {"sum_logits": -10.058653831481934, "num_tokens": 6, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -25.31551742553711, "logits_per_token": -1.676442305246989, "logits_per_char": -0.30480769186308887, "bits_per_byte": 0.4397445454760055, "num_chars": 33}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 20, "native_id": "Mercury_7186568", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.68002986907959, "logits_per_token_corr": -3.8933432896931968, "logits_per_char_corr": -0.5561918985275995, "bits_per_byte_corr": 0.8024152937889619}, "model_output": [{"sum_logits": -14.275724411010742, "num_tokens": 2, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -21.687477111816406, "logits_per_token": -7.137862205505371, "logits_per_char": -0.7930958006117079, "bits_per_byte": 1.144195378493195, "num_chars": 18}, {"sum_logits": -11.68002986907959, "num_tokens": 3, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -27.598388671875, "logits_per_token": -3.8933432896931968, "logits_per_char": -0.5561918985275995, "bits_per_byte": 0.8024152937889619, "num_chars": 21}, {"sum_logits": -18.938486099243164, "num_tokens": 6, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -31.593997955322266, "logits_per_token": -3.156414349873861, "logits_per_char": -0.48560220767290163, "bits_per_byte": 0.700575896854931, "num_chars": 39}, {"sum_logits": -23.310802459716797, "num_tokens": 7, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -28.22214126586914, "logits_per_token": -3.3301146371023997, "logits_per_char": -0.518017832438151, "bits_per_byte": 0.7473417579511074, "num_chars": 45}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 21, "native_id": "Mercury_402216", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.95148468017578, "logits_per_token_corr": -0.8313118616739908, "logits_per_char_corr": -0.5700424194335938, "bits_per_byte_corr": 0.822397371613783}, "model_output": [{"sum_logits": -20.591373443603516, "num_tokens": 11, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -46.0301628112793, "logits_per_token": -1.8719430403275923, "logits_per_char": -1.0295686721801758, "bits_per_byte": 1.4853536176100417, "num_chars": 20}, {"sum_logits": -19.95148468017578, "num_tokens": 24, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -47.45807647705078, "logits_per_token": -0.8313118616739908, "logits_per_char": -0.5700424194335938, "bits_per_byte": 0.822397371613783, "num_chars": 35}, {"sum_logits": -25.810192108154297, "num_tokens": 11, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -48.06487274169922, "logits_per_token": -2.3463811007413, "logits_per_char": -1.6131370067596436, "bits_per_byte": 2.327264759928276, "num_chars": 16}, {"sum_logits": -19.112995147705078, "num_tokens": 19, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -42.30780029296875, "logits_per_token": -1.0059471130371094, "logits_per_char": -0.7351151979886569, "bits_per_byte": 1.0605470506211057, "num_chars": 26}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 22, "native_id": "Mercury_404894", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -24.30237579345703, "logits_per_token_corr": -2.2093068903142754, "logits_per_char_corr": -0.41900647919753503, "bits_per_byte_corr": 0.6044985696390627}, "model_output": [{"sum_logits": -24.30237579345703, "num_tokens": 11, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -36.31758117675781, "logits_per_token": -2.2093068903142754, "logits_per_char": -0.41900647919753503, "bits_per_byte": 0.6044985696390627, "num_chars": 58}, {"sum_logits": -18.978046417236328, "num_tokens": 6, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -27.101024627685547, "logits_per_token": -3.1630077362060547, "logits_per_char": -0.5750923156738281, "bits_per_byte": 0.8296828318765782, "num_chars": 33}, {"sum_logits": -20.147680282592773, "num_tokens": 10, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -27.73128890991211, "logits_per_token": -2.0147680282592773, "logits_per_char": -0.4286740485658037, "bits_per_byte": 0.618445924024124, "num_chars": 47}, {"sum_logits": -16.680309295654297, "num_tokens": 6, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -21.975860595703125, "logits_per_token": -2.7800515492757163, "logits_per_char": -0.6415503575251653, "bits_per_byte": 0.9255615192827624, "num_chars": 26}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 23, "native_id": "MCAS_2002_8_11", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.656027793884277, "logits_per_token_corr": -1.6640069484710693, "logits_per_char_corr": -1.1093379656473796, "bits_per_byte_corr": 1.3718026128946934}, "model_output": [{"sum_logits": -8.285113334655762, "num_tokens": 4, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -19.382898330688477, "logits_per_token": -2.0712783336639404, "logits_per_char": -1.380852222442627, "bits_per_byte": 1.7075559887313538, "num_chars": 6}, {"sum_logits": -6.468233108520508, "num_tokens": 4, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -18.440031051635742, "logits_per_token": -1.617058277130127, "logits_per_char": -1.0780388514200847, "bits_per_byte": 1.3330982612832922, "num_chars": 6}, {"sum_logits": -6.656027793884277, "num_tokens": 4, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -18.518333435058594, "logits_per_token": -1.6640069484710693, "logits_per_char": -1.1093379656473796, "bits_per_byte": 1.3718026128946934, "num_chars": 6}, {"sum_logits": -6.821323394775391, "num_tokens": 4, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -18.823036193847656, "logits_per_token": -1.7053308486938477, "logits_per_char": -1.1368872324625652, "bits_per_byte": 1.4058699191356279, "num_chars": 6}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 24, "native_id": "Mercury_SC_405086", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.864956855773926, "logits_per_token_corr": -1.932478427886963, "logits_per_char_corr": -0.5521366936819894, "bits_per_byte_corr": 0.796564869868407}, "model_output": [{"sum_logits": -12.578811645507812, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -14.924893379211426, "logits_per_token": -6.289405822753906, "logits_per_char": -2.5157623291015625, "bits_per_byte": 3.6294778362527, "num_chars": 5}, {"sum_logits": -3.864956855773926, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -13.385858535766602, "logits_per_token": -1.932478427886963, "logits_per_char": -0.5521366936819894, "bits_per_byte": 0.796564869868407, "num_chars": 7}, {"sum_logits": -6.724225997924805, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -16.58793830871582, "logits_per_token": -3.3621129989624023, "logits_per_char": -1.1207043329874675, "bits_per_byte": 1.6168345835049545, "num_chars": 6}, {"sum_logits": -6.775669097900391, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -16.396564483642578, "logits_per_token": -3.3878345489501953, "logits_per_char": -0.8469586372375488, "bits_per_byte": 1.2219030257815642, "num_chars": 8}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 25, "native_id": "Mercury_SC_408324", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.0575532913208, "logits_per_token_corr": -2.61151065826416, "logits_per_char_corr": -0.6872396469116211, "bits_per_byte_corr": 0.9914772305023902}, "model_output": [{"sum_logits": -11.07938289642334, "num_tokens": 3, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -23.420333862304688, "logits_per_token": -3.6931276321411133, "logits_per_char": -0.7913844926016671, "bits_per_byte": 1.141726482913674, "num_chars": 14}, {"sum_logits": -10.19736099243164, "num_tokens": 3, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -19.993356704711914, "logits_per_token": -3.399120330810547, "logits_per_char": -0.6798240661621093, "bits_per_byte": 0.9807788089297503, "num_chars": 15}, {"sum_logits": -17.75646209716797, "num_tokens": 5, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -26.738529205322266, "logits_per_token": -3.5512924194335938, "logits_per_char": -1.0444977704216452, "bits_per_byte": 1.5068917536079691, "num_chars": 17}, {"sum_logits": -13.0575532913208, "num_tokens": 5, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -26.69948959350586, "logits_per_token": -2.61151065826416, "logits_per_char": -0.6872396469116211, "bits_per_byte": 0.9914772305023902, "num_chars": 19}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 26, "native_id": "Mercury_7218820", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -36.84648895263672, "logits_per_token_corr": -3.684648895263672, "logits_per_char_corr": -0.6040408025022412, "bits_per_byte_corr": 0.8714466702651993}, "model_output": [{"sum_logits": -33.85580825805664, "num_tokens": 10, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -39.190818786621094, "logits_per_token": -3.385580825805664, "logits_per_char": -0.6269594121862341, "bits_per_byte": 0.9045112348003893, "num_chars": 54}, {"sum_logits": -36.84648895263672, "num_tokens": 10, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -44.040016174316406, "logits_per_token": -3.684648895263672, "logits_per_char": -0.6040408025022412, "bits_per_byte": 0.8714466702651993, "num_chars": 61}, {"sum_logits": -44.858367919921875, "num_tokens": 14, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -50.7349853515625, "logits_per_token": -3.204169137137277, "logits_per_char": -0.6144981906838614, "bits_per_byte": 0.8865334923354843, "num_chars": 73}, {"sum_logits": -39.06452941894531, "num_tokens": 12, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -51.501487731933594, "logits_per_token": -3.255377451578776, "logits_per_char": -0.6103832721710205, "bits_per_byte": 0.8805969198033423, "num_chars": 64}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 27, "native_id": "Mercury_412202", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.3477783203125, "logits_per_token_corr": -1.2934722900390625, "logits_per_char_corr": -0.3568199420797414, "bits_per_byte_corr": 0.5147823609290999}, "model_output": [{"sum_logits": -7.0045671463012695, "num_tokens": 8, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -24.251243591308594, "logits_per_token": -0.8755708932876587, "logits_per_char": -0.24153679814831963, "bits_per_byte": 0.34846394088102967, "num_chars": 29}, {"sum_logits": -10.3477783203125, "num_tokens": 8, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -28.677139282226562, "logits_per_token": -1.2934722900390625, "logits_per_char": -0.3568199420797414, "bits_per_byte": 0.5147823609290999, "num_chars": 29}, {"sum_logits": -10.094551086425781, "num_tokens": 8, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -28.60715675354004, "logits_per_token": -1.2618188858032227, "logits_per_char": -0.3480879684974407, "bits_per_byte": 0.5021847859447323, "num_chars": 29}, {"sum_logits": -9.35944938659668, "num_tokens": 8, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -28.667564392089844, "logits_per_token": -1.169931173324585, "logits_per_char": -0.32273963402057515, "bits_per_byte": 0.46561486950013725, "num_chars": 29}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 28, "native_id": "Mercury_SC_409139", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.656718254089355, "logits_per_token_corr": -2.5313436508178713, "logits_per_char_corr": -0.5062687301635742, "bits_per_byte_corr": 0.730391386364666}, "model_output": [{"sum_logits": -17.31532096862793, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -29.84024429321289, "logits_per_token": -3.463064193725586, "logits_per_char": -0.7528400421142578, "bits_per_byte": 1.0861185953416583, "num_chars": 23}, {"sum_logits": -13.383766174316406, "num_tokens": 6, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -30.747100830078125, "logits_per_token": -2.230627695719401, "logits_per_char": -0.5147602374737079, "bits_per_byte": 0.7426420418506771, "num_chars": 26}, {"sum_logits": -12.656718254089355, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -34.71881103515625, "logits_per_token": -2.5313436508178713, "logits_per_char": -0.5062687301635742, "bits_per_byte": 0.730391386364666, "num_chars": 25}, {"sum_logits": -10.459554672241211, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -28.861778259277344, "logits_per_token": -2.0919109344482423, "logits_per_char": -0.3735555240086147, "bits_per_byte": 0.5389267019842937, "num_chars": 28}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 29, "native_id": "Mercury_400687", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.610417366027832, "logits_per_token_corr": -1.9220834732055665, "logits_per_char_corr": -0.5653186685898725, "bits_per_byte_corr": 0.8155824396971464}, "model_output": [{"sum_logits": -13.19515609741211, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -19.60218048095703, "logits_per_token": -3.2987890243530273, "logits_per_char": -0.8246972560882568, "bits_per_byte": 1.1897866415941185, "num_chars": 16}, {"sum_logits": -9.610417366027832, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -25.538087844848633, "logits_per_token": -1.9220834732055665, "logits_per_char": -0.5653186685898725, "bits_per_byte": 0.8155824396971464, "num_chars": 17}, {"sum_logits": -12.549049377441406, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -22.35934829711914, "logits_per_token": -3.1372623443603516, "logits_per_char": -0.836603291829427, "bits_per_byte": 1.2069634203145638, "num_chars": 15}, {"sum_logits": -17.96599769592285, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -23.8741512298584, "logits_per_token": -4.491499423980713, "logits_per_char": -0.9981109831068251, "bits_per_byte": 1.439969765586059, "num_chars": 18}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 30, "native_id": "Mercury_7171605", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.97439956665039, "logits_per_token_corr": -2.219377729627821, "logits_per_char_corr": -0.3768754635217055, "bits_per_byte_corr": 0.5437163622558846}, "model_output": [{"sum_logits": -18.45846939086914, "num_tokens": 4, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -32.33745193481445, "logits_per_token": -4.614617347717285, "logits_per_char": -0.802542147429093, "bits_per_byte": 1.1578235762011637, "num_chars": 23}, {"sum_logits": -10.679744720458984, "num_tokens": 7, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -29.63518524169922, "logits_per_token": -1.5256778172084264, "logits_per_char": -0.3682670593261719, "bits_per_byte": 0.5312970602130116, "num_chars": 29}, {"sum_logits": -10.878623962402344, "num_tokens": 6, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -27.016265869140625, "logits_per_token": -1.8131039937337239, "logits_per_char": -0.3296552715879498, "bits_per_byte": 0.4755920255231813, "num_chars": 33}, {"sum_logits": -19.97439956665039, "num_tokens": 9, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -35.689666748046875, "logits_per_token": -2.219377729627821, "logits_per_char": -0.3768754635217055, "bits_per_byte": 0.5437163622558846, "num_chars": 53}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 31, "native_id": "Mercury_7210245", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.239832878112793, "logits_per_token_corr": -1.4628332683018275, "logits_per_char_corr": -0.31029796600341797, "bits_per_byte_corr": 0.44766533675138487}, "model_output": [{"sum_logits": -8.549943923950195, "num_tokens": 4, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -26.564517974853516, "logits_per_token": -2.137485980987549, "logits_per_char": -0.38863381472500885, "bits_per_byte": 0.5606800772259333, "num_chars": 22}, {"sum_logits": -11.698354721069336, "num_tokens": 4, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -25.05443572998047, "logits_per_token": -2.924588680267334, "logits_per_char": -0.43327239707664206, "bits_per_byte": 0.6250799386169944, "num_chars": 27}, {"sum_logits": -10.239832878112793, "num_tokens": 7, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -37.04848861694336, "logits_per_token": -1.4628332683018275, "logits_per_char": -0.31029796600341797, "bits_per_byte": 0.44766533675138487, "num_chars": 33}, {"sum_logits": -10.555068016052246, "num_tokens": 4, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -28.1359806060791, "logits_per_token": -2.6387670040130615, "logits_per_char": -0.42220272064208986, "bits_per_byte": 0.6091097713206091, "num_chars": 25}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 32, "native_id": "AKDE&ED_2008_4_25", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.758966445922852, "logits_per_token_corr": -2.1084237779889787, "logits_per_char_corr": -0.5466283868860315, "bits_per_byte_corr": 0.788618062970178}, "model_output": [{"sum_logits": -14.758966445922852, "num_tokens": 7, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -20.59431266784668, "logits_per_token": -2.1084237779889787, "logits_per_char": -0.5466283868860315, "bits_per_byte": 0.788618062970178, "num_chars": 27}, {"sum_logits": -21.542858123779297, "num_tokens": 7, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -29.05905532836914, "logits_per_token": -3.0775511605398997, "logits_per_char": -0.7978836342140481, "bits_per_byte": 1.1511027622878978, "num_chars": 27}, {"sum_logits": -15.692314147949219, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -19.790842056274414, "logits_per_token": -3.1384628295898436, "logits_per_char": -0.9230773028205422, "bits_per_byte": 1.331719047137313, "num_chars": 17}, {"sum_logits": -15.924446105957031, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -21.136301040649414, "logits_per_token": -3.184889221191406, "logits_per_char": -0.9367321238798254, "bits_per_byte": 1.3514187897637813, "num_chars": 17}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 33, "native_id": "AKDE&ED_2008_4_19", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.199360847473145, "logits_per_token_corr": -2.171337263924735, "logits_per_char_corr": -0.44704002492568073, "bits_per_byte_corr": 0.6449424270396216}, "model_output": [{"sum_logits": -18.948978424072266, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -24.41138458251953, "logits_per_token": -3.158163070678711, "logits_per_char": -0.6767492294311523, "bits_per_byte": 0.9763427572264523, "num_chars": 28}, {"sum_logits": -24.41228485107422, "num_tokens": 9, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -25.424854278564453, "logits_per_token": -2.7124760945638022, "logits_per_char": -0.6974938528878348, "bits_per_byte": 1.0062709226125384, "num_chars": 35}, {"sum_logits": -15.199360847473145, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -27.622663497924805, "logits_per_token": -2.171337263924735, "logits_per_char": -0.44704002492568073, "bits_per_byte": 0.6449424270396216, "num_chars": 34}, {"sum_logits": -23.92898178100586, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -30.294570922851562, "logits_per_token": -2.9911227226257324, "logits_per_char": -0.6467292373244827, "bits_per_byte": 0.9330330634866031, "num_chars": 37}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 34, "native_id": "Mercury_SC_400402", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.416645050048828, "logits_per_token_corr": -4.208322525024414, "logits_per_char_corr": -1.0520806312561035, "bits_per_byte_corr": 1.517831509329601}, "model_output": [{"sum_logits": -8.416645050048828, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -15.579060554504395, "logits_per_token": -4.208322525024414, "logits_per_char": -1.0520806312561035, "bits_per_byte": 1.517831509329601, "num_chars": 8}, {"sum_logits": -11.133853912353516, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -16.926267623901367, "logits_per_token": -3.711284637451172, "logits_per_char": -1.3917317390441895, "bits_per_byte": 2.0078444781682676, "num_chars": 8}, {"sum_logits": -10.36734390258789, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -16.338645935058594, "logits_per_token": -5.183671951293945, "logits_per_char": -1.036734390258789, "bits_per_byte": 1.495691563546473, "num_chars": 10}, {"sum_logits": -8.858299255371094, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -17.266429901123047, "logits_per_token": -4.429149627685547, "logits_per_char": -0.6327356610979352, "bits_per_byte": 0.9128446004602467, "num_chars": 14}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 35, "native_id": "Mercury_7234308", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -31.175006866455078, "logits_per_token_corr": -3.8968758583068848, "logits_per_char_corr": -0.7603660211330507, "bits_per_byte_corr": 1.0969762879499132}, "model_output": [{"sum_logits": -31.175006866455078, "num_tokens": 8, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -36.51225662231445, "logits_per_token": -3.8968758583068848, "logits_per_char": -0.7603660211330507, "bits_per_byte": 1.0969762879499132, "num_chars": 41}, {"sum_logits": -33.00011444091797, "num_tokens": 12, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -50.419212341308594, "logits_per_token": -2.750009536743164, "logits_per_char": -0.5689674903606546, "bits_per_byte": 0.8208465767709453, "num_chars": 58}, {"sum_logits": -28.630714416503906, "num_tokens": 10, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -48.197288513183594, "logits_per_token": -2.8630714416503906, "logits_per_char": -0.5726142883300781, "bits_per_byte": 0.8261077941165602, "num_chars": 50}, {"sum_logits": -41.51726150512695, "num_tokens": 12, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -59.79621887207031, "logits_per_token": -3.4597717920939126, "logits_per_char": -0.6387271000788762, "bits_per_byte": 0.9214884197658454, "num_chars": 65}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 36, "native_id": "ACTAAP_2014_5_8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -30.44931411743164, "logits_per_token_corr": -2.342254932110126, "logits_per_char_corr": -0.5437377520969936, "bits_per_byte_corr": 0.7844477584950088}, "model_output": [{"sum_logits": -29.087129592895508, "num_tokens": 11, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -49.030677795410156, "logits_per_token": -2.6442845084450464, "logits_per_char": -0.5703358743705002, "bits_per_byte": 0.8228207375959825, "num_chars": 51}, {"sum_logits": -30.44931411743164, "num_tokens": 13, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -49.35626983642578, "logits_per_token": -2.342254932110126, "logits_per_char": -0.5437377520969936, "bits_per_byte": 0.7844477584950088, "num_chars": 56}, {"sum_logits": -32.212886810302734, "num_tokens": 12, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -51.327388763427734, "logits_per_token": -2.6844072341918945, "logits_per_char": -0.5459811323780125, "bits_per_byte": 0.7876842721012651, "num_chars": 59}, {"sum_logits": -33.33235168457031, "num_tokens": 14, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -54.57688903808594, "logits_per_token": -2.3808822631835938, "logits_per_char": -0.5464319948290215, "bits_per_byte": 0.7883347291234594, "num_chars": 61}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 37, "native_id": "Mercury_400407", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.68303871154785, "logits_per_token_corr": -2.0758931901719837, "logits_per_char_corr": -0.4556838710133622, "bits_per_byte_corr": 0.657412860924536}, "model_output": [{"sum_logits": -17.5009708404541, "num_tokens": 9, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -32.178924560546875, "logits_per_token": -1.9445523156060114, "logits_per_char": -0.4487428420629257, "bits_per_byte": 0.6473990728790674, "num_chars": 39}, {"sum_logits": -18.68303871154785, "num_tokens": 9, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -33.70156478881836, "logits_per_token": -2.0758931901719837, "logits_per_char": -0.4556838710133622, "bits_per_byte": 0.657412860924536, "num_chars": 41}, {"sum_logits": -16.465614318847656, "num_tokens": 10, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -31.037498474121094, "logits_per_token": -1.6465614318847657, "logits_per_char": -0.304918783682364, "bits_per_byte": 0.4399048170927572, "num_chars": 54}, {"sum_logits": -18.670209884643555, "num_tokens": 11, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -35.3330078125, "logits_per_token": -1.6972918076948686, "logits_per_char": -0.3333966050829206, "bits_per_byte": 0.4809896288026913, "num_chars": 56}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 38, "native_id": "Mercury_7116288", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.354227066040039, "logits_per_token_corr": -1.9192783832550049, "logits_per_char_corr": -0.3133515727763273, "bits_per_byte_corr": 0.4520707600994893}, "model_output": [{"sum_logits": -16.178638458251953, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -29.729904174804688, "logits_per_token": -2.696439743041992, "logits_per_char": -0.4372604988716744, "bits_per_byte": 0.6308335532992521, "num_chars": 37}, {"sum_logits": -22.38164520263672, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -37.133018493652344, "logits_per_token": -3.1973778860909596, "logits_per_char": -0.5328963143484933, "bits_per_byte": 0.7688068700191297, "num_chars": 42}, {"sum_logits": -15.354227066040039, "num_tokens": 8, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -39.71287536621094, "logits_per_token": -1.9192783832550049, "logits_per_char": -0.3133515727763273, "bits_per_byte": 0.4520707600994893, "num_chars": 49}, {"sum_logits": -22.08469009399414, "num_tokens": 10, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -36.93186950683594, "logits_per_token": -2.208469009399414, "logits_per_char": -0.408975742481373, "bits_per_byte": 0.5900272755221825, "num_chars": 54}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 39, "native_id": "MCAS_2004_9_15-v1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.55195426940918, "logits_per_token_corr": -1.2732272514930139, "logits_per_char_corr": -0.23312611647055181, "bits_per_byte_corr": 0.3363298921340096}, "model_output": [{"sum_logits": -18.40915870666504, "num_tokens": 14, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -38.484092712402344, "logits_per_token": -1.3149399076189314, "logits_per_char": -0.2707229221568388, "bits_per_byte": 0.3905706172509208, "num_chars": 68}, {"sum_logits": -16.55195426940918, "num_tokens": 13, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -38.20074462890625, "logits_per_token": -1.2732272514930139, "logits_per_char": -0.23312611647055181, "bits_per_byte": 0.3363298921340096, "num_chars": 71}, {"sum_logits": -14.879508972167969, "num_tokens": 15, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -37.438720703125, "logits_per_token": -0.9919672648111979, "logits_per_char": -0.19324037626192167, "bits_per_byte": 0.2787869325327921, "num_chars": 77}, {"sum_logits": -27.56914520263672, "num_tokens": 17, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -50.739871978759766, "logits_per_token": -1.621714423684513, "logits_per_char": -0.30976567643412045, "bits_per_byte": 0.4468974052294419, "num_chars": 89}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 40, "native_id": "NYSEDREGENTS_2015_4_26", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.637088775634766, "logits_per_token_corr": -2.6592721939086914, "logits_per_char_corr": -0.3545696258544922, "bits_per_byte_corr": 0.5115358408704986}, "model_output": [{"sum_logits": -16.290708541870117, "num_tokens": 4, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -29.122669219970703, "logits_per_token": -4.072677135467529, "logits_per_char": -0.6265657131488507, "bits_per_byte": 0.903943247151553, "num_chars": 26}, {"sum_logits": -16.023107528686523, "num_tokens": 5, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -27.60602569580078, "logits_per_token": -3.204621505737305, "logits_per_char": -0.7283230694857511, "bits_per_byte": 1.050748080512876, "num_chars": 22}, {"sum_logits": -10.637088775634766, "num_tokens": 4, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -30.123680114746094, "logits_per_token": -2.6592721939086914, "logits_per_char": -0.3545696258544922, "bits_per_byte": 0.5115358408704986, "num_chars": 30}, {"sum_logits": -11.292915344238281, "num_tokens": 3, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -23.574756622314453, "logits_per_token": -3.7643051147460938, "logits_per_char": -0.4705381393432617, "bits_per_byte": 0.6788430401801314, "num_chars": 24}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 41, "native_id": "Mercury_SC_401620", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.080622673034668, "logits_per_token_corr": -2.770155668258667, "logits_per_char_corr": -0.6925389170646667, "bits_per_byte_corr": 0.9991224612725257}, "model_output": [{"sum_logits": -11.080622673034668, "num_tokens": 4, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -23.40793228149414, "logits_per_token": -2.770155668258667, "logits_per_char": -0.6925389170646667, "bits_per_byte": 0.9991224612725257, "num_chars": 16}, {"sum_logits": -18.253067016601562, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -27.64499282836914, "logits_per_token": -3.6506134033203126, "logits_per_char": -0.8691936674572173, "bits_per_byte": 1.2539813936135191, "num_chars": 21}, {"sum_logits": -16.886638641357422, "num_tokens": 7, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -28.081073760986328, "logits_per_token": -2.412376948765346, "logits_per_char": -0.4690732955932617, "bits_per_byte": 0.6767297173663277, "num_chars": 36}, {"sum_logits": -12.528643608093262, "num_tokens": 9, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -24.527883529663086, "logits_per_token": -1.3920715120103624, "logits_per_char": -0.338611989407926, "bits_per_byte": 0.488513837904712, "num_chars": 37}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 42, "native_id": "Mercury_400877", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.871840000152588, "logits_per_token_corr": -5.871840000152588, "logits_per_char_corr": -1.9572800000508626, "bits_per_byte_corr": 2.823758149706558}, "model_output": [{"sum_logits": -1.2663320302963257, "num_tokens": 1, "num_tokens_all": 189, "is_greedy": true, "sum_logits_uncond": -7.0804243087768555, "logits_per_token": -1.2663320302963257, "logits_per_char": -0.6331660151481628, "bits_per_byte": 0.9134654701143371, "num_chars": 2}, {"sum_logits": -5.293652057647705, "num_tokens": 1, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -7.217063903808594, "logits_per_token": -5.293652057647705, "logits_per_char": -2.6468260288238525, "bits_per_byte": 3.818562785882744, "num_chars": 2}, {"sum_logits": -5.871840000152588, "num_tokens": 1, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -8.674481391906738, "logits_per_token": -5.871840000152588, "logits_per_char": -1.9572800000508626, "bits_per_byte": 2.823758149706558, "num_chars": 3}, {"sum_logits": -9.52211856842041, "num_tokens": 1, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -9.025639533996582, "logits_per_token": -9.52211856842041, "logits_per_char": -3.1740395228068032, "bits_per_byte": 4.579171079142236, "num_chars": 3}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 43, "native_id": "Mercury_7174213", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.3538179397583, "logits_per_token_corr": -1.9076882771083288, "logits_per_char_corr": -0.29675150977240666, "bits_per_byte_corr": 0.42812193152527145}, "model_output": [{"sum_logits": -4.765695571899414, "num_tokens": 5, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -21.740312576293945, "logits_per_token": -0.9531391143798829, "logits_per_char": -0.18329598353459284, "bits_per_byte": 0.26444020646041216, "num_chars": 26}, {"sum_logits": -5.5873236656188965, "num_tokens": 5, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -22.318143844604492, "logits_per_token": -1.1174647331237793, "logits_per_char": -0.20693791354144062, "bits_per_byte": 0.29854830163835994, "num_chars": 27}, {"sum_logits": -17.189102172851562, "num_tokens": 6, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -30.808582305908203, "logits_per_token": -2.8648503621419272, "logits_per_char": -0.5055618286132812, "bits_per_byte": 0.7293715430036609, "num_chars": 34}, {"sum_logits": -13.3538179397583, "num_tokens": 7, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -34.082305908203125, "logits_per_token": -1.9076882771083288, "logits_per_char": -0.29675150977240666, "bits_per_byte": 0.42812193152527145, "num_chars": 45}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 44, "native_id": "NYSEDREGENTS_2008_8_34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.817684173583984, "logits_per_token_corr": -1.4241871303982205, "logits_per_char_corr": -0.2248716521681401, "bits_per_byte_corr": 0.3244212174197167}, "model_output": [{"sum_logits": -13.686487197875977, "num_tokens": 9, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -27.246740341186523, "logits_per_token": -1.5207207997639973, "logits_per_char": -0.24011381048905223, "bits_per_byte": 0.34641100364175686, "num_chars": 57}, {"sum_logits": -12.817684173583984, "num_tokens": 9, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -26.5340576171875, "logits_per_token": -1.4241871303982205, "logits_per_char": -0.2248716521681401, "bits_per_byte": 0.3244212174197167, "num_chars": 57}, {"sum_logits": -20.176753997802734, "num_tokens": 8, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -32.31493377685547, "logits_per_token": -2.522094249725342, "logits_per_char": -0.3956226274078968, "bits_per_byte": 0.5707628026252448, "num_chars": 51}, {"sum_logits": -19.712697982788086, "num_tokens": 8, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -29.801342010498047, "logits_per_token": -2.4640872478485107, "logits_per_char": -0.3865234898585899, "bits_per_byte": 0.5576355220064838, "num_chars": 51}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 45, "native_id": "Mercury_7212398", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.5129923820495605, "logits_per_token_corr": -3.7564961910247803, "logits_per_char_corr": -1.25216539700826, "bits_per_byte_corr": 1.8064928086378746}, "model_output": [{"sum_logits": -10.523460388183594, "num_tokens": 2, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -15.491741180419922, "logits_per_token": -5.261730194091797, "logits_per_char": -2.104692077636719, "bits_per_byte": 3.0364288230069647, "num_chars": 5}, {"sum_logits": -7.5129923820495605, "num_tokens": 2, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -15.48202896118164, "logits_per_token": -3.7564961910247803, "logits_per_char": -1.25216539700826, "bits_per_byte": 1.8064928086378746, "num_chars": 6}, {"sum_logits": -3.1072287559509277, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -17.470237731933594, "logits_per_token": -1.0357429186503093, "logits_per_char": -0.31072287559509276, "bits_per_byte": 0.4482783517121207, "num_chars": 10}, {"sum_logits": -12.471851348876953, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -18.490497589111328, "logits_per_token": -4.157283782958984, "logits_per_char": -0.7336383146398208, "bits_per_byte": 1.0584163583377668, "num_chars": 17}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 46, "native_id": "Mercury_SC_401290", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.2642083168029785, "logits_per_token_corr": -6.2642083168029785, "logits_per_char_corr": -0.6264208316802978, "bits_per_byte_corr": 0.903734227375355}, "model_output": [{"sum_logits": -6.88906717300415, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -14.953829765319824, "logits_per_token": -6.88906717300415, "logits_per_char": -1.1481778621673584, "bits_per_byte": 1.6564705078085298, "num_chars": 6}, {"sum_logits": -5.680278301239014, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -14.68713665008545, "logits_per_token": -5.680278301239014, "logits_per_char": -0.7100347876548767, "bits_per_byte": 1.0243636670090748, "num_chars": 8}, {"sum_logits": -6.2642083168029785, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -13.818089485168457, "logits_per_token": -6.2642083168029785, "logits_per_char": -0.6264208316802978, "bits_per_byte": 0.903734227375355, "num_chars": 10}, {"sum_logits": -8.965742111206055, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -12.986641883850098, "logits_per_token": -8.965742111206055, "logits_per_char": -1.4942903518676758, "bits_per_byte": 2.1558052802892687, "num_chars": 6}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 47, "native_id": "Mercury_SC_402120", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.729632377624512, "logits_per_token_corr": -4.364816188812256, "logits_per_char_corr": -0.8729632377624512, "bits_per_byte_corr": 1.2594197339991662}, "model_output": [{"sum_logits": -10.278144836425781, "num_tokens": 1, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -13.302153587341309, "logits_per_token": -10.278144836425781, "logits_per_char": -1.2847681045532227, "bits_per_byte": 1.8535285731325792, "num_chars": 8}, {"sum_logits": -9.038250923156738, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -18.184391021728516, "logits_per_token": -4.519125461578369, "logits_per_char": -0.9038250923156739, "bits_per_byte": 1.3039439785157692, "num_chars": 10}, {"sum_logits": -8.729632377624512, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -17.300281524658203, "logits_per_token": -4.364816188812256, "logits_per_char": -0.8729632377624512, "bits_per_byte": 1.2594197339991662, "num_chars": 10}, {"sum_logits": -3.492037057876587, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -16.698421478271484, "logits_per_token": -1.7460185289382935, "logits_per_char": -0.29100308815638226, "bits_per_byte": 0.41982871216688816, "num_chars": 12}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 48, "native_id": "Mercury_184975", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.204995155334473, "logits_per_token_corr": -4.734998385111491, "logits_per_char_corr": -0.6456815979697488, "bits_per_byte_corr": 0.9315216393848872}, "model_output": [{"sum_logits": -16.217967987060547, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -26.575870513916016, "logits_per_token": -5.405989329020183, "logits_per_char": -0.7371803630482067, "bits_per_byte": 1.0635264540111375, "num_chars": 22}, {"sum_logits": -11.470941543579102, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -24.495838165283203, "logits_per_token": -3.8236471811930337, "logits_per_char": -0.49873658885126526, "bits_per_byte": 0.7195248034461152, "num_chars": 23}, {"sum_logits": -14.204995155334473, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -25.681041717529297, "logits_per_token": -4.734998385111491, "logits_per_char": -0.6456815979697488, "bits_per_byte": 0.9315216393848872, "num_chars": 22}, {"sum_logits": -13.385804176330566, "num_tokens": 5, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -25.96424674987793, "logits_per_token": -2.6771608352661134, "logits_per_char": -0.6084456443786621, "bits_per_byte": 0.8778015137962163, "num_chars": 22}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 49, "native_id": "Mercury_SC_400578", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.782413482666016, "logits_per_token_corr": -3.3974876403808594, "logits_per_char_corr": -1.081018794666637, "bits_per_byte_corr": 1.5595804541744422}, "model_output": [{"sum_logits": -23.782413482666016, "num_tokens": 7, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -25.785552978515625, "logits_per_token": -3.3974876403808594, "logits_per_char": -1.081018794666637, "bits_per_byte": 1.5595804541744422, "num_chars": 22}, {"sum_logits": -27.66146469116211, "num_tokens": 8, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -31.576393127441406, "logits_per_token": -3.4576830863952637, "logits_per_char": -0.9879094532557896, "bits_per_byte": 1.4252520690604789, "num_chars": 28}, {"sum_logits": -25.0461483001709, "num_tokens": 8, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -31.26107406616211, "logits_per_token": -3.1307685375213623, "logits_per_char": -0.8348716100056967, "bits_per_byte": 1.2044651315350685, "num_chars": 30}, {"sum_logits": -28.11096954345703, "num_tokens": 10, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -34.995697021484375, "logits_per_token": -2.811096954345703, "logits_per_char": -0.739762356406764, "bits_per_byte": 1.0672514830251392, "num_chars": 38}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 50, "native_id": "MCAS_2001_8_4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -28.99182891845703, "logits_per_token_corr": -3.2213143242730036, "logits_per_char_corr": -0.6742285794990007, "bits_per_byte_corr": 0.9727062280695173}, "model_output": [{"sum_logits": -26.405900955200195, "num_tokens": 10, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -48.21470642089844, "logits_per_token": -2.6405900955200194, "logits_per_char": -0.5618276798978765, "bits_per_byte": 0.8105460076234008, "num_chars": 47}, {"sum_logits": -33.8827018737793, "num_tokens": 10, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -54.11217498779297, "logits_per_token": -3.3882701873779295, "logits_per_char": -0.6515904206496018, "bits_per_byte": 0.9400462685626095, "num_chars": 52}, {"sum_logits": -25.26235580444336, "num_tokens": 10, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -48.527992248535156, "logits_per_token": -2.526235580444336, "logits_per_char": -0.5262990792592367, "bits_per_byte": 0.7592890716722738, "num_chars": 48}, {"sum_logits": -28.99182891845703, "num_tokens": 9, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -51.120235443115234, "logits_per_token": -3.2213143242730036, "logits_per_char": -0.6742285794990007, "bits_per_byte": 0.9727062280695173, "num_chars": 43}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 51, "native_id": "MCAS_2003_5_33", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.550479888916016, "logits_per_token_corr": -3.221497126988002, "logits_per_char_corr": -0.7047024965286255, "bits_per_byte_corr": 1.0166707970446505}, "model_output": [{"sum_logits": -16.91519546508789, "num_tokens": 3, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -31.724632263183594, "logits_per_token": -5.63839848836263, "logits_per_char": -0.7354432810907778, "bits_per_byte": 1.0610203744855353, "num_chars": 23}, {"sum_logits": -13.827291488647461, "num_tokens": 4, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -23.320878982543945, "logits_per_token": -3.4568228721618652, "logits_per_char": -0.49383183888026644, "bits_per_byte": 0.7124487449861499, "num_chars": 28}, {"sum_logits": -22.550479888916016, "num_tokens": 7, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -32.612571716308594, "logits_per_token": -3.221497126988002, "logits_per_char": -0.7047024965286255, "bits_per_byte": 1.0166707970446505, "num_chars": 32}, {"sum_logits": -29.70758819580078, "num_tokens": 11, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -40.031349182128906, "logits_per_token": -2.700689835981889, "logits_per_char": -0.5401379671963779, "bits_per_byte": 0.7792543666706198, "num_chars": 55}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 52, "native_id": "Mercury_7068513", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.97393798828125, "logits_per_token_corr": -6.32464599609375, "logits_per_char_corr": -0.8624517267400568, "bits_per_byte_corr": 1.2442548291748972}, "model_output": [{"sum_logits": -18.97393798828125, "num_tokens": 3, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -25.706989288330078, "logits_per_token": -6.32464599609375, "logits_per_char": -0.8624517267400568, "bits_per_byte": 1.2442548291748972, "num_chars": 22}, {"sum_logits": -20.446992874145508, "num_tokens": 3, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -26.27315902709961, "logits_per_token": -6.815664291381836, "logits_per_char": -0.9736663273402623, "bits_per_byte": 1.4047035819353757, "num_chars": 21}, {"sum_logits": -15.25103759765625, "num_tokens": 2, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -27.137954711914062, "logits_per_token": -7.625518798828125, "logits_per_char": -0.8971198586856618, "bits_per_byte": 1.2942703712097419, "num_chars": 17}, {"sum_logits": -11.410557746887207, "num_tokens": 3, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -24.650184631347656, "logits_per_token": -3.8035192489624023, "logits_per_char": -1.0373234315352007, "bits_per_byte": 1.4965413704748314, "num_chars": 11}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 53, "native_id": "AKDE&ED_2008_4_26", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.60049057006836, "logits_per_token_corr": -2.955610063340929, "logits_per_char_corr": -0.7600140162876674, "bits_per_byte_corr": 1.0964684523051094}, "model_output": [{"sum_logits": -28.94708251953125, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -36.20380401611328, "logits_per_token": -3.6183853149414062, "logits_per_char": -0.9045963287353516, "bits_per_byte": 1.3050566374737917, "num_chars": 32}, {"sum_logits": -31.54029083251953, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -41.34250259399414, "logits_per_token": -5.256715138753255, "logits_per_char": -0.9557663888642283, "bits_per_byte": 1.3788794294637654, "num_chars": 33}, {"sum_logits": -26.60049057006836, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -39.133277893066406, "logits_per_token": -2.955610063340929, "logits_per_char": -0.7600140162876674, "bits_per_byte": 1.0964684523051094, "num_chars": 35}, {"sum_logits": -21.09381866455078, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -35.51343536376953, "logits_per_token": -2.6367273330688477, "logits_per_char": -0.5701032071500212, "bits_per_byte": 0.8224850697508198, "num_chars": 37}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 54, "native_id": "Mercury_7235638", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.664426326751709, "logits_per_token_corr": -3.3322131633758545, "logits_per_char_corr": -0.39202507804421816, "bits_per_byte_corr": 0.5655726359989087}, "model_output": [{"sum_logits": -4.793519496917725, "num_tokens": 3, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -17.095630645751953, "logits_per_token": -1.5978398323059082, "logits_per_char": -0.25229049983777496, "bits_per_byte": 0.3639782529796173, "num_chars": 19}, {"sum_logits": -10.127062797546387, "num_tokens": 4, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -21.88492202758789, "logits_per_token": -2.5317656993865967, "logits_per_char": -0.5330033051340204, "bits_per_byte": 0.7689612250948307, "num_chars": 19}, {"sum_logits": -6.664426326751709, "num_tokens": 2, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -20.449840545654297, "logits_per_token": -3.3322131633758545, "logits_per_char": -0.39202507804421816, "bits_per_byte": 0.5655726359989087, "num_chars": 17}, {"sum_logits": -8.419965744018555, "num_tokens": 2, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -18.312068939208984, "logits_per_token": -4.209982872009277, "logits_per_char": -0.6476896726168119, "bits_per_byte": 0.934418678719942, "num_chars": 13}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 55, "native_id": "MDSA_2009_5_20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -28.394514083862305, "logits_per_token_corr": -3.549314260482788, "logits_per_char_corr": -0.6603375368340071, "bits_per_byte_corr": 0.9526656897039396}, "model_output": [{"sum_logits": -22.315982818603516, "num_tokens": 10, "num_tokens_all": 244, "is_greedy": false, "sum_logits_uncond": -32.366180419921875, "logits_per_token": -2.2315982818603515, "logits_per_char": -0.6198884116278754, "bits_per_byte": 0.8943099373607148, "num_chars": 36}, {"sum_logits": -28.394514083862305, "num_tokens": 8, "num_tokens_all": 242, "is_greedy": false, "sum_logits_uncond": -35.80659484863281, "logits_per_token": -3.549314260482788, "logits_per_char": -0.6603375368340071, "bits_per_byte": 0.9526656897039396, "num_chars": 43}, {"sum_logits": -23.783353805541992, "num_tokens": 11, "num_tokens_all": 245, "is_greedy": false, "sum_logits_uncond": -33.70530319213867, "logits_per_token": -2.1621230732310903, "logits_per_char": -0.4487425246328678, "bits_per_byte": 0.647398614924297, "num_chars": 53}, {"sum_logits": -41.498294830322266, "num_tokens": 16, "num_tokens_all": 250, "is_greedy": false, "sum_logits_uncond": -54.8763313293457, "logits_per_token": -2.5936434268951416, "logits_per_char": -0.5187286853790283, "bits_per_byte": 0.748367301963713, "num_chars": 80}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 56, "native_id": "Mercury_178325", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.037837982177734, "logits_per_token_corr": -3.518918991088867, "logits_per_char_corr": -0.8797297477722168, "bits_per_byte_corr": 1.2691817444343876}, "model_output": [{"sum_logits": -9.723987579345703, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -13.390792846679688, "logits_per_token": -4.861993789672852, "logits_per_char": -1.6206645965576172, "bits_per_byte": 2.338124776399667, "num_chars": 6}, {"sum_logits": -8.990615844726562, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -16.039546966552734, "logits_per_token": -4.495307922363281, "logits_per_char": -0.8990615844726563, "bits_per_byte": 1.2970716893734069, "num_chars": 10}, {"sum_logits": -7.037837982177734, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -15.207113265991211, "logits_per_token": -3.518918991088867, "logits_per_char": -0.8797297477722168, "bits_per_byte": 1.2691817444343876, "num_chars": 8}, {"sum_logits": -4.99616813659668, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -15.746980667114258, "logits_per_token": -2.49808406829834, "logits_per_char": -0.624521017074585, "bits_per_byte": 0.9009933742650826, "num_chars": 8}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 57, "native_id": "Mercury_7212678", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.893017768859863, "logits_per_token_corr": -3.1786035537719726, "logits_per_char_corr": -0.4966568052768707, "bits_per_byte_corr": 0.7165243099972117}, "model_output": [{"sum_logits": -15.893017768859863, "num_tokens": 5, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -25.672618865966797, "logits_per_token": -3.1786035537719726, "logits_per_char": -0.4966568052768707, "bits_per_byte": 0.7165243099972117, "num_chars": 32}, {"sum_logits": -18.49870491027832, "num_tokens": 7, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -27.322357177734375, "logits_per_token": -2.64267213003976, "logits_per_char": -0.47432576693021333, "bits_per_byte": 0.6843074317165647, "num_chars": 39}, {"sum_logits": -21.081165313720703, "num_tokens": 5, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -27.549089431762695, "logits_per_token": -4.2162330627441404, "logits_per_char": -0.8783818880716959, "bits_per_byte": 1.2672371939286307, "num_chars": 24}, {"sum_logits": -30.962203979492188, "num_tokens": 8, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -44.9049072265625, "logits_per_token": -3.8702754974365234, "logits_per_char": -0.8368163237700591, "bits_per_byte": 1.2072707604388648, "num_chars": 37}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 58, "native_id": "TAKS_2009_8_32", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.616048812866211, "logits_per_token_corr": -4.3080244064331055, "logits_per_char_corr": -0.718004067738851, "bits_per_byte_corr": 1.0358609078656877}, "model_output": [{"sum_logits": -8.616048812866211, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -12.673465728759766, "logits_per_token": -4.3080244064331055, "logits_per_char": -0.718004067738851, "bits_per_byte": 1.0358609078656877, "num_chars": 12}, {"sum_logits": -7.09998083114624, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -16.242633819580078, "logits_per_token": -3.54999041557312, "logits_per_char": -0.709998083114624, "bits_per_byte": 1.024310713550874, "num_chars": 10}, {"sum_logits": -8.554243087768555, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -14.550726890563965, "logits_per_token": -4.277121543884277, "logits_per_char": -0.7776584625244141, "bits_per_byte": 1.121924007390114, "num_chars": 11}, {"sum_logits": -11.282556533813477, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -15.438355445861816, "logits_per_token": -5.641278266906738, "logits_per_char": -0.7051597833633423, "bits_per_byte": 1.0173305224933606, "num_chars": 16}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 59, "native_id": "Mercury_412681", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.63336944580078, "logits_per_token_corr": -1.157972584600034, "logits_per_char_corr": -0.36484067733973674, "bits_per_byte_corr": 0.5263538359129868}, "model_output": [{"sum_logits": -26.740251541137695, "num_tokens": 24, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -64.19469451904297, "logits_per_token": -1.114177147547404, "logits_per_char": -0.3713923825158013, "bits_per_byte": 0.5358059484798684, "num_chars": 72}, {"sum_logits": -29.968456268310547, "num_tokens": 23, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -68.68914794921875, "logits_per_token": -1.302976359491763, "logits_per_char": -0.4105267981960349, "bits_per_byte": 0.5922649759098693, "num_chars": 73}, {"sum_logits": -26.63336944580078, "num_tokens": 23, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -54.905418395996094, "logits_per_token": -1.157972584600034, "logits_per_char": -0.36484067733973674, "bits_per_byte": 0.5263538359129868, "num_chars": 73}, {"sum_logits": -20.71044921875, "num_tokens": 22, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -53.108970642089844, "logits_per_token": -0.9413840553977273, "logits_per_char": -0.2798709353885135, "bits_per_byte": 0.403768410574254, "num_chars": 74}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 60, "native_id": "Mercury_400440", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.651640892028809, "logits_per_token_corr": -1.6645201274326868, "logits_per_char_corr": -0.8322600637163434, "bits_per_byte_corr": 1.200697466654364}, "model_output": [{"sum_logits": -17.64615249633789, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -31.904462814331055, "logits_per_token": -2.941025416056315, "logits_per_char": -1.4705127080281575, "bits_per_byte": 2.1215013914379472, "num_chars": 12}, {"sum_logits": -12.362178802490234, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -30.161197662353516, "logits_per_token": -3.0905447006225586, "logits_per_char": -1.2362178802490233, "bits_per_byte": 1.7834854052948137, "num_chars": 10}, {"sum_logits": -5.730216979980469, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -24.494260787963867, "logits_per_token": -1.4325542449951172, "logits_per_char": -0.5730216979980469, "bits_per_byte": 0.8266955620241495, "num_chars": 10}, {"sum_logits": -11.651640892028809, "num_tokens": 7, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -27.329086303710938, "logits_per_token": -1.6645201274326868, "logits_per_char": -0.8322600637163434, "bits_per_byte": 1.200697466654364, "num_chars": 14}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 61, "native_id": "Mercury_SC_416529", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.884638786315918, "logits_per_token_corr": -2.6282129287719727, "logits_per_char_corr": -0.3942319393157959, "bits_per_byte_corr": 0.5687564638113461}, "model_output": [{"sum_logits": -14.603615760803223, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -24.16238021850586, "logits_per_token": -4.867871920267741, "logits_per_char": -0.859036221223719, "bits_per_byte": 1.2393272963043442, "num_chars": 17}, {"sum_logits": -7.884638786315918, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -24.503755569458008, "logits_per_token": -2.6282129287719727, "logits_per_char": -0.3942319393157959, "bits_per_byte": 0.5687564638113461, "num_chars": 20}, {"sum_logits": -8.633441925048828, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -28.682437896728516, "logits_per_token": -2.158360481262207, "logits_per_char": -0.4543916802657278, "bits_per_byte": 0.65554862374104, "num_chars": 19}, {"sum_logits": -10.879344940185547, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -21.631074905395508, "logits_per_token": -3.626448313395182, "logits_per_char": -0.639961467069738, "bits_per_byte": 0.9232692349022, "num_chars": 17}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 62, "native_id": "MCAS_2006_8_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.0081536769866943, "logits_per_token_corr": -2.0081536769866943, "logits_per_char_corr": -0.1673461397488912, "bits_per_byte_corr": 0.24142944592781024}, "model_output": [{"sum_logits": -5.461014270782471, "num_tokens": 2, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -17.082311630249023, "logits_per_token": -2.7305071353912354, "logits_per_char": -0.4550845225652059, "bits_per_byte": 0.6565481838906158, "num_chars": 12}, {"sum_logits": -6.867293357849121, "num_tokens": 1, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -14.774696350097656, "logits_per_token": -6.867293357849121, "logits_per_char": -0.6867293357849121, "bits_per_byte": 0.9907410071705762, "num_chars": 10}, {"sum_logits": -8.161394119262695, "num_tokens": 3, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -17.389202117919922, "logits_per_token": -2.7204647064208984, "logits_per_char": -0.8161394119262695, "bits_per_byte": 1.1774402822609098, "num_chars": 10}, {"sum_logits": -2.0081536769866943, "num_tokens": 1, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -14.805025100708008, "logits_per_token": -2.0081536769866943, "logits_per_char": -0.1673461397488912, "bits_per_byte": 0.24142944592781024, "num_chars": 12}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 63, "native_id": "TIMSS_2003_8_pg80", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.390524864196777, "logits_per_token_corr": -2.6952624320983887, "logits_per_char_corr": -1.0781049728393555, "bits_per_byte_corr": 1.555376697874186}, "model_output": [{"sum_logits": -6.900800704956055, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -13.98502254486084, "logits_per_token": -6.900800704956055, "logits_per_char": -0.9858286721365792, "bits_per_byte": 1.4222501364586164, "num_chars": 7}, {"sum_logits": -5.87127685546875, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -13.046712875366211, "logits_per_token": -5.87127685546875, "logits_per_char": -0.978546142578125, "bits_per_byte": 1.4117436671794996, "num_chars": 6}, {"sum_logits": -5.390524864196777, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -16.303171157836914, "logits_per_token": -2.6952624320983887, "logits_per_char": -1.0781049728393555, "bits_per_byte": 1.555376697874186, "num_chars": 5}, {"sum_logits": -5.571140289306641, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -12.759955406188965, "logits_per_token": -5.571140289306641, "logits_per_char": -1.1142280578613282, "bits_per_byte": 1.607491293497034, "num_chars": 5}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 64, "native_id": "Mercury_416645", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.851741790771484, "logits_per_token_corr": -2.4814677238464355, "logits_per_char_corr": -0.4962935447692871, "bits_per_byte_corr": 0.7160002358643697}, "model_output": [{"sum_logits": -13.655404090881348, "num_tokens": 8, "num_tokens_all": 244, "is_greedy": false, "sum_logits_uncond": -25.185606002807617, "logits_per_token": -1.7069255113601685, "logits_per_char": -0.41380012396610144, "bits_per_byte": 0.5969873867655617, "num_chars": 33}, {"sum_logits": -17.730587005615234, "num_tokens": 7, "num_tokens_all": 243, "is_greedy": false, "sum_logits_uncond": -32.3696403503418, "logits_per_token": -2.532941000802176, "logits_per_char": -0.5540808439254761, "bits_per_byte": 0.7993696857834304, "num_chars": 32}, {"sum_logits": -12.475701332092285, "num_tokens": 8, "num_tokens_all": 244, "is_greedy": false, "sum_logits_uncond": -22.973995208740234, "logits_per_token": -1.5594626665115356, "logits_per_char": -0.32830792979190226, "bits_per_byte": 0.4736482221956396, "num_chars": 38}, {"sum_logits": -19.851741790771484, "num_tokens": 8, "num_tokens_all": 244, "is_greedy": false, "sum_logits_uncond": -36.191368103027344, "logits_per_token": -2.4814677238464355, "logits_per_char": -0.4962935447692871, "bits_per_byte": 0.7160002358643697, "num_chars": 40}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 65, "native_id": "Mercury_406777", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -23.50916862487793, "logits_per_token_corr": -2.938646078109741, "logits_per_char_corr": -0.6186623322336298, "bits_per_byte_corr": 0.8925410786988992}, "model_output": [{"sum_logits": -23.50916862487793, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -39.090476989746094, "logits_per_token": -2.938646078109741, "logits_per_char": -0.6186623322336298, "bits_per_byte": 0.8925410786988992, "num_chars": 38}, {"sum_logits": -21.28201675415039, "num_tokens": 6, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -33.29152297973633, "logits_per_token": -3.5470027923583984, "logits_per_char": -0.608057621547154, "bits_per_byte": 0.8772417151814474, "num_chars": 35}, {"sum_logits": -24.326021194458008, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -33.27567672729492, "logits_per_token": -3.4751458849225725, "logits_per_char": -0.5791909808204287, "bits_per_byte": 0.8355959557578476, "num_chars": 42}, {"sum_logits": -25.0775089263916, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -34.30259323120117, "logits_per_token": -3.5825012751988004, "logits_per_char": -0.6430130493946564, "bits_per_byte": 0.9276717375892274, "num_chars": 39}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 66, "native_id": "Mercury_LBS11018", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.253894805908203, "logits_per_token_corr": -1.5685358914462002, "logits_per_char_corr": -0.3594561417897542, "bits_per_byte_corr": 0.5185855931775311}, "model_output": [{"sum_logits": -17.253894805908203, "num_tokens": 11, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -47.84928512573242, "logits_per_token": -1.5685358914462002, "logits_per_char": -0.3594561417897542, "bits_per_byte": 0.5185855931775311, "num_chars": 48}, {"sum_logits": -15.255148887634277, "num_tokens": 10, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -46.0528564453125, "logits_per_token": -1.5255148887634278, "logits_per_char": -0.3178156018257141, "bits_per_byte": 0.45851099267142853, "num_chars": 48}, {"sum_logits": -11.029729843139648, "num_tokens": 5, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -34.94002151489258, "logits_per_token": -2.2059459686279297, "logits_per_char": -0.648807637831744, "bits_per_byte": 0.9360315615914121, "num_chars": 17}, {"sum_logits": -10.844376564025879, "num_tokens": 5, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -35.29957962036133, "logits_per_token": -2.168875312805176, "logits_per_char": -0.6379045037662282, "bits_per_byte": 0.9203016641449336, "num_chars": 17}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 67, "native_id": "Mercury_7139878", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.878360748291016, "logits_per_token_corr": -3.469590187072754, "logits_per_char_corr": -0.7710200415717231, "bits_per_byte_corr": 1.1123467904023265}, "model_output": [{"sum_logits": -19.514625549316406, "num_tokens": 4, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -27.381755828857422, "logits_per_token": -4.878656387329102, "logits_per_char": -1.0841458638509114, "bits_per_byte": 1.5640918613791148, "num_chars": 18}, {"sum_logits": -11.613293647766113, "num_tokens": 2, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -22.97757911682129, "logits_per_token": -5.806646823883057, "logits_per_char": -0.6451829804314507, "bits_per_byte": 0.9308022863350838, "num_chars": 18}, {"sum_logits": -13.878360748291016, "num_tokens": 4, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -27.664875030517578, "logits_per_token": -3.469590187072754, "logits_per_char": -0.7710200415717231, "bits_per_byte": 1.1123467904023265, "num_chars": 18}, {"sum_logits": -11.991239547729492, "num_tokens": 3, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -23.89813232421875, "logits_per_token": -3.997079849243164, "logits_per_char": -0.5995619773864747, "bits_per_byte": 0.8649850914816692, "num_chars": 20}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 68, "native_id": "Mercury_417147", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.323319435119629, "logits_per_token_corr": -2.264663887023926, "logits_per_char_corr": -0.539205687386649, "bits_per_byte_corr": 0.777909371212402}, "model_output": [{"sum_logits": -12.084738731384277, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -26.099205017089844, "logits_per_token": -3.0211846828460693, "logits_per_char": -0.8631956236703056, "bits_per_byte": 1.2453280455871005, "num_chars": 14}, {"sum_logits": -11.323319435119629, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -29.13888168334961, "logits_per_token": -2.264663887023926, "logits_per_char": -0.539205687386649, "bits_per_byte": 0.777909371212402, "num_chars": 21}, {"sum_logits": -10.741567611694336, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -23.905166625976562, "logits_per_token": -3.5805225372314453, "logits_per_char": -0.5370783805847168, "bits_per_byte": 0.7748403162388029, "num_chars": 20}, {"sum_logits": -15.61973762512207, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -34.19613265991211, "logits_per_token": -2.6032896041870117, "logits_per_char": -0.5578477723257882, "bits_per_byte": 0.8048042147059483, "num_chars": 28}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 69, "native_id": "Mercury_7016765", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.3793306350708, "logits_per_token_corr": -1.9113329478672572, "logits_per_char_corr": -0.4778332369668143, "bits_per_byte_corr": 0.6893676413444392}, "model_output": [{"sum_logits": -13.3793306350708, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -23.04439926147461, "logits_per_token": -1.9113329478672572, "logits_per_char": -0.4778332369668143, "bits_per_byte": 0.6893676413444392, "num_chars": 28}, {"sum_logits": -25.898256301879883, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -41.18195724487305, "logits_per_token": -4.3163760503133135, "logits_per_char": -0.7399501800537109, "bits_per_byte": 1.0675224552691513, "num_chars": 35}, {"sum_logits": -25.02617073059082, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -33.0743293762207, "logits_per_token": -4.17102845509847, "logits_per_char": -0.7360638450173771, "bits_per_byte": 1.0619156589849954, "num_chars": 34}, {"sum_logits": -19.870559692382812, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -25.048006057739258, "logits_per_token": -2.4838199615478516, "logits_per_char": -0.5370421538481841, "bits_per_byte": 0.7747880521056596, "num_chars": 37}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 70, "native_id": "Mercury_415303", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.9397292137145996, "logits_per_token_corr": -3.9397292137145996, "logits_per_char_corr": -1.9698646068572998, "bits_per_byte_corr": 2.8419138995377557}, "model_output": [{"sum_logits": -3.9397292137145996, "num_tokens": 1, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -6.875624656677246, "logits_per_token": -3.9397292137145996, "logits_per_char": -1.9698646068572998, "bits_per_byte": 2.8419138995377557, "num_chars": 2}, {"sum_logits": -4.556565761566162, "num_tokens": 1, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -6.575865745544434, "logits_per_token": -4.556565761566162, "logits_per_char": -2.278282880783081, "bits_per_byte": 3.286867413850334, "num_chars": 2}, {"sum_logits": -5.162064075469971, "num_tokens": 1, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -6.3205461502075195, "logits_per_token": -5.162064075469971, "logits_per_char": -2.5810320377349854, "bits_per_byte": 3.7236421212184743, "num_chars": 2}, {"sum_logits": -4.01470422744751, "num_tokens": 1, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -5.19705867767334, "logits_per_token": -4.01470422744751, "logits_per_char": -2.007352113723755, "bits_per_byte": 2.8959969397893204, "num_chars": 2}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 71, "native_id": "Mercury_7215845", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.62840747833252, "logits_per_token_corr": -2.90710186958313, "logits_per_char_corr": -0.6120214462280273, "bits_per_byte_corr": 0.8829603053915008}, "model_output": [{"sum_logits": -13.224583625793457, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -20.947711944580078, "logits_per_token": -4.408194541931152, "logits_per_char": -0.8816389083862305, "bits_per_byte": 1.2719360809844877, "num_chars": 15}, {"sum_logits": -13.41884708404541, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -23.188812255859375, "logits_per_token": -4.472949028015137, "logits_per_char": -0.8386779427528381, "bits_per_byte": 1.2099565089133468, "num_chars": 16}, {"sum_logits": -11.62840747833252, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -22.517019271850586, "logits_per_token": -2.90710186958313, "logits_per_char": -0.6120214462280273, "bits_per_byte": 0.8829603053915008, "num_chars": 19}, {"sum_logits": -5.864645004272461, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -17.272117614746094, "logits_per_token": -1.4661612510681152, "logits_per_char": -0.3086655265406558, "bits_per_byte": 0.4453102244339048, "num_chars": 19}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 72, "native_id": "Mercury_7136885", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.474227905273438, "logits_per_token_corr": -2.4963182721819197, "logits_per_char_corr": -0.5295220577355587, "bits_per_byte_corr": 0.7639388467369588}, "model_output": [{"sum_logits": -12.091885566711426, "num_tokens": 5, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -25.467113494873047, "logits_per_token": -2.4183771133422853, "logits_per_char": -0.5496311621232466, "bits_per_byte": 0.7929501519138153, "num_chars": 22}, {"sum_logits": -17.474227905273438, "num_tokens": 7, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -37.03956985473633, "logits_per_token": -2.4963182721819197, "logits_per_char": -0.5295220577355587, "bits_per_byte": 0.7639388467369588, "num_chars": 33}, {"sum_logits": -14.526322364807129, "num_tokens": 6, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -32.3619384765625, "logits_per_token": -2.421053727467855, "logits_per_char": -0.3926033071569494, "bits_per_byte": 0.5664068442723443, "num_chars": 37}, {"sum_logits": -16.183815002441406, "num_tokens": 8, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -34.93296813964844, "logits_per_token": -2.022976875305176, "logits_per_char": -0.3853289286295573, "bits_per_byte": 0.5559121344453191, "num_chars": 42}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 73, "native_id": "Mercury_SC_400059", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.496575355529785, "logits_per_token_corr": -1.899315071105957, "logits_per_char_corr": -0.3798630142211914, "bits_per_byte_corr": 0.5480264868344403}, "model_output": [{"sum_logits": -9.736169815063477, "num_tokens": 3, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -25.05889320373535, "logits_per_token": -3.245389938354492, "logits_per_char": -0.6954407010759626, "bits_per_byte": 1.003308850675356, "num_chars": 14}, {"sum_logits": -12.861698150634766, "num_tokens": 3, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -25.143259048461914, "logits_per_token": -4.287232716878255, "logits_per_char": -0.9186927250453404, "bits_per_byte": 1.3253934385246329, "num_chars": 14}, {"sum_logits": -17.07952117919922, "num_tokens": 5, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -31.096485137939453, "logits_per_token": -3.415904235839844, "logits_per_char": -0.6831808471679688, "bits_per_byte": 0.9856216202402575, "num_chars": 25}, {"sum_logits": -9.496575355529785, "num_tokens": 5, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -29.827247619628906, "logits_per_token": -1.899315071105957, "logits_per_char": -0.3798630142211914, "bits_per_byte": 0.5480264868344403, "num_chars": 25}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 74, "native_id": "Mercury_7044328", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -24.946895599365234, "logits_per_token_corr": -3.1183619499206543, "logits_per_char_corr": -0.5091203183543925, "bits_per_byte_corr": 0.7345053585062201}, "model_output": [{"sum_logits": -20.333391189575195, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -31.49129867553711, "logits_per_token": -3.3888985315958657, "logits_per_char": -0.5809540339878627, "bits_per_byte": 0.83813950381933, "num_chars": 35}, {"sum_logits": -16.38726806640625, "num_tokens": 5, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -24.45665740966797, "logits_per_token": -3.27745361328125, "logits_per_char": -0.4552018907335069, "bits_per_byte": 0.656717510364982, "num_chars": 36}, {"sum_logits": -22.943845748901367, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -33.650428771972656, "logits_per_token": -2.867980718612671, "logits_per_char": -0.45887691497802735, "bits_per_byte": 0.6620194496177022, "num_chars": 50}, {"sum_logits": -24.946895599365234, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -38.22669982910156, "logits_per_token": -3.1183619499206543, "logits_per_char": -0.5091203183543925, "bits_per_byte": 0.7345053585062201, "num_chars": 49}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 75, "native_id": "MEA_2010_8_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.15511703491211, "logits_per_token_corr": -4.051705678304036, "logits_per_char_corr": -0.9350090026855469, "bits_per_byte_corr": 1.348932851361943}, "model_output": [{"sum_logits": -12.15511703491211, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -20.506973266601562, "logits_per_token": -4.051705678304036, "logits_per_char": -0.9350090026855469, "bits_per_byte": 1.348932851361943, "num_chars": 13}, {"sum_logits": -7.081275939941406, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -18.57065773010254, "logits_per_token": -1.4162551879882812, "logits_per_char": -0.337203616187686, "bits_per_byte": 0.4864819848441495, "num_chars": 21}, {"sum_logits": -13.395231246948242, "num_tokens": 9, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -28.48578453063965, "logits_per_token": -1.4883590274386935, "logits_per_char": -0.3720897568596734, "bits_per_byte": 0.5368120469874166, "num_chars": 36}, {"sum_logits": -17.593555450439453, "num_tokens": 12, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -35.20307540893555, "logits_per_token": -1.4661296208699544, "logits_per_char": -0.399853532964533, "bits_per_byte": 0.5768667090902779, "num_chars": 44}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 76, "native_id": "Mercury_414099", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.5111026763916, "logits_per_token_corr": -2.68888783454895, "logits_per_char_corr": -0.5002582017765489, "bits_per_byte_corr": 0.721720026867576}, "model_output": [{"sum_logits": -10.842877388000488, "num_tokens": 7, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -32.27641296386719, "logits_per_token": -1.5489824840000697, "logits_per_char": -0.3011910385555691, "bits_per_byte": 0.4345268176846283, "num_chars": 36}, {"sum_logits": -21.5111026763916, "num_tokens": 8, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -45.255924224853516, "logits_per_token": -2.68888783454895, "logits_per_char": -0.5002582017765489, "bits_per_byte": 0.721720026867576, "num_chars": 43}, {"sum_logits": -22.543136596679688, "num_tokens": 7, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -46.94055938720703, "logits_per_token": -3.2204480852399553, "logits_per_char": -0.5242589906204579, "bits_per_byte": 0.7563458459101315, "num_chars": 43}, {"sum_logits": -18.73607635498047, "num_tokens": 10, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -38.64329528808594, "logits_per_token": -1.8736076354980469, "logits_per_char": -0.3903349240620931, "bits_per_byte": 0.5631342592305564, "num_chars": 48}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 77, "native_id": "Mercury_410807", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.17453384399414, "logits_per_token_corr": -2.1067758039994673, "logits_per_char_corr": -0.37991039088514983, "bits_per_byte_corr": 0.5480948369125871}, "model_output": [{"sum_logits": -16.747600555419922, "num_tokens": 8, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -43.501827239990234, "logits_per_token": -2.0934500694274902, "logits_per_char": -0.42942565526717746, "bits_per_byte": 0.6195302632848956, "num_chars": 39}, {"sum_logits": -23.17453384399414, "num_tokens": 11, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -38.858192443847656, "logits_per_token": -2.1067758039994673, "logits_per_char": -0.37991039088514983, "bits_per_byte": 0.5480948369125871, "num_chars": 61}, {"sum_logits": -18.615402221679688, "num_tokens": 10, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -33.28993606567383, "logits_per_token": -1.8615402221679687, "logits_per_char": -0.35798850426307094, "bits_per_byte": 0.516468239795961, "num_chars": 52}, {"sum_logits": -19.790563583374023, "num_tokens": 11, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -44.22532272338867, "logits_per_token": -1.799142143943093, "logits_per_char": -0.3598284287886186, "bits_per_byte": 0.5191226897845805, "num_chars": 55}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 78, "native_id": "Mercury_403234", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.943378448486328, "logits_per_token_corr": -3.1347683497837613, "logits_per_char_corr": -0.5352043524021055, "bits_per_byte_corr": 0.7721366650732615}, "model_output": [{"sum_logits": -22.233837127685547, "num_tokens": 10, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -36.32525634765625, "logits_per_token": -2.2233837127685545, "logits_per_char": -0.4730603644188414, "bits_per_byte": 0.6824818417886787, "num_chars": 47}, {"sum_logits": -21.943378448486328, "num_tokens": 7, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -29.181997299194336, "logits_per_token": -3.1347683497837613, "logits_per_char": -0.5352043524021055, "bits_per_byte": 0.7721366650732615, "num_chars": 41}, {"sum_logits": -16.24163055419922, "num_tokens": 8, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -28.407751083374023, "logits_per_token": -2.0302038192749023, "logits_per_char": -0.4640465872628348, "bits_per_byte": 0.6694777101860204, "num_chars": 35}, {"sum_logits": -20.57232666015625, "num_tokens": 10, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -33.65513610839844, "logits_per_token": -2.057232666015625, "logits_per_char": -0.39562166654146635, "bits_per_byte": 0.5707614163880107, "num_chars": 52}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 79, "native_id": "Mercury_7011323", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.65982723236084, "logits_per_token_corr": -4.32991361618042, "logits_per_char_corr": -0.8659827232360839, "bits_per_byte_corr": 1.2493489803091156}, "model_output": [{"sum_logits": -8.65982723236084, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -18.08713150024414, "logits_per_token": -4.32991361618042, "logits_per_char": -0.8659827232360839, "bits_per_byte": 1.2493489803091156, "num_chars": 10}, {"sum_logits": -10.088163375854492, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -18.795501708984375, "logits_per_token": -5.044081687927246, "logits_per_char": -0.9171057614413175, "bits_per_byte": 1.323103934003036, "num_chars": 11}, {"sum_logits": -8.990238189697266, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -14.474889755249023, "logits_per_token": -8.990238189697266, "logits_per_char": -0.8172943808815696, "bits_per_byte": 1.1791065502451032, "num_chars": 11}, {"sum_logits": -12.699324607849121, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -19.491352081298828, "logits_per_token": -4.233108202616374, "logits_per_char": -0.8466216405232747, "bits_per_byte": 1.2214168422930847, "num_chars": 15}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 80, "native_id": "Mercury_7109463", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.727275848388672, "logits_per_token_corr": -2.431818962097168, "logits_per_char_corr": -0.37412599416879505, "bits_per_byte_corr": 0.5397497164553616}, "model_output": [{"sum_logits": -16.46012306213379, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -23.010231018066406, "logits_per_token": -5.48670768737793, "logits_per_char": -0.9144512812296549, "bits_per_byte": 1.3192743285655297, "num_chars": 18}, {"sum_logits": -10.090192794799805, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -24.57965660095215, "logits_per_token": -3.3633975982666016, "logits_per_char": -0.43870403455651324, "bits_per_byte": 0.6329161350731168, "num_chars": 23}, {"sum_logits": -8.779983520507812, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -22.226089477539062, "logits_per_token": -2.194995880126953, "logits_per_char": -0.3511993408203125, "bits_per_byte": 0.5066735473653018, "num_chars": 25}, {"sum_logits": -9.727275848388672, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -26.839561462402344, "logits_per_token": -2.431818962097168, "logits_per_char": -0.37412599416879505, "bits_per_byte": 0.5397497164553616, "num_chars": 26}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 81, "native_id": "Mercury_SC_401277", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.12551212310791, "logits_per_token_corr": -2.562756061553955, "logits_per_char_corr": -0.3203445076942444, "bits_per_byte_corr": 0.4621594326268348}, "model_output": [{"sum_logits": -9.471467018127441, "num_tokens": 4, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -24.231689453125, "logits_per_token": -2.3678667545318604, "logits_per_char": -0.37885868072509765, "bits_per_byte": 0.5465775398802262, "num_chars": 25}, {"sum_logits": -7.331517696380615, "num_tokens": 2, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -22.698070526123047, "logits_per_token": -3.6657588481903076, "logits_per_char": -0.43126574684591856, "bits_per_byte": 0.6221849542803288, "num_chars": 17}, {"sum_logits": -5.12551212310791, "num_tokens": 2, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -25.948286056518555, "logits_per_token": -2.562756061553955, "logits_per_char": -0.3203445076942444, "bits_per_byte": 0.4621594326268348, "num_chars": 16}, {"sum_logits": -7.953770637512207, "num_tokens": 2, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -28.758155822753906, "logits_per_token": -3.9768853187561035, "logits_per_char": -0.4186195072374846, "bits_per_byte": 0.6039402871113344, "num_chars": 19}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 82, "native_id": "MCAS_2005_5_25", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.040339469909668, "logits_per_token_corr": -3.3467798233032227, "logits_per_char_corr": -1.1155932744344075, "bits_per_byte_corr": 1.6094608846767564}, "model_output": [{"sum_logits": -11.427751541137695, "num_tokens": 3, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -17.49311065673828, "logits_per_token": -3.809250513712565, "logits_per_char": -1.6325359344482422, "bits_per_byte": 2.3552514967032008, "num_chars": 7}, {"sum_logits": -7.974546909332275, "num_tokens": 3, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -17.326549530029297, "logits_per_token": -2.6581823031107583, "logits_per_char": -0.7974546909332275, "bits_per_byte": 1.1504839279438348, "num_chars": 10}, {"sum_logits": -10.040339469909668, "num_tokens": 3, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -17.53607177734375, "logits_per_token": -3.3467798233032227, "logits_per_char": -1.1155932744344075, "bits_per_byte": 1.6094608846767564, "num_chars": 9}, {"sum_logits": -13.349164962768555, "num_tokens": 3, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -20.60080337524414, "logits_per_token": -4.449721654256185, "logits_per_char": -1.4832405514187283, "bits_per_byte": 2.139863787978748, "num_chars": 9}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 83, "native_id": "Mercury_SC_401272", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.439719200134277, "logits_per_token_corr": -2.1099298000335693, "logits_per_char_corr": -0.3669443130493164, "bits_per_byte_corr": 0.5293887407190364}, "model_output": [{"sum_logits": -15.475858688354492, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -19.963598251342773, "logits_per_token": -5.158619562784831, "logits_per_char": -1.190450668334961, "bits_per_byte": 1.7174572756310342, "num_chars": 13}, {"sum_logits": -17.88964080810547, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -24.780845642089844, "logits_per_token": -5.963213602701823, "logits_per_char": -1.1926427205403647, "bits_per_byte": 1.7206197384771422, "num_chars": 15}, {"sum_logits": -13.575145721435547, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -20.51348876953125, "logits_per_token": -4.525048573811849, "logits_per_char": -0.7985379836138558, "bits_per_byte": 1.1520467889220096, "num_chars": 17}, {"sum_logits": -8.439719200134277, "num_tokens": 4, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -18.012863159179688, "logits_per_token": -2.1099298000335693, "logits_per_char": -0.3669443130493164, "bits_per_byte": 0.5293887407190364, "num_chars": 23}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 84, "native_id": "Mercury_7103600", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.350514888763428, "logits_per_token_corr": -2.116838296254476, "logits_per_char_corr": -0.3342376257243909, "bits_per_byte_corr": 0.48220296511142663}, "model_output": [{"sum_logits": -11.839873313903809, "num_tokens": 3, "num_tokens_all": 252, "is_greedy": false, "sum_logits_uncond": -19.769004821777344, "logits_per_token": -3.946624437967936, "logits_per_char": -0.8457052367074149, "bits_per_byte": 1.220094751052491, "num_chars": 14}, {"sum_logits": -11.682680130004883, "num_tokens": 3, "num_tokens_all": 252, "is_greedy": false, "sum_logits_uncond": -21.854450225830078, "logits_per_token": -3.8942267100016275, "logits_per_char": -0.7301675081253052, "bits_per_byte": 1.0534090429913865, "num_chars": 16}, {"sum_logits": -3.5718436241149902, "num_tokens": 3, "num_tokens_all": 252, "is_greedy": false, "sum_logits_uncond": -22.445871353149414, "logits_per_token": -1.1906145413716633, "logits_per_char": -0.17008779162452334, "bits_per_byte": 0.24538481349263147, "num_chars": 21}, {"sum_logits": -6.350514888763428, "num_tokens": 3, "num_tokens_all": 252, "is_greedy": false, "sum_logits_uncond": -21.083728790283203, "logits_per_token": -2.116838296254476, "logits_per_char": -0.3342376257243909, "bits_per_byte": 0.48220296511142663, "num_chars": 19}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 85, "native_id": "MDSA_2009_8_2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.2825865745544434, "logits_per_token_corr": -1.6412932872772217, "logits_per_char_corr": -0.3647318416171604, "bits_per_byte_corr": 0.5261968191557541}, "model_output": [{"sum_logits": -3.2825865745544434, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -11.705997467041016, "logits_per_token": -1.6412932872772217, "logits_per_char": -0.3647318416171604, "bits_per_byte": 0.5261968191557541, "num_chars": 9}, {"sum_logits": -6.144822120666504, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -12.234697341918945, "logits_per_token": -3.072411060333252, "logits_per_char": -0.768102765083313, "bits_per_byte": 1.1081380500795923, "num_chars": 8}, {"sum_logits": -11.93793773651123, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -14.167972564697266, "logits_per_token": -5.968968868255615, "logits_per_char": -1.3264375262790256, "bits_per_byte": 1.913644841213149, "num_chars": 9}, {"sum_logits": -5.019430160522461, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -10.506362915039062, "logits_per_token": -2.5097150802612305, "logits_per_char": -0.7170614515032087, "bits_per_byte": 1.0345010000970642, "num_chars": 7}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 86, "native_id": "Mercury_7127943", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.388626098632812, "logits_per_token_corr": -2.6388626098632812, "logits_per_char_corr": -0.43981043497721356, "bits_per_byte_corr": 0.6345123334732997}, "model_output": [{"sum_logits": -26.388626098632812, "num_tokens": 10, "num_tokens_all": 244, "is_greedy": false, "sum_logits_uncond": -43.859798431396484, "logits_per_token": -2.6388626098632812, "logits_per_char": -0.43981043497721356, "bits_per_byte": 0.6345123334732997, "num_chars": 60}, {"sum_logits": -41.48644256591797, "num_tokens": 8, "num_tokens_all": 242, "is_greedy": false, "sum_logits_uncond": -50.494102478027344, "logits_per_token": -5.185805320739746, "logits_per_char": -0.9219209459092882, "bits_per_byte": 1.330050776755948, "num_chars": 45}, {"sum_logits": -24.6087589263916, "num_tokens": 8, "num_tokens_all": 242, "is_greedy": false, "sum_logits_uncond": -29.165386199951172, "logits_per_token": -3.07609486579895, "logits_per_char": -0.5722967192184093, "bits_per_byte": 0.8256496387340158, "num_chars": 43}, {"sum_logits": -29.239498138427734, "num_tokens": 10, "num_tokens_all": 244, "is_greedy": false, "sum_logits_uncond": -50.766014099121094, "logits_per_token": -2.9239498138427735, "logits_per_char": -0.4641190180702815, "bits_per_byte": 0.6695822057527313, "num_chars": 63}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 87, "native_id": "ACTAAP_2009_7_8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -48.11369705200195, "logits_per_token_corr": -2.091899871826172, "logits_per_char_corr": -0.4811369705200195, "bits_per_byte_corr": 0.6941339213580703}, "model_output": [{"sum_logits": -36.357933044433594, "num_tokens": 15, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -34.399009704589844, "logits_per_token": -2.4238622029622396, "logits_per_char": -0.5049712922837999, "bits_per_byte": 0.7285195791696528, "num_chars": 72}, {"sum_logits": -37.00074768066406, "num_tokens": 14, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -40.337039947509766, "logits_per_token": -2.6429105486188615, "logits_per_char": -0.48052919065797484, "bits_per_byte": 0.6932570803651456, "num_chars": 77}, {"sum_logits": -57.32040786743164, "num_tokens": 16, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -47.30369567871094, "logits_per_token": -3.5825254917144775, "logits_per_char": -0.6823858079456148, "bits_per_byte": 0.9844746210968544, "num_chars": 84}, {"sum_logits": -48.11369705200195, "num_tokens": 23, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -47.04694747924805, "logits_per_token": -2.091899871826172, "logits_per_char": -0.4811369705200195, "bits_per_byte": 0.6941339213580703, "num_chars": 100}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 88, "native_id": "MCAS_2006_9_43", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.481903076171875, "logits_per_token_corr": -2.0602378845214844, "logits_per_char_corr": -1.2678386981670673, "bits_per_byte_corr": 1.8291046024940614}, "model_output": [{"sum_logits": -15.599346160888672, "num_tokens": 8, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -32.836402893066406, "logits_per_token": -1.949918270111084, "logits_per_char": -1.0399564107259114, "bits_per_byte": 1.5003399564960362, "num_chars": 15}, {"sum_logits": -16.541439056396484, "num_tokens": 8, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -33.78506088256836, "logits_per_token": -2.0676798820495605, "logits_per_char": -1.2724183889535756, "bits_per_byte": 1.8357116996805665, "num_chars": 13}, {"sum_logits": -16.481903076171875, "num_tokens": 8, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -32.25706100463867, "logits_per_token": -2.0602378845214844, "logits_per_char": -1.2678386981670673, "bits_per_byte": 1.8291046024940614, "num_chars": 13}, {"sum_logits": -17.860191345214844, "num_tokens": 6, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -33.317684173583984, "logits_per_token": -2.976698557535807, "logits_per_char": -1.3738608727088342, "bits_per_byte": 1.9820622679298425, "num_chars": 13}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 89, "native_id": "Mercury_7252088", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.423084259033203, "logits_per_token_corr": -4.6846168518066404, "logits_per_char_corr": -0.8365387235369001, "bits_per_byte_corr": 1.2068702679591363}, "model_output": [{"sum_logits": -14.057840347290039, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -29.85848617553711, "logits_per_token": -3.5144600868225098, "logits_per_char": -0.5206607536033347, "bits_per_byte": 0.7511546872095812, "num_chars": 27}, {"sum_logits": -19.902786254882812, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -31.72796058654785, "logits_per_token": -4.975696563720703, "logits_per_char": -0.7371402316623263, "bits_per_byte": 1.063468556659744, "num_chars": 27}, {"sum_logits": -23.423084259033203, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -36.454673767089844, "logits_per_token": -4.6846168518066404, "logits_per_char": -0.8365387235369001, "bits_per_byte": 1.2068702679591363, "num_chars": 28}, {"sum_logits": -14.108294486999512, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -29.831430435180664, "logits_per_token": -2.821658897399902, "logits_per_char": -0.48649291334481076, "bits_per_byte": 0.7018609135106869, "num_chars": 29}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 90, "native_id": "Mercury_7084665", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.109559059143066, "logits_per_token_corr": -1.7773897647857666, "logits_per_char_corr": -0.3385504313877651, "bits_per_byte_corr": 0.48842502845429886}, "model_output": [{"sum_logits": -7.109559059143066, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -19.91975212097168, "logits_per_token": -1.7773897647857666, "logits_per_char": -0.3385504313877651, "bits_per_byte": 0.48842502845429886, "num_chars": 21}, {"sum_logits": -9.717817306518555, "num_tokens": 5, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -26.242740631103516, "logits_per_token": -1.943563461303711, "logits_per_char": -0.42251379593558935, "bits_per_byte": 0.6095585581038842, "num_chars": 23}, {"sum_logits": -7.720889091491699, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -18.30880355834961, "logits_per_token": -3.8604445457458496, "logits_per_char": -0.7720889091491699, "bits_per_byte": 1.113888840355677, "num_chars": 10}, {"sum_logits": -5.849308490753174, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -11.782609939575195, "logits_per_token": -2.924654245376587, "logits_per_char": -0.9748847484588623, "bits_per_byte": 1.4064613920408957, "num_chars": 6}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 91, "native_id": "FCAT_2008_5_2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -38.26934051513672, "logits_per_token_corr": -4.252148946126302, "logits_per_char_corr": -0.6378223419189453, "bits_per_byte_corr": 0.9201831296553082}, "model_output": [{"sum_logits": -25.92779541015625, "num_tokens": 7, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -34.27661895751953, "logits_per_token": -3.7039707728794644, "logits_per_char": -0.7625822179457721, "bits_per_byte": 1.1001735841012625, "num_chars": 34}, {"sum_logits": -38.80394744873047, "num_tokens": 6, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -37.443946838378906, "logits_per_token": -6.467324574788411, "logits_per_char": -1.0211565118086965, "bits_per_byte": 1.4732174355589371, "num_chars": 38}, {"sum_logits": -45.36991882324219, "num_tokens": 8, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -43.07866668701172, "logits_per_token": -5.671239852905273, "logits_per_char": -0.9653174217711104, "bits_per_byte": 1.3926586572739013, "num_chars": 47}, {"sum_logits": -38.26934051513672, "num_tokens": 9, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -40.72058868408203, "logits_per_token": -4.252148946126302, "logits_per_char": -0.6378223419189453, "bits_per_byte": 0.9201831296553082, "num_chars": 60}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 92, "native_id": "Mercury_SC_414041", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.520122528076172, "logits_per_token_corr": -3.5040245056152344, "logits_per_char_corr": -0.6257186617170062, "bits_per_byte_corr": 0.9027212102514522}, "model_output": [{"sum_logits": -32.61888122558594, "num_tokens": 9, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -38.5236930847168, "logits_per_token": -3.624320136176215, "logits_per_char": -0.8583916111996299, "bits_per_byte": 1.2383973206192829, "num_chars": 38}, {"sum_logits": -33.96107482910156, "num_tokens": 9, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -43.237571716308594, "logits_per_token": -3.7734527587890625, "logits_per_char": -0.8283188982707698, "bits_per_byte": 1.1950115668107077, "num_chars": 41}, {"sum_logits": -16.067752838134766, "num_tokens": 5, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -31.513587951660156, "logits_per_token": -3.213550567626953, "logits_per_char": -0.5540604426943022, "bits_per_byte": 0.7993402530283877, "num_chars": 29}, {"sum_logits": -17.520122528076172, "num_tokens": 5, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -34.17620849609375, "logits_per_token": -3.5040245056152344, "logits_per_char": -0.6257186617170062, "bits_per_byte": 0.9027212102514522, "num_chars": 28}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 93, "native_id": "MCAS_2014_8_20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.40737533569336, "logits_per_token_corr": -3.05092191696167, "logits_per_char_corr": -0.5547130758112128, "bits_per_byte_corr": 0.8002818035896752}, "model_output": [{"sum_logits": -24.40737533569336, "num_tokens": 8, "num_tokens_all": 237, "is_greedy": false, "sum_logits_uncond": -42.839378356933594, "logits_per_token": -3.05092191696167, "logits_per_char": -0.5547130758112128, "bits_per_byte": 0.8002818035896752, "num_chars": 44}, {"sum_logits": -27.634843826293945, "num_tokens": 10, "num_tokens_all": 239, "is_greedy": false, "sum_logits_uncond": -46.80588912963867, "logits_per_token": -2.7634843826293944, "logits_per_char": -0.5117563671535916, "bits_per_byte": 0.7383083730363686, "num_chars": 54}, {"sum_logits": -27.4632568359375, "num_tokens": 10, "num_tokens_all": 239, "is_greedy": false, "sum_logits_uncond": -42.46247863769531, "logits_per_token": -2.74632568359375, "logits_per_char": -0.4818115234375, "bits_per_byte": 0.6951070955069372, "num_chars": 57}, {"sum_logits": -29.842594146728516, "num_tokens": 11, "num_tokens_all": 240, "is_greedy": false, "sum_logits_uncond": -51.91929244995117, "logits_per_token": -2.712963104248047, "logits_per_char": -0.47369197058299234, "bits_per_byte": 0.6833930568694948, "num_chars": 63}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 94, "native_id": "Mercury_SC_401116", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.5213623046875, "logits_per_token_corr": -1.8357069227430556, "logits_per_char_corr": -0.34419504801432294, "bits_per_byte_corr": 0.4965684888691591}, "model_output": [{"sum_logits": -17.030590057373047, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -38.41472625732422, "logits_per_token": -2.128823757171631, "logits_per_char": -0.3870588649402965, "bits_per_byte": 0.558407904981878, "num_chars": 44}, {"sum_logits": -16.5213623046875, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -45.8640022277832, "logits_per_token": -1.8357069227430556, "logits_per_char": -0.34419504801432294, "bits_per_byte": 0.4965684888691591, "num_chars": 48}, {"sum_logits": -25.45232391357422, "num_tokens": 12, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -44.99506378173828, "logits_per_token": -2.1210269927978516, "logits_per_char": -0.44653199848375824, "bits_per_byte": 0.644209499811219, "num_chars": 57}, {"sum_logits": -32.26094436645508, "num_tokens": 11, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -50.07695007324219, "logits_per_token": -2.932813124223189, "logits_per_char": -0.565981480113247, "bits_per_byte": 0.8165386745949635, "num_chars": 57}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 95, "native_id": "Mercury_7064680", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.976297378540039, "logits_per_token_corr": -2.4940743446350098, "logits_per_char_corr": -0.4534680626609109, "bits_per_byte_corr": 0.6542161252028919}, "model_output": [{"sum_logits": -12.15887451171875, "num_tokens": 4, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -22.12078857421875, "logits_per_token": -3.0397186279296875, "logits_per_char": -0.6754930284288194, "bits_per_byte": 0.9745304422700255, "num_chars": 18}, {"sum_logits": -7.199161529541016, "num_tokens": 4, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -20.394582748413086, "logits_per_token": -1.799790382385254, "logits_per_char": -0.3999534183078342, "bits_per_byte": 0.5770108131797161, "num_chars": 18}, {"sum_logits": -9.976297378540039, "num_tokens": 4, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -23.076400756835938, "logits_per_token": -2.4940743446350098, "logits_per_char": -0.4534680626609109, "bits_per_byte": 0.6542161252028919, "num_chars": 22}, {"sum_logits": -14.763834953308105, "num_tokens": 4, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -25.867630004882812, "logits_per_token": -3.6909587383270264, "logits_per_char": -0.641905867535135, "bits_per_byte": 0.9260744118111325, "num_chars": 23}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 96, "native_id": "Mercury_7211680", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.695068359375, "logits_per_token_corr": -2.711883544921875, "logits_per_char_corr": -0.4615971991356383, "bits_per_byte_corr": 0.6659439900816991}, "model_output": [{"sum_logits": -28.36392593383789, "num_tokens": 8, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -43.58467102050781, "logits_per_token": -3.5454907417297363, "logits_per_char": -0.5561554104674096, "bits_per_byte": 0.8023626526454741, "num_chars": 51}, {"sum_logits": -31.042978286743164, "num_tokens": 8, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -50.625667572021484, "logits_per_token": -3.8803722858428955, "logits_per_char": -0.5857165714479843, "bits_per_byte": 0.8450103929951003, "num_chars": 53}, {"sum_logits": -21.695068359375, "num_tokens": 8, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -37.93955993652344, "logits_per_token": -2.711883544921875, "logits_per_char": -0.4615971991356383, "bits_per_byte": 0.6659439900816991, "num_chars": 47}, {"sum_logits": -20.292705535888672, "num_tokens": 7, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -36.757843017578125, "logits_per_token": -2.8989579336983815, "logits_per_char": -0.4509490119086372, "bits_per_byte": 0.6505819031748363, "num_chars": 45}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 97, "native_id": "Mercury_180373", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.40270233154297, "logits_per_token_corr": -3.480540466308594, "logits_per_char_corr": -0.5438344478607178, "bits_per_byte_corr": 0.7845872609938087}, "model_output": [{"sum_logits": -21.608625411987305, "num_tokens": 7, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -35.51120376586914, "logits_per_token": -3.086946487426758, "logits_per_char": -0.8643450164794921, "bits_per_byte": 1.2469862688929485, "num_chars": 25}, {"sum_logits": -17.40270233154297, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -30.387243270874023, "logits_per_token": -3.480540466308594, "logits_per_char": -0.5438344478607178, "bits_per_byte": 0.7845872609938087, "num_chars": 32}, {"sum_logits": -20.091106414794922, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -32.18828582763672, "logits_per_token": -4.018221282958985, "logits_per_char": -0.5580862892998589, "bits_per_byte": 0.8051483219616082, "num_chars": 36}, {"sum_logits": -28.87568473815918, "num_tokens": 9, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -39.17220687866211, "logits_per_token": -3.20840941535102, "logits_per_char": -0.7218921184539795, "bits_per_byte": 1.0414701793511325, "num_chars": 40}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 98, "native_id": "Mercury_7216248", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.216743469238281, "logits_per_token_corr": -1.1520929336547852, "logits_per_char_corr": -0.24254588076942846, "bits_per_byte_corr": 0.3499197393743516}, "model_output": [{"sum_logits": -11.15008544921875, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -30.397584915161133, "logits_per_token": -1.5928693498883928, "logits_per_char": -0.37166951497395834, "bits_per_byte": 0.5362057661029213, "num_chars": 30}, {"sum_logits": -9.216743469238281, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -30.307910919189453, "logits_per_token": -1.1520929336547852, "logits_per_char": -0.24254588076942846, "bits_per_byte": 0.3499197393743516, "num_chars": 38}, {"sum_logits": -10.594551086425781, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -29.236371994018555, "logits_per_token": -1.513507298060826, "logits_per_char": -0.31160444371840534, "bits_per_byte": 0.44955018567183047, "num_chars": 34}, {"sum_logits": -15.723684310913086, "num_tokens": 9, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -32.669105529785156, "logits_per_token": -1.7470760345458984, "logits_per_char": -0.46246130326214957, "bits_per_byte": 0.6671906288198295, "num_chars": 34}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 99, "native_id": "Mercury_SC_417677", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.811914920806885, "logits_per_token_corr": -1.562382984161377, "logits_per_char_corr": -0.3124765968322754, "bits_per_byte_corr": 0.4508084366441076}, "model_output": [{"sum_logits": -21.8804988861084, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -32.31975173950195, "logits_per_token": -4.37609977722168, "logits_per_char": -0.9945681311867454, "bits_per_byte": 1.4348585106903524, "num_chars": 22}, {"sum_logits": -7.811914920806885, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -27.651599884033203, "logits_per_token": -1.562382984161377, "logits_per_char": -0.3124765968322754, "bits_per_byte": 0.4508084366441076, "num_chars": 25}, {"sum_logits": -23.996644973754883, "num_tokens": 10, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -36.612770080566406, "logits_per_token": -2.3996644973754884, "logits_per_char": -0.533258777194553, "bits_per_byte": 0.7693297933696469, "num_chars": 45}, {"sum_logits": -25.794517517089844, "num_tokens": 10, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -40.14368438720703, "logits_per_token": -2.5794517517089846, "logits_per_char": -0.5862390344793146, "bits_per_byte": 0.8457641478194488, "num_chars": 44}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 100, "native_id": "Mercury_7221655", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.232254981994629, "logits_per_token_corr": -1.5387091636657715, "logits_per_char_corr": -0.20982397686351428, "bits_per_byte_corr": 0.3027120108808101}, "model_output": [{"sum_logits": -22.99658203125, "num_tokens": 7, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -30.05013656616211, "logits_per_token": -3.2852260044642856, "logits_per_char": -0.5226495916193182, "bits_per_byte": 0.7540239739523741, "num_chars": 44}, {"sum_logits": -16.088966369628906, "num_tokens": 7, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -28.37798309326172, "logits_per_token": -2.2984237670898438, "logits_per_char": -0.392413813893388, "bits_per_byte": 0.5661334632807222, "num_chars": 41}, {"sum_logits": -9.232254981994629, "num_tokens": 6, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -24.683937072753906, "logits_per_token": -1.5387091636657715, "logits_per_char": -0.20982397686351428, "bits_per_byte": 0.3027120108808101, "num_chars": 44}, {"sum_logits": -20.840015411376953, "num_tokens": 7, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -28.199609756469727, "logits_per_token": -2.9771450587681363, "logits_per_char": -0.5632436597669447, "bits_per_byte": 0.8125888347585054, "num_chars": 37}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 101, "native_id": "MCAS_2006_9_12", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.731806755065918, "logits_per_token_corr": -1.9664758443832397, "logits_per_char_corr": -0.41399491460699783, "bits_per_byte_corr": 0.5972684102571948}, "model_output": [{"sum_logits": -20.317668914794922, "num_tokens": 9, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -56.73561096191406, "logits_per_token": -2.257518768310547, "logits_per_char": -0.5491261868863493, "bits_per_byte": 0.7922216266437714, "num_chars": 37}, {"sum_logits": -25.191783905029297, "num_tokens": 9, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -42.22579574584961, "logits_per_token": -2.7990871005588107, "logits_per_char": -0.6459431770520333, "bits_per_byte": 0.9318990182296997, "num_chars": 39}, {"sum_logits": -22.209457397460938, "num_tokens": 8, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -55.31178283691406, "logits_per_token": -2.776182174682617, "logits_per_char": -0.6169293721516927, "bits_per_byte": 0.8900409457826284, "num_chars": 36}, {"sum_logits": -15.731806755065918, "num_tokens": 8, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -48.93328857421875, "logits_per_token": -1.9664758443832397, "logits_per_char": -0.41399491460699783, "bits_per_byte": 0.5972684102571948, "num_chars": 38}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 102, "native_id": "MCAS_2004_9_2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.802066802978516, "logits_per_token_corr": -2.560413360595703, "logits_per_char_corr": -0.5566116001295007, "bits_per_byte_corr": 0.8030207952086783}, "model_output": [{"sum_logits": -12.802066802978516, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -33.45425796508789, "logits_per_token": -2.560413360595703, "logits_per_char": -0.5566116001295007, "bits_per_byte": 0.8030207952086783, "num_chars": 23}, {"sum_logits": -17.591327667236328, "num_tokens": 4, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -38.21683120727539, "logits_per_token": -4.397831916809082, "logits_per_char": -0.5674621828140751, "bits_per_byte": 0.8186748770384807, "num_chars": 31}, {"sum_logits": -12.214815139770508, "num_tokens": 4, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -29.620742797851562, "logits_per_token": -3.053703784942627, "logits_per_char": -0.6786008410983615, "bits_per_byte": 0.9790140681963889, "num_chars": 18}, {"sum_logits": -12.851425170898438, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -26.45696449279785, "logits_per_token": -2.5702850341796877, "logits_per_char": -0.49428558349609375, "bits_per_byte": 0.7131033600932344, "num_chars": 26}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 103, "native_id": "Mercury_180005", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.922550201416016, "logits_per_token_corr": -2.640850067138672, "logits_per_char_corr": -0.4401416778564453, "bits_per_byte_corr": 0.6349902159324975}, "model_output": [{"sum_logits": -8.382465362548828, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -29.67691421508789, "logits_per_token": -2.7941551208496094, "logits_per_char": -0.6985387802124023, "bits_per_byte": 1.0077784340817824, "num_chars": 12}, {"sum_logits": -7.922550201416016, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -25.181507110595703, "logits_per_token": -2.640850067138672, "logits_per_char": -0.4401416778564453, "bits_per_byte": 0.6349902159324975, "num_chars": 18}, {"sum_logits": -6.447012901306152, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -23.657756805419922, "logits_per_token": -2.1490043004353843, "logits_per_char": -0.3581673834058974, "bits_per_byte": 0.5167263078482354, "num_chars": 18}, {"sum_logits": -9.35993480682373, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -24.939817428588867, "logits_per_token": -2.3399837017059326, "logits_per_char": -0.4457111812773205, "bits_per_byte": 0.6430253108980141, "num_chars": 21}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 104, "native_id": "Mercury_7071523", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.563880920410156, "logits_per_token_corr": -6.781940460205078, "logits_per_char_corr": -0.8477425575256348, "bits_per_byte_corr": 1.2230339836936388}, "model_output": [{"sum_logits": -9.11030101776123, "num_tokens": 2, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -16.70672607421875, "logits_per_token": -4.555150508880615, "logits_per_char": -0.7007923859816331, "bits_per_byte": 1.0110296999491728, "num_chars": 13}, {"sum_logits": -10.463735580444336, "num_tokens": 2, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -20.25806427001953, "logits_per_token": -5.231867790222168, "logits_per_char": -0.8049027369572566, "bits_per_byte": 1.1612291870070222, "num_chars": 13}, {"sum_logits": -13.563880920410156, "num_tokens": 2, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -23.147411346435547, "logits_per_token": -6.781940460205078, "logits_per_char": -0.8477425575256348, "bits_per_byte": 1.2230339836936388, "num_chars": 16}, {"sum_logits": -12.524696350097656, "num_tokens": 2, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -20.019393920898438, "logits_per_token": -6.262348175048828, "logits_per_char": -1.1386087590997869, "bits_per_byte": 1.642665210267179, "num_chars": 11}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 105, "native_id": "Mercury_7263375", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.364524841308594, "logits_per_token_corr": -1.818280537923177, "logits_per_char_corr": -0.48130955415613513, "bits_per_byte_corr": 0.694382906914033}, "model_output": [{"sum_logits": -13.636953353881836, "num_tokens": 9, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -32.06000518798828, "logits_per_token": -1.515217039320204, "logits_per_char": -0.40108686334946575, "bits_per_byte": 0.5786460287203993, "num_chars": 34}, {"sum_logits": -16.364524841308594, "num_tokens": 9, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -33.32347869873047, "logits_per_token": -1.818280537923177, "logits_per_char": -0.48130955415613513, "bits_per_byte": 0.694382906914033, "num_chars": 34}, {"sum_logits": -23.78911590576172, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -33.17311096191406, "logits_per_token": -3.3984451293945312, "logits_per_char": -0.7673908356697329, "bits_per_byte": 1.1071109530451564, "num_chars": 31}, {"sum_logits": -25.365875244140625, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -36.13567352294922, "logits_per_token": -3.6236964634486606, "logits_per_char": -0.8182540401335685, "bits_per_byte": 1.1804910458889062, "num_chars": 31}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 106, "native_id": "TIMSS_2011_8_pg102", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.299280643463135, "logits_per_token_corr": -3.6496403217315674, "logits_per_char_corr": -0.486618709564209, "bits_per_byte_corr": 0.7020423990925755}, "model_output": [{"sum_logits": -8.681037902832031, "num_tokens": 2, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -18.79083251953125, "logits_per_token": -4.340518951416016, "logits_per_char": -1.085129737854004, "bits_per_byte": 1.565511291524237, "num_chars": 8}, {"sum_logits": -9.407740592956543, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -15.890351295471191, "logits_per_token": -9.407740592956543, "logits_per_char": -1.1759675741195679, "bits_per_byte": 1.6965625874297439, "num_chars": 8}, {"sum_logits": -5.319215774536133, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -16.763668060302734, "logits_per_token": -5.319215774536133, "logits_per_char": -0.4432679812113444, "bits_per_byte": 0.6395005182789283, "num_chars": 12}, {"sum_logits": -7.299280643463135, "num_tokens": 2, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -18.63492202758789, "logits_per_token": -3.6496403217315674, "logits_per_char": -0.486618709564209, "bits_per_byte": 0.7020423990925755, "num_chars": 15}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 107, "native_id": "Mercury_406550", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -22.64529037475586, "logits_per_token_corr": -2.058662761341442, "logits_per_char_corr": -0.47177688280741376, "bits_per_byte_corr": 0.6806301692327985}, "model_output": [{"sum_logits": -19.802413940429688, "num_tokens": 9, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -47.92909622192383, "logits_per_token": -2.2002682156032987, "logits_per_char": -0.5211161563270971, "bits_per_byte": 0.7518116944607609, "num_chars": 38}, {"sum_logits": -22.64529037475586, "num_tokens": 11, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -56.04669952392578, "logits_per_token": -2.058662761341442, "logits_per_char": -0.47177688280741376, "bits_per_byte": 0.6806301692327985, "num_chars": 48}, {"sum_logits": -24.47541618347168, "num_tokens": 10, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -48.68883514404297, "logits_per_token": -2.447541618347168, "logits_per_char": -0.53207426485808, "bits_per_byte": 0.7676209032959443, "num_chars": 46}, {"sum_logits": -17.262401580810547, "num_tokens": 8, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -38.01789093017578, "logits_per_token": -2.1578001976013184, "logits_per_char": -0.4795111550225152, "bits_per_byte": 0.6917883654024186, "num_chars": 36}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 108, "native_id": "Mercury_SC_400057", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.833643913269043, "logits_per_token_corr": -1.9667287826538087, "logits_per_char_corr": -0.3933457565307617, "bits_per_byte_corr": 0.5674779723020552}, "model_output": [{"sum_logits": -14.64948558807373, "num_tokens": 6, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -28.363628387451172, "logits_per_token": -2.4415809313456216, "logits_per_char": -0.6369341560032057, "bits_per_byte": 0.9189017482392824, "num_chars": 23}, {"sum_logits": -21.76531219482422, "num_tokens": 5, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -33.996559143066406, "logits_per_token": -4.353062438964844, "logits_per_char": -0.7773325783865792, "bits_per_byte": 1.121453855960555, "num_chars": 28}, {"sum_logits": -20.082759857177734, "num_tokens": 5, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -30.287246704101562, "logits_per_token": -4.016551971435547, "logits_per_char": -0.8033103942871094, "bits_per_byte": 1.1589319221334031, "num_chars": 25}, {"sum_logits": -9.833643913269043, "num_tokens": 5, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -25.811052322387695, "logits_per_token": -1.9667287826538087, "logits_per_char": -0.3933457565307617, "bits_per_byte": 0.5674779723020552, "num_chars": 25}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 109, "native_id": "TAKS_2009_5_26", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.298696517944336, "logits_per_token_corr": -10.298696517944336, "logits_per_char_corr": -1.7164494196573894, "bits_per_byte_corr": 2.476313065678234}, "model_output": [{"sum_logits": -10.298696517944336, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -12.608048439025879, "logits_per_token": -10.298696517944336, "logits_per_char": -1.7164494196573894, "bits_per_byte": 2.476313065678234, "num_chars": 6}, {"sum_logits": -14.282049179077148, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -21.983427047729492, "logits_per_token": -4.760683059692383, "logits_per_char": -0.8401205399457146, "bits_per_byte": 1.2120377367295114, "num_chars": 17}, {"sum_logits": -10.853440284729004, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -15.463203430175781, "logits_per_token": -3.6178134282430015, "logits_per_char": -0.7752457346235003, "bits_per_byte": 1.1184431768124488, "num_chars": 14}, {"sum_logits": -6.908055305480957, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -12.932027816772461, "logits_per_token": -3.4540276527404785, "logits_per_char": -0.9868650436401367, "bits_per_byte": 1.4237453044873185, "num_chars": 7}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 110, "native_id": "LEAP_2007_8_10417", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.499984741210938, "logits_per_token_corr": -1.7857121058872767, "logits_per_char_corr": -0.328946966873972, "bits_per_byte_corr": 0.4745701578248865}, "model_output": [{"sum_logits": -12.499984741210938, "num_tokens": 7, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -36.053123474121094, "logits_per_token": -1.7857121058872767, "logits_per_char": -0.328946966873972, "bits_per_byte": 0.4745701578248865, "num_chars": 38}, {"sum_logits": -15.272261619567871, "num_tokens": 8, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -40.308815002441406, "logits_per_token": -1.9090327024459839, "logits_per_char": -0.39159645178379154, "bits_per_byte": 0.564954259018596, "num_chars": 39}, {"sum_logits": -18.21239471435547, "num_tokens": 13, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -57.52995300292969, "logits_per_token": -1.4009534395658052, "logits_per_char": -0.2718267867814249, "bits_per_byte": 0.3921631572706251, "num_chars": 67}, {"sum_logits": -38.86272430419922, "num_tokens": 18, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -80.22429656982422, "logits_per_token": -2.159040239122179, "logits_per_char": -0.46265147981189547, "bits_per_byte": 0.6674649955850415, "num_chars": 84}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 111, "native_id": "Mercury_7027405", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.367500305175781, "logits_per_token_corr": -8.367500305175781, "logits_per_char_corr": -1.0459375381469727, "bits_per_byte_corr": 1.5089688993653325}, "model_output": [{"sum_logits": -8.367500305175781, "num_tokens": 1, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -13.302153587341309, "logits_per_token": -8.367500305175781, "logits_per_char": -1.0459375381469727, "bits_per_byte": 1.5089688993653325, "num_chars": 8}, {"sum_logits": -9.108332633972168, "num_tokens": 1, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -16.61407470703125, "logits_per_token": -9.108332633972168, "logits_per_char": -0.8280302394520153, "bits_per_byte": 1.1945951201643816, "num_chars": 11}, {"sum_logits": -8.61198616027832, "num_tokens": 2, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -21.551082611083984, "logits_per_token": -4.30599308013916, "logits_per_char": -0.7176655133565267, "bits_per_byte": 1.035372477137237, "num_chars": 12}, {"sum_logits": -9.428694725036621, "num_tokens": 3, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -19.280683517456055, "logits_per_token": -3.1428982416788735, "logits_per_char": -0.7252842096182016, "bits_per_byte": 1.0463639324520027, "num_chars": 13}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 112, "native_id": "Mercury_7058415", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -32.686370849609375, "logits_per_token_corr": -3.2686370849609374, "logits_per_char_corr": -0.6167239782945165, "bits_per_byte_corr": 0.889744625083451}, "model_output": [{"sum_logits": -32.686370849609375, "num_tokens": 10, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -54.826759338378906, "logits_per_token": -3.2686370849609374, "logits_per_char": -0.6167239782945165, "bits_per_byte": 0.889744625083451, "num_chars": 53}, {"sum_logits": -32.26735305786133, "num_tokens": 10, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -55.250579833984375, "logits_per_token": -3.226735305786133, "logits_per_char": -0.5975435751455801, "bits_per_byte": 0.8620731525782094, "num_chars": 54}, {"sum_logits": -20.958648681640625, "num_tokens": 10, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -44.06840896606445, "logits_per_token": -2.0958648681640626, "logits_per_char": -0.4366385142008464, "bits_per_byte": 0.6299362190991389, "num_chars": 48}, {"sum_logits": -28.174243927001953, "num_tokens": 9, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -39.97882843017578, "logits_per_token": -3.1304715474446616, "logits_per_char": -0.6708153315952846, "bits_per_byte": 0.967781952245498, "num_chars": 42}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 113, "native_id": "Mercury_7215828", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.756679534912109, "logits_per_token_corr": -3.8783397674560547, "logits_per_char_corr": -0.7051526849920099, "bits_per_byte_corr": 1.017320281708241}, "model_output": [{"sum_logits": -7.756679534912109, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -18.663679122924805, "logits_per_token": -3.8783397674560547, "logits_per_char": -0.7051526849920099, "bits_per_byte": 1.017320281708241, "num_chars": 11}, {"sum_logits": -12.616573333740234, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -18.11147117614746, "logits_per_token": -4.205524444580078, "logits_per_char": -0.7421513725729549, "bits_per_byte": 1.0706981048007087, "num_chars": 17}, {"sum_logits": -4.852203845977783, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -14.251230239868164, "logits_per_token": -2.4261019229888916, "logits_per_char": -0.3234802563985189, "bits_per_byte": 0.4666833617319689, "num_chars": 15}, {"sum_logits": -12.231317520141602, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -16.24773597717285, "logits_per_token": -6.115658760070801, "logits_per_char": -1.2231317520141602, "bits_per_byte": 1.764606112985926, "num_chars": 10}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 114, "native_id": "Mercury_7064575", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.656973838806152, "logits_per_token_corr": -1.6094956398010254, "logits_per_char_corr": -0.25413089049489873, "bits_per_byte_corr": 0.36663337545395003}, "model_output": [{"sum_logits": -13.855615615844727, "num_tokens": 6, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -34.28153991699219, "logits_per_token": -2.3092692693074546, "logits_per_char": -0.4075181063483743, "bits_per_byte": 0.5879243511016832, "num_chars": 34}, {"sum_logits": -9.656973838806152, "num_tokens": 6, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -28.918582916259766, "logits_per_token": -1.6094956398010254, "logits_per_char": -0.25413089049489873, "bits_per_byte": 0.36663337545395003, "num_chars": 38}, {"sum_logits": -19.21814727783203, "num_tokens": 6, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -34.708194732666016, "logits_per_token": -3.2030245463053384, "logits_per_char": -0.5194093858873522, "bits_per_byte": 0.7493493452114033, "num_chars": 37}, {"sum_logits": -12.632675170898438, "num_tokens": 6, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -28.257875442504883, "logits_per_token": -2.1054458618164062, "logits_per_char": -0.3509076436360677, "bits_per_byte": 0.5062527172841502, "num_chars": 36}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 115, "native_id": "Mercury_7097493", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.307654857635498, "logits_per_token_corr": -1.217942476272583, "logits_per_char_corr": -0.19750418534149994, "bits_per_byte_corr": 0.2849383087472014}, "model_output": [{"sum_logits": -7.206527233123779, "num_tokens": 6, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -27.85989761352539, "logits_per_token": -1.2010878721872966, "logits_per_char": -0.28826108932495115, "bits_per_byte": 0.41587284405065633, "num_chars": 25}, {"sum_logits": -17.56818962097168, "num_tokens": 6, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -33.58967590332031, "logits_per_token": -2.92803160349528, "logits_per_char": -0.439204740524292, "bits_per_byte": 0.6336385010897753, "num_chars": 40}, {"sum_logits": -13.734686851501465, "num_tokens": 7, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -33.14505386352539, "logits_per_token": -1.9620981216430664, "logits_per_char": -0.39241962432861327, "bits_per_byte": 0.5661418459668072, "num_chars": 35}, {"sum_logits": -7.307654857635498, "num_tokens": 6, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -28.128019332885742, "logits_per_token": -1.217942476272583, "logits_per_char": -0.19750418534149994, "bits_per_byte": 0.2849383087472014, "num_chars": 37}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 116, "native_id": "AKDE&ED_2008_8_47", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.517337799072266, "logits_per_token_corr": -2.752889633178711, "logits_per_char_corr": -0.44641453511006124, "bits_per_byte_corr": 0.6440400359845001}, "model_output": [{"sum_logits": -11.650985717773438, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -28.99180793762207, "logits_per_token": -2.3301971435546873, "logits_per_char": -0.43151798954716436, "bits_per_byte": 0.6225488635745169, "num_chars": 27}, {"sum_logits": -16.517337799072266, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -34.764671325683594, "logits_per_token": -2.752889633178711, "logits_per_char": -0.44641453511006124, "bits_per_byte": 0.6440400359845001, "num_chars": 37}, {"sum_logits": -16.337018966674805, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -36.4710578918457, "logits_per_token": -3.267403793334961, "logits_per_char": -0.4299215517546001, "bits_per_byte": 0.620245690688095, "num_chars": 38}, {"sum_logits": -19.988096237182617, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -39.39408874511719, "logits_per_token": -3.3313493728637695, "logits_per_char": -0.4875145423703077, "bits_per_byte": 0.7033348126394007, "num_chars": 41}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 117, "native_id": "Mercury_405136", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.615324020385742, "logits_per_token_corr": -1.0615324020385741, "logits_per_char_corr": -0.26538310050964353, "bits_per_byte_corr": 0.38286688304127514}, "model_output": [{"sum_logits": -17.36595916748047, "num_tokens": 7, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -25.97651481628418, "logits_per_token": -2.480851309640067, "logits_per_char": -0.6202128274100167, "bits_per_byte": 0.8947779704007965, "num_chars": 28}, {"sum_logits": -18.129730224609375, "num_tokens": 7, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -25.58055305480957, "logits_per_token": -2.589961460658482, "logits_per_char": -0.6474903651646206, "bits_per_byte": 0.9341311388470532, "num_chars": 28}, {"sum_logits": -10.615324020385742, "num_tokens": 10, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -25.668720245361328, "logits_per_token": -1.0615324020385741, "logits_per_char": -0.26538310050964353, "bits_per_byte": 0.38286688304127514, "num_chars": 40}, {"sum_logits": -19.510459899902344, "num_tokens": 8, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -29.20827865600586, "logits_per_token": -2.438807487487793, "logits_per_char": -0.6727744793069774, "bits_per_byte": 0.9706084049335282, "num_chars": 29}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 118, "native_id": "Mercury_415086", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.475772857666016, "logits_per_token_corr": -1.868943214416504, "logits_per_char_corr": -0.8306414286295573, "bits_per_byte_corr": 1.198362269841647}, "model_output": [{"sum_logits": -8.886816024780273, "num_tokens": 4, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -22.23722267150879, "logits_per_token": -2.2217040061950684, "logits_per_char": -1.1108520030975342, "bits_per_byte": 1.6026206760315354, "num_chars": 8}, {"sum_logits": -8.002839088439941, "num_tokens": 4, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -22.30037498474121, "logits_per_token": -2.0007097721099854, "logits_per_char": -0.8892043431599935, "bits_per_byte": 1.2828506962147723, "num_chars": 9}, {"sum_logits": -6.351197719573975, "num_tokens": 4, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -19.669963836669922, "logits_per_token": -1.5877994298934937, "logits_per_char": -0.7056886355082194, "bits_per_byte": 1.0180934948601388, "num_chars": 9}, {"sum_logits": -7.475772857666016, "num_tokens": 4, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -21.503400802612305, "logits_per_token": -1.868943214416504, "logits_per_char": -0.8306414286295573, "bits_per_byte": 1.198362269841647, "num_chars": 9}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 119, "native_id": "Mercury_7228725", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -31.430866241455078, "logits_per_token_corr": -3.9288582801818848, "logits_per_char_corr": -0.6162914949304917, "bits_per_byte_corr": 0.8891206834789049}, "model_output": [{"sum_logits": -22.024112701416016, "num_tokens": 6, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -36.237884521484375, "logits_per_token": -3.6706854502360025, "logits_per_char": -0.5121886674747911, "bits_per_byte": 0.7389320505659382, "num_chars": 43}, {"sum_logits": -31.116682052612305, "num_tokens": 8, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -44.91240692138672, "logits_per_token": -3.889585256576538, "logits_per_char": -0.5556550366537911, "bits_per_byte": 0.8016407658259755, "num_chars": 56}, {"sum_logits": -31.430866241455078, "num_tokens": 8, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -42.488609313964844, "logits_per_token": -3.9288582801818848, "logits_per_char": -0.6162914949304917, "bits_per_byte": 0.8891206834789049, "num_chars": 51}, {"sum_logits": -29.265207290649414, "num_tokens": 10, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -36.28541564941406, "logits_per_token": -2.9265207290649413, "logits_per_char": -0.5853041458129883, "bits_per_byte": 0.8444153885767556, "num_chars": 50}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 120, "native_id": "Mercury_7201740", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -20.431840896606445, "logits_per_token_corr": -2.270204544067383, "logits_per_char_corr": -0.43472001907673286, "bits_per_byte_corr": 0.6271684156976086}, "model_output": [{"sum_logits": -20.431840896606445, "num_tokens": 9, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -42.000152587890625, "logits_per_token": -2.270204544067383, "logits_per_char": -0.43472001907673286, "bits_per_byte": 0.6271684156976086, "num_chars": 47}, {"sum_logits": -18.586994171142578, "num_tokens": 5, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -31.36306381225586, "logits_per_token": -3.717398834228516, "logits_per_char": -0.8448633714155718, "bits_per_byte": 1.2188801961708515, "num_chars": 22}, {"sum_logits": -38.282135009765625, "num_tokens": 7, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -47.19081115722656, "logits_per_token": -5.468876429966518, "logits_per_char": -0.9570533752441406, "bits_per_byte": 1.380736158331758, "num_chars": 40}, {"sum_logits": -18.107078552246094, "num_tokens": 5, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -24.614898681640625, "logits_per_token": -3.621415710449219, "logits_per_char": -0.6706325389720775, "bits_per_byte": 0.9675182382344858, "num_chars": 27}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 121, "native_id": "NYSEDREGENTS_2010_4_4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.420076847076416, "logits_per_token_corr": -6.420076847076416, "logits_per_char_corr": -0.9171538352966309, "bits_per_byte_corr": 1.3231732899156932}, "model_output": [{"sum_logits": -6.420076847076416, "num_tokens": 1, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -15.093934059143066, "logits_per_token": -6.420076847076416, "logits_per_char": -0.9171538352966309, "bits_per_byte": 1.3231732899156932, "num_chars": 7}, {"sum_logits": -5.901927471160889, "num_tokens": 1, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -13.745198249816895, "logits_per_token": -5.901927471160889, "logits_per_char": -0.843132495880127, "bits_per_byte": 1.2163830706194674, "num_chars": 7}, {"sum_logits": -10.894593238830566, "num_tokens": 4, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -19.51186752319336, "logits_per_token": -2.7236483097076416, "logits_per_char": -0.6052551799350314, "bits_per_byte": 0.8731986465652544, "num_chars": 18}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 122, "native_id": "MEAP_2005_8_21", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -22.280563354492188, "logits_per_token_corr": -3.1829376220703125, "logits_per_char_corr": -0.6021773879592484, "bits_per_byte_corr": 0.8687583313449011}, "model_output": [{"sum_logits": -19.504337310791016, "num_tokens": 4, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -30.304012298583984, "logits_per_token": -4.876084327697754, "logits_per_char": -0.8865607868541371, "bits_per_byte": 1.2790368506419998, "num_chars": 22}, {"sum_logits": -16.567289352416992, "num_tokens": 6, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -32.34395980834961, "logits_per_token": -2.7612148920694985, "logits_per_char": -0.5344286887876449, "bits_per_byte": 0.7710176190232805, "num_chars": 31}, {"sum_logits": -15.48009967803955, "num_tokens": 4, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -28.740583419799805, "logits_per_token": -3.8700249195098877, "logits_per_char": -0.46909392963756213, "bits_per_byte": 0.6767594859997135, "num_chars": 33}, {"sum_logits": -22.280563354492188, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -40.19301223754883, "logits_per_token": -3.1829376220703125, "logits_per_char": -0.6021773879592484, "bits_per_byte": 0.8687583313449011, "num_chars": 37}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 123, "native_id": "Mercury_7026355", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.133880615234375, "logits_per_token_corr": -2.348208957248264, "logits_per_char_corr": -0.48031546852805396, "bits_per_byte_corr": 0.6929487445081802}, "model_output": [{"sum_logits": -21.133880615234375, "num_tokens": 9, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -39.32264709472656, "logits_per_token": -2.348208957248264, "logits_per_char": -0.48031546852805396, "bits_per_byte": 0.6929487445081802, "num_chars": 44}, {"sum_logits": -28.822216033935547, "num_tokens": 13, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -42.88315200805664, "logits_per_token": -2.217093541071965, "logits_per_char": -0.5651414908614814, "bits_per_byte": 0.8153268262670403, "num_chars": 51}, {"sum_logits": -30.64021873474121, "num_tokens": 11, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -39.358219146728516, "logits_per_token": -2.785474430431019, "logits_per_char": -0.5193257412668002, "bits_per_byte": 0.7492286715321358, "num_chars": 59}, {"sum_logits": -29.11090087890625, "num_tokens": 15, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -50.346519470214844, "logits_per_token": -1.9407267252604166, "logits_per_char": -0.45485782623291016, "bits_per_byte": 0.6562211302162247, "num_chars": 64}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 124, "native_id": "Mercury_7249708", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -38.56645202636719, "logits_per_token_corr": -3.2138710021972656, "logits_per_char_corr": -0.6536686784130031, "bits_per_byte_corr": 0.9430445607315597}, "model_output": [{"sum_logits": -26.261484146118164, "num_tokens": 11, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -38.74967956542969, "logits_per_token": -2.387407649647106, "logits_per_char": -0.4235723249373897, "bits_per_byte": 0.6110856926454198, "num_chars": 62}, {"sum_logits": -38.56645202636719, "num_tokens": 12, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -51.88306427001953, "logits_per_token": -3.2138710021972656, "logits_per_char": -0.6536686784130031, "bits_per_byte": 0.9430445607315597, "num_chars": 59}, {"sum_logits": -35.139671325683594, "num_tokens": 11, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -43.85502624511719, "logits_per_token": -3.194515575062145, "logits_per_char": -0.5760601856669442, "bits_per_byte": 0.831079173115873, "num_chars": 61}, {"sum_logits": -31.02627944946289, "num_tokens": 11, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -44.60081481933594, "logits_per_token": -2.820570859042081, "logits_per_char": -0.5443206920958402, "bits_per_byte": 0.7852887631404812, "num_chars": 57}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 125, "native_id": "Mercury_7107170", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.517473220825195, "logits_per_token_corr": -1.6453533172607422, "logits_per_char_corr": -0.3290706634521484, "bits_per_byte_corr": 0.47474861426479664}, "model_output": [{"sum_logits": -18.075061798095703, "num_tokens": 4, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -28.660306930541992, "logits_per_token": -4.518765449523926, "logits_per_char": -0.7531275749206543, "bits_per_byte": 1.0865334174955399, "num_chars": 24}, {"sum_logits": -18.380691528320312, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -41.45112609863281, "logits_per_token": -3.0634485880533853, "logits_per_char": -0.5743966102600098, "bits_per_byte": 0.8286791411261422, "num_chars": 32}, {"sum_logits": -14.655567169189453, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -34.45389938354492, "logits_per_token": -2.0936524527413503, "logits_per_char": -0.444108096036044, "bits_per_byte": 0.6407125477703005, "num_chars": 33}, {"sum_logits": -11.517473220825195, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -28.434904098510742, "logits_per_token": -1.6453533172607422, "logits_per_char": -0.3290706634521484, "bits_per_byte": 0.47474861426479664, "num_chars": 35}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 126, "native_id": "Mercury_183820", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.1776740550994873, "logits_per_token_corr": -3.1776740550994873, "logits_per_char_corr": -0.6355348110198975, "bits_per_byte_corr": 0.9168829201713694}, "model_output": [{"sum_logits": -3.1776740550994873, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -12.902543067932129, "logits_per_token": -3.1776740550994873, "logits_per_char": -0.6355348110198975, "bits_per_byte": 0.9168829201713694, "num_chars": 5}, {"sum_logits": -2.909374952316284, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -12.585593223571777, "logits_per_token": -2.909374952316284, "logits_per_char": -0.4156249931880406, "bits_per_byte": 0.5996201165423262, "num_chars": 7}, {"sum_logits": -3.717421293258667, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -13.882975578308105, "logits_per_token": -3.717421293258667, "logits_per_char": -0.41304681036207413, "bits_per_byte": 0.5959005849647966, "num_chars": 9}, {"sum_logits": -2.022137403488159, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -13.386828422546387, "logits_per_token": -2.022137403488159, "logits_per_char": -0.22468193372090658, "bits_per_byte": 0.32414751155672755, "num_chars": 9}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 127, "native_id": "Mercury_SC_401357", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -0.4997285008430481, "logits_per_token_corr": -0.4997285008430481, "logits_per_char_corr": -0.04164404173692068, "bits_per_byte_corr": 0.06007965249647164}, "model_output": [{"sum_logits": -2.9981110095977783, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -16.48602294921875, "logits_per_token": -2.9981110095977783, "logits_per_char": -0.2306239238152137, "bits_per_byte": 0.33271999119880197, "num_chars": 13}, {"sum_logits": -0.4997285008430481, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": true, "sum_logits_uncond": -16.763668060302734, "logits_per_token": -0.4997285008430481, "logits_per_char": -0.04164404173692068, "bits_per_byte": 0.06007965249647164, "num_chars": 12}, {"sum_logits": -3.5817158222198486, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -14.91162395477295, "logits_per_token": -3.5817158222198486, "logits_per_char": -0.2558368444442749, "bits_per_byte": 0.3690945467567017, "num_chars": 14}, {"sum_logits": -3.7611846923828125, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -18.411052703857422, "logits_per_token": -1.8805923461914062, "logits_per_char": -0.2686560494559152, "bits_per_byte": 0.38758875025514744, "num_chars": 14}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 128, "native_id": "NYSEDREGENTS_2008_8_11", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.423233032226562, "logits_per_token_corr": -4.211616516113281, "logits_per_char_corr": -0.5615488688151041, "bits_per_byte_corr": 0.8101437682569398}, "model_output": [{"sum_logits": -5.052539825439453, "num_tokens": 1, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -14.924777030944824, "logits_per_token": -5.052539825439453, "logits_per_char": -0.561393313937717, "bits_per_byte": 0.8099193500067471, "num_chars": 9}, {"sum_logits": -6.90445613861084, "num_tokens": 1, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -12.696199417114258, "logits_per_token": -6.90445613861084, "logits_per_char": -0.9863508769444057, "bits_per_byte": 1.4230035187451966, "num_chars": 7}, {"sum_logits": -4.49707555770874, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -14.251230239868164, "logits_per_token": -2.24853777885437, "logits_per_char": -0.29980503718058266, "bits_per_byte": 0.43252724037426865, "num_chars": 15}, {"sum_logits": -8.423233032226562, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -22.741100311279297, "logits_per_token": -4.211616516113281, "logits_per_char": -0.5615488688151041, "bits_per_byte": 0.8101437682569398, "num_chars": 15}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 129, "native_id": "Mercury_416650", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.113313674926758, "logits_per_token_corr": -2.6226627349853517, "logits_per_char_corr": -0.5043582182664138, "bits_per_byte_corr": 0.7276351003250714}, "model_output": [{"sum_logits": -6.453009605407715, "num_tokens": 3, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -22.693988800048828, "logits_per_token": -2.1510032018025718, "logits_per_char": -0.35850053363376194, "bits_per_byte": 0.517206942029847, "num_chars": 18}, {"sum_logits": -9.754945755004883, "num_tokens": 3, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -22.468576431274414, "logits_per_token": -3.2516485850016275, "logits_per_char": -0.5134181976318359, "bits_per_byte": 0.7407058876261315, "num_chars": 19}, {"sum_logits": -13.113313674926758, "num_tokens": 5, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -31.288532257080078, "logits_per_token": -2.6226627349853517, "logits_per_char": -0.5043582182664138, "bits_per_byte": 0.7276351003250714, "num_chars": 26}, {"sum_logits": -13.874471664428711, "num_tokens": 4, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -29.663785934448242, "logits_per_token": -3.4686179161071777, "logits_per_char": -0.5549788665771485, "bits_per_byte": 0.800665258609605, "num_chars": 25}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 130, "native_id": "NCEOGA_2013_5_20", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.450906753540039, "logits_per_token_corr": -5.2254533767700195, "logits_per_char_corr": -0.9500824321400035, "bits_per_byte_corr": 1.370679213285093}, "model_output": [{"sum_logits": -7.8809404373168945, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -17.373722076416016, "logits_per_token": -2.6269801457722983, "logits_per_char": -0.8756600485907661, "bits_per_byte": 1.2633104096073946, "num_chars": 9}, {"sum_logits": -10.450906753540039, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -18.21230125427246, "logits_per_token": -5.2254533767700195, "logits_per_char": -0.9500824321400035, "bits_per_byte": 1.370679213285093, "num_chars": 11}, {"sum_logits": -3.7919580936431885, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -18.212127685546875, "logits_per_token": -1.8959790468215942, "logits_per_char": -0.37919580936431885, "bits_per_byte": 0.5470639136961726, "num_chars": 10}, {"sum_logits": -7.316878795623779, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -19.046520233154297, "logits_per_token": -3.6584393978118896, "logits_per_char": -0.9146098494529724, "bits_per_byte": 1.3195030941549526, "num_chars": 8}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 131, "native_id": "Mercury_400500", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.853588104248047, "logits_per_token_corr": -3.7707176208496094, "logits_per_char_corr": -0.7541435241699219, "bits_per_byte_corr": 1.0879991224392542}, "model_output": [{"sum_logits": -17.315645217895508, "num_tokens": 5, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -27.077219009399414, "logits_per_token": -3.4631290435791016, "logits_per_char": -0.8245545341855004, "bits_per_byte": 1.1895807374127854, "num_chars": 21}, {"sum_logits": -19.415142059326172, "num_tokens": 5, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -30.750003814697266, "logits_per_token": -3.8830284118652343, "logits_per_char": -0.9245305742536273, "bits_per_byte": 1.3338156746268919, "num_chars": 21}, {"sum_logits": -18.853588104248047, "num_tokens": 5, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -26.163293838500977, "logits_per_token": -3.7707176208496094, "logits_per_char": -0.7541435241699219, "bits_per_byte": 1.0879991224392542, "num_chars": 25}, {"sum_logits": -14.583227157592773, "num_tokens": 5, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -25.16657066345215, "logits_per_token": -2.9166454315185546, "logits_per_char": -0.5028699019859577, "bits_per_byte": 0.7254879138079815, "num_chars": 29}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 132, "native_id": "Mercury_SC_401366", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.306501388549805, "logits_per_token_corr": -2.051083564758301, "logits_per_char_corr": -0.3326081456364812, "bits_per_byte_corr": 0.4798521222693703}, "model_output": [{"sum_logits": -14.302835464477539, "num_tokens": 5, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -26.50203514099121, "logits_per_token": -2.860567092895508, "logits_per_char": -0.8413432626163259, "bits_per_byte": 1.2138017526627862, "num_chars": 17}, {"sum_logits": -14.728632926940918, "num_tokens": 6, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -27.82333755493164, "logits_per_token": -2.454772154490153, "logits_per_char": -0.6403753446496051, "bits_per_byte": 0.9238663340342098, "num_chars": 23}, {"sum_logits": -15.397660255432129, "num_tokens": 6, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -32.704261779785156, "logits_per_token": -2.566276709238688, "logits_per_char": -0.5702837131641529, "bits_per_byte": 0.8227454848822586, "num_chars": 27}, {"sum_logits": -12.306501388549805, "num_tokens": 6, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -29.27129364013672, "logits_per_token": -2.051083564758301, "logits_per_char": -0.3326081456364812, "bits_per_byte": 0.4798521222693703, "num_chars": 37}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 133, "native_id": "Mercury_7141610", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.5295515060424805, "logits_per_token_corr": -4.5295515060424805, "logits_per_char_corr": -0.6470787865774972, "bits_per_byte_corr": 0.9335373564604738}, "model_output": [{"sum_logits": -3.752345085144043, "num_tokens": 1, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -12.437054634094238, "logits_per_token": -3.752345085144043, "logits_per_char": -0.7504690170288086, "bits_per_byte": 1.082697929209055, "num_chars": 5}, {"sum_logits": -4.5295515060424805, "num_tokens": 1, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -11.144539833068848, "logits_per_token": -4.5295515060424805, "logits_per_char": -0.6470787865774972, "bits_per_byte": 0.9335373564604738, "num_chars": 7}, {"sum_logits": -9.297184944152832, "num_tokens": 2, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -21.06118392944336, "logits_per_token": -4.648592472076416, "logits_per_char": -0.7151680726271409, "bits_per_byte": 1.0317694317820354, "num_chars": 13}, {"sum_logits": -9.427695274353027, "num_tokens": 2, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -15.562935829162598, "logits_per_token": -4.713847637176514, "logits_per_char": -0.5545703102560604, "bits_per_byte": 0.800075836431247, "num_chars": 17}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 134, "native_id": "Mercury_7247013", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -36.82484436035156, "logits_per_token_corr": -3.3477131236683237, "logits_per_char_corr": -0.7835073268159907, "bits_per_byte_corr": 1.13036213489841}, "model_output": [{"sum_logits": -25.93783187866211, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -34.18856430053711, "logits_per_token": -3.2422289848327637, "logits_per_char": -0.6484457969665527, "bits_per_byte": 0.9355095355696093, "num_chars": 40}, {"sum_logits": -36.82484436035156, "num_tokens": 11, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -45.752567291259766, "logits_per_token": -3.3477131236683237, "logits_per_char": -0.7835073268159907, "bits_per_byte": 1.13036213489841, "num_chars": 47}, {"sum_logits": -33.552162170410156, "num_tokens": 9, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -37.942726135253906, "logits_per_token": -3.7280180189344616, "logits_per_char": -0.6213363364890769, "bits_per_byte": 0.8963988513775516, "num_chars": 54}, {"sum_logits": -45.72126007080078, "num_tokens": 13, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -53.71831512451172, "logits_per_token": -3.5170200054462137, "logits_per_char": -0.7374396785613029, "bits_per_byte": 1.0639005672159072, "num_chars": 62}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 135, "native_id": "NYSEDREGENTS_2008_8_30", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.142749786376953, "logits_per_token_corr": -1.357124964396159, "logits_per_char_corr": -0.2544609308242798, "bits_per_byte_corr": 0.36710952300044175}, "model_output": [{"sum_logits": -12.369768142700195, "num_tokens": 6, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -36.382164001464844, "logits_per_token": -2.0616280237833657, "logits_per_char": -0.3748414588697029, "bits_per_byte": 0.5407819138312933, "num_chars": 33}, {"sum_logits": -8.142749786376953, "num_tokens": 6, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -36.805625915527344, "logits_per_token": -1.357124964396159, "logits_per_char": -0.2544609308242798, "bits_per_byte": 0.36710952300044175, "num_chars": 32}, {"sum_logits": -15.119775772094727, "num_tokens": 5, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -26.94671630859375, "logits_per_token": -3.023955154418945, "logits_per_char": -0.5399919918605259, "bits_per_byte": 0.7790437687774939, "num_chars": 28}, {"sum_logits": -15.687774658203125, "num_tokens": 4, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -25.908836364746094, "logits_per_token": -3.9219436645507812, "logits_per_char": -0.6033759483924279, "bits_per_byte": 0.8704874885380562, "num_chars": 26}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 136, "native_id": "ACTAAP_2011_5_16", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.024923324584961, "logits_per_token_corr": -2.4049846649169924, "logits_per_char_corr": -0.5010384718577067, "bits_per_byte_corr": 0.7228457186442172}, "model_output": [{"sum_logits": -22.58969497680664, "num_tokens": 4, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -28.796770095825195, "logits_per_token": -5.64742374420166, "logits_per_char": -0.9821606511655061, "bits_per_byte": 1.4169583007937687, "num_chars": 23}, {"sum_logits": -12.024923324584961, "num_tokens": 5, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -27.29891014099121, "logits_per_token": -2.4049846649169924, "logits_per_char": -0.5010384718577067, "bits_per_byte": 0.7228457186442172, "num_chars": 24}, {"sum_logits": -18.01012420654297, "num_tokens": 5, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -29.23672866821289, "logits_per_token": -3.6020248413085936, "logits_per_char": -0.6926970848670373, "bits_per_byte": 0.9993506491766341, "num_chars": 26}, {"sum_logits": -18.50204086303711, "num_tokens": 4, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -27.821285247802734, "logits_per_token": -4.625510215759277, "logits_per_char": -0.711616956270658, "bits_per_byte": 1.0266462538249141, "num_chars": 26}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 137, "native_id": "Mercury_7093153", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -21.05191421508789, "logits_per_token_corr": -2.1051914215087892, "logits_per_char_corr": -0.5262978553771973, "bits_per_byte_corr": 0.759287305983725}, "model_output": [{"sum_logits": -15.250947952270508, "num_tokens": 9, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -29.679603576660156, "logits_per_token": -1.694549772474501, "logits_per_char": -0.401340735586066, "bits_per_byte": 0.5790122889371621, "num_chars": 38}, {"sum_logits": -21.05191421508789, "num_tokens": 10, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -41.375694274902344, "logits_per_token": -2.1051914215087892, "logits_per_char": -0.5262978553771973, "bits_per_byte": 0.759287305983725, "num_chars": 40}, {"sum_logits": -17.648298263549805, "num_tokens": 11, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -33.646602630615234, "logits_per_token": -1.6043907512318005, "logits_per_char": -0.4644289016723633, "bits_per_byte": 0.6700292732887079, "num_chars": 38}, {"sum_logits": -17.406423568725586, "num_tokens": 12, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -33.399166107177734, "logits_per_token": -1.4505352973937988, "logits_per_char": -0.43516058921813966, "bits_per_byte": 0.6278040240557804, "num_chars": 40}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 138, "native_id": "Mercury_7013965", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.792877197265625, "logits_per_token_corr": -2.965479532877604, "logits_per_char_corr": -0.49424658881293404, "bits_per_byte_corr": 0.7130471026572188}, "model_output": [{"sum_logits": -17.792877197265625, "num_tokens": 6, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -34.912803649902344, "logits_per_token": -2.965479532877604, "logits_per_char": -0.49424658881293404, "bits_per_byte": 0.7130471026572188, "num_chars": 36}, {"sum_logits": -21.146778106689453, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -34.563514709472656, "logits_per_token": -2.6433472633361816, "logits_per_char": -0.5034947168259394, "bits_per_byte": 0.7263893310790975, "num_chars": 42}, {"sum_logits": -24.19256591796875, "num_tokens": 7, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -31.16509437561035, "logits_per_token": -3.456080845424107, "logits_per_char": -0.5760134742373512, "bits_per_byte": 0.8310117827680462, "num_chars": 42}, {"sum_logits": -19.251535415649414, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -38.6449089050293, "logits_per_token": -2.4064419269561768, "logits_per_char": -0.40960713650317904, "bits_per_byte": 0.5909381845462897, "num_chars": 47}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 139, "native_id": "Mercury_7034843", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.57740306854248, "logits_per_token_corr": -3.19246768951416, "logits_per_char_corr": -0.598587691783905, "bits_per_byte_corr": 0.8635794944744315}, "model_output": [{"sum_logits": -5.979696273803711, "num_tokens": 3, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -19.96857452392578, "logits_per_token": -1.9932320912679036, "logits_per_char": -0.3322053485446506, "bits_per_byte": 0.4792710089025014, "num_chars": 18}, {"sum_logits": -9.57740306854248, "num_tokens": 3, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -18.518112182617188, "logits_per_token": -3.19246768951416, "logits_per_char": -0.598587691783905, "bits_per_byte": 0.8635794944744315, "num_chars": 16}, {"sum_logits": -5.2831926345825195, "num_tokens": 2, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -18.65208625793457, "logits_per_token": -2.6415963172912598, "logits_per_char": -0.44026605288187665, "bits_per_byte": 0.6351696511648979, "num_chars": 12}, {"sum_logits": -8.296198844909668, "num_tokens": 1, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -12.482002258300781, "logits_per_token": -8.296198844909668, "logits_per_char": -0.6913499037424723, "bits_per_byte": 0.9974070776490436, "num_chars": 12}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 140, "native_id": "Mercury_SC_407610", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.222085952758789, "logits_per_token_corr": -1.5222085952758788, "logits_per_char_corr": -0.3459564989263361, "bits_per_byte_corr": 0.49910972536469167}, "model_output": [{"sum_logits": -17.91948699951172, "num_tokens": 7, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -35.748226165771484, "logits_per_token": -2.5599267142159596, "logits_per_char": -0.5780479677261845, "bits_per_byte": 0.8339469364351091, "num_chars": 31}, {"sum_logits": -18.22955322265625, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -31.897462844848633, "logits_per_token": -2.2786941528320312, "logits_per_char": -0.4797250848067434, "bits_per_byte": 0.6920970008412234, "num_chars": 38}, {"sum_logits": -24.438283920288086, "num_tokens": 9, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -38.013675689697266, "logits_per_token": -2.7153648800320096, "logits_per_char": -0.6266226646227714, "bits_per_byte": 0.9040254107605499, "num_chars": 39}, {"sum_logits": -15.222085952758789, "num_tokens": 10, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -40.19147491455078, "logits_per_token": -1.5222085952758788, "logits_per_char": -0.3459564989263361, "bits_per_byte": 0.49910972536469167, "num_chars": 44}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 141, "native_id": "Mercury_405947", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.356441497802734, "logits_per_token_corr": -2.1937773568289622, "logits_per_char_corr": -0.4150389594000739, "bits_per_byte_corr": 0.5987746485026326}, "model_output": [{"sum_logits": -17.475128173828125, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -27.96393585205078, "logits_per_token": -2.9125213623046875, "logits_per_char": -0.5825042724609375, "bits_per_byte": 0.8403760251766319, "num_chars": 30}, {"sum_logits": -14.798230171203613, "num_tokens": 7, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -34.249488830566406, "logits_per_token": -2.114032881600516, "logits_per_char": -0.38942710976851613, "bits_per_byte": 0.5618245600511639, "num_chars": 38}, {"sum_logits": -15.356441497802734, "num_tokens": 7, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -30.618370056152344, "logits_per_token": -2.1937773568289622, "logits_per_char": -0.4150389594000739, "bits_per_byte": 0.5987746485026326, "num_chars": 37}, {"sum_logits": -24.286439895629883, "num_tokens": 8, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -34.74744415283203, "logits_per_token": -3.0358049869537354, "logits_per_char": -0.5519645430824973, "bits_per_byte": 0.7963165090522336, "num_chars": 44}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 142, "native_id": "AKDE&ED_2012_8_6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.3452301025390625, "logits_per_token_corr": -1.8363075256347656, "logits_per_char_corr": -0.30605125427246094, "bits_per_byte_corr": 0.4415386267970438}, "model_output": [{"sum_logits": -7.3452301025390625, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -24.7098388671875, "logits_per_token": -1.8363075256347656, "logits_per_char": -0.30605125427246094, "bits_per_byte": 0.4415386267970438, "num_chars": 24}, {"sum_logits": -12.030418395996094, "num_tokens": 5, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -32.44385528564453, "logits_per_token": -2.406083679199219, "logits_per_char": -0.5012674331665039, "bits_per_byte": 0.7231760399889746, "num_chars": 24}, {"sum_logits": -16.144996643066406, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -32.278175354003906, "logits_per_token": -4.036249160766602, "logits_per_char": -0.5766070229666573, "bits_per_byte": 0.8318680925763428, "num_chars": 28}, {"sum_logits": -9.523006439208984, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -26.11602020263672, "logits_per_token": -1.587167739868164, "logits_per_char": -0.3401073728288923, "bits_per_byte": 0.49067122015036924, "num_chars": 28}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 143, "native_id": "Mercury_7011130", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -29.133941650390625, "logits_per_token_corr": -3.641742706298828, "logits_per_char_corr": -0.7283485412597657, "bits_per_byte_corr": 1.0507848285149295}, "model_output": [{"sum_logits": -29.133941650390625, "num_tokens": 8, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -34.02447509765625, "logits_per_token": -3.641742706298828, "logits_per_char": -0.7283485412597657, "bits_per_byte": 1.0507848285149295, "num_chars": 40}, {"sum_logits": -26.862558364868164, "num_tokens": 8, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -31.839902877807617, "logits_per_token": -3.3578197956085205, "logits_per_char": -0.6715639591217041, "bits_per_byte": 0.9688619934653371, "num_chars": 40}, {"sum_logits": -26.932613372802734, "num_tokens": 8, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -31.45380210876465, "logits_per_token": -3.366576671600342, "logits_per_char": -0.690579830071865, "bits_per_byte": 0.9962960961833384, "num_chars": 39}, {"sum_logits": -23.352569580078125, "num_tokens": 8, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -28.286619186401367, "logits_per_token": -2.9190711975097656, "logits_per_char": -0.5987838353866186, "bits_per_byte": 0.8638624698773687, "num_chars": 39}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 144, "native_id": "Mercury_LBS11022", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.861726760864258, "logits_per_token_corr": -3.2872422536214194, "logits_per_char_corr": -0.46960603623163133, "bits_per_byte_corr": 0.6774982996433841}, "model_output": [{"sum_logits": -9.861726760864258, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -21.574125289916992, "logits_per_token": -3.2872422536214194, "logits_per_char": -0.46960603623163133, "bits_per_byte": 0.6774982996433841, "num_chars": 21}, {"sum_logits": -8.154838562011719, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -17.996932983398438, "logits_per_token": -4.077419281005859, "logits_per_char": -0.5824884687151227, "bits_per_byte": 0.8403532251909174, "num_chars": 14}, {"sum_logits": -16.42762565612793, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -21.294376373291016, "logits_per_token": -4.106906414031982, "logits_per_char": -1.095175043741862, "bits_per_byte": 1.580003604512873, "num_chars": 15}, {"sum_logits": -17.01771354675293, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -22.003665924072266, "logits_per_token": -8.508856773376465, "logits_per_char": -1.54706486788663, "bits_per_byte": 2.2319428128351837, "num_chars": 11}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 145, "native_id": "TIMSS_1995_8_J1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -59.41878890991211, "logits_per_token_corr": -3.3010438283284507, "logits_per_char_corr": -0.5713345087491549, "bits_per_byte_corr": 0.82426146246173}, "model_output": [{"sum_logits": -54.0135383605957, "num_tokens": 20, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -73.34089660644531, "logits_per_token": -2.700676918029785, "logits_per_char": -0.4738029680754009, "bits_per_byte": 0.6835531924013438, "num_chars": 114}, {"sum_logits": -43.22999572753906, "num_tokens": 15, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -64.60490417480469, "logits_per_token": -2.881999715169271, "logits_per_char": -0.5688157332570929, "bits_per_byte": 0.820627637550217, "num_chars": 76}, {"sum_logits": -59.41878890991211, "num_tokens": 18, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -71.46790313720703, "logits_per_token": -3.3010438283284507, "logits_per_char": -0.5713345087491549, "bits_per_byte": 0.82426146246173, "num_chars": 104}, {"sum_logits": -53.884315490722656, "num_tokens": 17, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -70.47434997558594, "logits_per_token": -3.169665617101333, "logits_per_char": -0.5921353350628863, "bits_per_byte": 0.8542707114309646, "num_chars": 91}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 146, "native_id": "Mercury_SC_408366", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.142451286315918, "logits_per_token_corr": -12.142451286315918, "logits_per_char_corr": -1.2142451286315918, "bits_per_byte_corr": 1.7517854255016376}, "model_output": [{"sum_logits": -5.067686080932617, "num_tokens": 1, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -12.895293235778809, "logits_per_token": -5.067686080932617, "logits_per_char": -1.0135372161865235, "bits_per_byte": 1.4622251155497532, "num_chars": 5}, {"sum_logits": -12.142451286315918, "num_tokens": 1, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -13.90109920501709, "logits_per_token": -12.142451286315918, "logits_per_char": -1.2142451286315918, "bits_per_byte": 1.7517854255016376, "num_chars": 10}, {"sum_logits": -5.40272331237793, "num_tokens": 2, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -16.335420608520508, "logits_per_token": -2.701361656188965, "logits_per_char": -0.49115666476162995, "bits_per_byte": 0.7085892845516757, "num_chars": 11}, {"sum_logits": -8.807535171508789, "num_tokens": 2, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -16.247581481933594, "logits_per_token": -4.4037675857543945, "logits_per_char": -0.8006850155917081, "bits_per_byte": 1.1551443013090896, "num_chars": 11}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 147, "native_id": "Mercury_7009993", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.741109848022461, "logits_per_token_corr": -1.7901849746704102, "logits_per_char_corr": -0.34648741445233744, "bits_per_byte_corr": 0.4998756745611853}, "model_output": [{"sum_logits": -10.741109848022461, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -30.97785186767578, "logits_per_token": -1.7901849746704102, "logits_per_char": -0.34648741445233744, "bits_per_byte": 0.4998756745611853, "num_chars": 31}, {"sum_logits": -25.54595184326172, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -43.216163635253906, "logits_per_token": -3.193243980407715, "logits_per_char": -0.7513515248018152, "bits_per_byte": 1.0839711187967185, "num_chars": 34}, {"sum_logits": -17.082735061645508, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -34.67512512207031, "logits_per_token": -2.8471225102742515, "logits_per_char": -0.517658638231682, "bits_per_byte": 0.7468235502507181, "num_chars": 33}, {"sum_logits": -11.589073181152344, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -41.96253204345703, "logits_per_token": -1.6555818830217635, "logits_per_char": -0.373841070359753, "bits_per_byte": 0.5393386582890252, "num_chars": 31}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 148, "native_id": "Mercury_401699", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -1.954768419265747, "logits_per_token_corr": -1.954768419265747, "logits_per_char_corr": -0.9773842096328735, "bits_per_byte_corr": 1.4100673522815388}, "model_output": [{"sum_logits": -3.56462025642395, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -6.575865745544434, "logits_per_token": -3.56462025642395, "logits_per_char": -1.782310128211975, "bits_per_byte": 2.5713299832994365, "num_chars": 2}, {"sum_logits": -3.8597042560577393, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -6.782100677490234, "logits_per_token": -3.8597042560577393, "logits_per_char": -1.9298521280288696, "bits_per_byte": 2.7841880947582633, "num_chars": 2}, {"sum_logits": -1.954768419265747, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": true, "sum_logits_uncond": -7.0804243087768555, "logits_per_token": -1.954768419265747, "logits_per_char": -0.9773842096328735, "bits_per_byte": 1.4100673522815388, "num_chars": 2}, {"sum_logits": -2.2991783618927, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -7.313617706298828, "logits_per_token": -2.2991783618927, "logits_per_char": -1.14958918094635, "bits_per_byte": 1.658506610412096, "num_chars": 2}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 149, "native_id": "Mercury_7056858", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.7529497146606445, "logits_per_token_corr": -2.7529497146606445, "logits_per_char_corr": -0.45882495244344074, "bits_per_byte_corr": 0.661944483526742}, "model_output": [{"sum_logits": -2.7529497146606445, "num_tokens": 1, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -13.454791069030762, "logits_per_token": -2.7529497146606445, "logits_per_char": -0.45882495244344074, "bits_per_byte": 0.661944483526742, "num_chars": 6}, {"sum_logits": -5.864619255065918, "num_tokens": 1, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -13.502662658691406, "logits_per_token": -5.864619255065918, "logits_per_char": -0.9774365425109863, "bits_per_byte": 1.4101428526652675, "num_chars": 6}, {"sum_logits": -3.3650197982788086, "num_tokens": 1, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -12.759955406188965, "logits_per_token": -3.3650197982788086, "logits_per_char": -0.6730039596557618, "bits_per_byte": 0.970939475094701, "num_chars": 5}, {"sum_logits": -6.688396453857422, "num_tokens": 1, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -13.69958209991455, "logits_per_token": -6.688396453857422, "logits_per_char": -0.8360495567321777, "bits_per_byte": 1.2061645494357955, "num_chars": 8}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 150, "native_id": "Mercury_7027160", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.981508731842041, "logits_per_token_corr": -3.4907543659210205, "logits_per_char_corr": -0.46543391545613605, "bits_per_byte_corr": 0.671479201690583}, "model_output": [{"sum_logits": -5.273132801055908, "num_tokens": 2, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -16.5797119140625, "logits_per_token": -2.636566400527954, "logits_per_char": -0.47937570918690076, "bits_per_byte": 0.6915929583670685, "num_chars": 11}, {"sum_logits": -4.95782995223999, "num_tokens": 2, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -17.778453826904297, "logits_per_token": -2.478914976119995, "logits_per_char": -0.4131524960199992, "bits_per_byte": 0.5960530571393783, "num_chars": 12}, {"sum_logits": -6.981508731842041, "num_tokens": 2, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -16.327972412109375, "logits_per_token": -3.4907543659210205, "logits_per_char": -0.46543391545613605, "bits_per_byte": 0.671479201690583, "num_chars": 15}, {"sum_logits": -2.9384374618530273, "num_tokens": 2, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -16.287256240844727, "logits_per_token": -1.4692187309265137, "logits_per_char": -0.29384374618530273, "bits_per_byte": 0.4239269154180761, "num_chars": 10}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 151, "native_id": "Mercury_400811", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.009136438369751, "logits_per_token_corr": -3.009136438369751, "logits_per_char_corr": -0.18807102739810944, "bits_per_byte_corr": 0.27132913856233976}, "model_output": [{"sum_logits": -1.9913694858551025, "num_tokens": 1, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -12.272923469543457, "logits_per_token": -1.9913694858551025, "logits_per_char": -0.3982738971710205, "bits_per_byte": 0.574587776364565, "num_chars": 5}, {"sum_logits": -3.8193037509918213, "num_tokens": 1, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -12.887895584106445, "logits_per_token": -3.8193037509918213, "logits_per_char": -0.4243670834435357, "bits_per_byte": 0.6122322868009417, "num_chars": 9}, {"sum_logits": -3.9112226963043213, "num_tokens": 1, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -13.95900821685791, "logits_per_token": -3.9112226963043213, "logits_per_char": -0.35556569966402923, "bits_per_byte": 0.5129728716158781, "num_chars": 11}, {"sum_logits": -3.009136438369751, "num_tokens": 1, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -13.868720054626465, "logits_per_token": -3.009136438369751, "logits_per_char": -0.18807102739810944, "bits_per_byte": 0.27132913856233976, "num_chars": 16}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 152, "native_id": "Mercury_SC_400062", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.64200210571289, "logits_per_token_corr": -2.806000300816127, "logits_per_char_corr": -0.7554616194504958, "bits_per_byte_corr": 1.0899007319639586}, "model_output": [{"sum_logits": -16.51810646057129, "num_tokens": 6, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -29.71179962158203, "logits_per_token": -2.7530177434285483, "logits_per_char": -0.7865764981224423, "bits_per_byte": 1.1347900131218698, "num_chars": 21}, {"sum_logits": -16.184280395507812, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -30.751928329467773, "logits_per_token": -3.2368560791015626, "logits_per_char": -0.7706800188337054, "bits_per_byte": 1.1118562412843984, "num_chars": 21}, {"sum_logits": -14.509092330932617, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -32.54662322998047, "logits_per_token": -2.9018184661865236, "logits_per_char": -0.604545513788859, "bits_per_byte": 0.872174814735484, "num_chars": 24}, {"sum_logits": -19.64200210571289, "num_tokens": 7, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -37.88856506347656, "logits_per_token": -2.806000300816127, "logits_per_char": -0.7554616194504958, "bits_per_byte": 1.0899007319639586, "num_chars": 26}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 153, "native_id": "Mercury_400699", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.023399353027344, "logits_per_token_corr": -2.337233225504557, "logits_per_char_corr": -0.3595743423853165, "bits_per_byte_corr": 0.518756120590579}, "model_output": [{"sum_logits": -10.68710708618164, "num_tokens": 6, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -31.619449615478516, "logits_per_token": -1.7811845143636067, "logits_per_char": -0.2968640857272678, "bits_per_byte": 0.42828434429707307, "num_chars": 36}, {"sum_logits": -14.023399353027344, "num_tokens": 6, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -33.66438674926758, "logits_per_token": -2.337233225504557, "logits_per_char": -0.3595743423853165, "bits_per_byte": 0.518756120590579, "num_chars": 39}, {"sum_logits": -12.794139862060547, "num_tokens": 6, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -31.90947723388672, "logits_per_token": -2.132356643676758, "logits_per_char": -0.32805486825796276, "bits_per_byte": 0.4732831315755851, "num_chars": 39}, {"sum_logits": -16.95868682861328, "num_tokens": 6, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -35.26957702636719, "logits_per_token": -2.8264478047688804, "logits_per_char": -0.423967170715332, "bits_per_byte": 0.6116553346911735, "num_chars": 40}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 154, "native_id": "Mercury_7029803", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.571887969970703, "logits_per_token_corr": -1.5065352699973367, "logits_per_char_corr": -0.3126771315088812, "bits_per_byte_corr": 0.45109774702757327}, "model_output": [{"sum_logits": -17.183698654174805, "num_tokens": 10, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -35.49116897583008, "logits_per_token": -1.7183698654174804, "logits_per_char": -0.40913568224225727, "bits_per_byte": 0.5902580198220514, "num_chars": 42}, {"sum_logits": -16.571887969970703, "num_tokens": 11, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -40.60392761230469, "logits_per_token": -1.5065352699973367, "logits_per_char": -0.3126771315088812, "bits_per_byte": 0.45109774702757327, "num_chars": 53}, {"sum_logits": -19.440673828125, "num_tokens": 11, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -39.450923919677734, "logits_per_token": -1.767333984375, "logits_per_char": -0.36680516656839623, "bits_per_byte": 0.5291879947810556, "num_chars": 53}, {"sum_logits": -16.610403060913086, "num_tokens": 11, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -39.06031036376953, "logits_per_token": -1.5100366419011897, "logits_per_char": -0.30760005668357565, "bits_per_byte": 0.44377307635487745, "num_chars": 54}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 155, "native_id": "Mercury_SC_401372", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.290153503417969, "logits_per_token_corr": -5.096717834472656, "logits_per_char_corr": -0.899420794318704, "bits_per_byte_corr": 1.297589919636939}, "model_output": [{"sum_logits": -17.60475730895996, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -32.04833984375, "logits_per_token": -4.40118932723999, "logits_per_char": -0.9780420727199979, "bits_per_byte": 1.4110164480949177, "num_chars": 18}, {"sum_logits": -13.507181167602539, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -25.906225204467773, "logits_per_token": -3.3767952919006348, "logits_per_char": -0.675359058380127, "bits_per_byte": 0.9743371643451492, "num_chars": 20}, {"sum_logits": -12.780108451843262, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -23.84425163269043, "logits_per_token": -3.1950271129608154, "logits_per_char": -0.7100060251024034, "bits_per_byte": 1.0243221714172581, "num_chars": 18}, {"sum_logits": -15.290153503417969, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -19.90717315673828, "logits_per_token": -5.096717834472656, "logits_per_char": -0.899420794318704, "bits_per_byte": 1.297589919636939, "num_chars": 17}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 156, "native_id": "Mercury_7271128", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.290739059448242, "logits_per_token_corr": -1.645369529724121, "logits_per_char_corr": -0.4113423824310303, "bits_per_byte_corr": 0.5934416152411252}, "model_output": [{"sum_logits": -5.260573387145996, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -18.947834014892578, "logits_per_token": -1.7535244623819988, "logits_per_char": -0.7515104838779995, "bits_per_byte": 1.084200448267634, "num_chars": 7}, {"sum_logits": -1.477401852607727, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": true, "sum_logits_uncond": -19.52284049987793, "logits_per_token": -0.7387009263038635, "logits_per_char": -0.18467523157596588, "bits_per_byte": 0.2664300407698583, "num_chars": 8}, {"sum_logits": -3.290739059448242, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -19.047475814819336, "logits_per_token": -1.645369529724121, "logits_per_char": -0.4113423824310303, "bits_per_byte": 0.5934416152411252, "num_chars": 8}, {"sum_logits": -2.6468288898468018, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -16.345060348510742, "logits_per_token": -1.3234144449234009, "logits_per_char": -0.3308536112308502, "bits_per_byte": 0.4773208641832956, "num_chars": 8}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 157, "native_id": "Mercury_407260", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.425228118896484, "logits_per_token_corr": -3.489318302699498, "logits_per_char_corr": -1.0177178382873535, "bits_per_byte_corr": 1.4682564783224559}, "model_output": [{"sum_logits": -16.760202407836914, "num_tokens": 5, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -29.429872512817383, "logits_per_token": -3.352040481567383, "logits_per_char": -0.644623169532189, "bits_per_byte": 0.9299946499268827, "num_chars": 26}, {"sum_logits": -24.425228118896484, "num_tokens": 7, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -32.657997131347656, "logits_per_token": -3.489318302699498, "logits_per_char": -1.0177178382873535, "bits_per_byte": 1.4682564783224559, "num_chars": 24}, {"sum_logits": -22.240524291992188, "num_tokens": 8, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -31.189128875732422, "logits_per_token": -2.7800655364990234, "logits_per_char": -0.717436267483619, "bits_per_byte": 1.0350417452532485, "num_chars": 31}, {"sum_logits": -14.277668952941895, "num_tokens": 6, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -26.894611358642578, "logits_per_token": -2.3796114921569824, "logits_per_char": -0.41993143979240866, "bits_per_byte": 0.6058330057023056, "num_chars": 34}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 158, "native_id": "Mercury_SC_416155", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.694104194641113, "logits_per_token_corr": -2.231368064880371, "logits_per_char_corr": -0.33470520973205564, "bits_per_byte_corr": 0.482877546240484}, "model_output": [{"sum_logits": -4.011873722076416, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -18.463062286376953, "logits_per_token": -1.002968430519104, "logits_per_char": -0.174429292264192, "bits_per_byte": 0.2516482749355022, "num_chars": 23}, {"sum_logits": -4.6467461585998535, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -16.409992218017578, "logits_per_token": -1.5489153861999512, "logits_per_char": -0.24456558729472913, "bits_per_byte": 0.3528335599624561, "num_chars": 19}, {"sum_logits": -6.694104194641113, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -19.041671752929688, "logits_per_token": -2.231368064880371, "logits_per_char": -0.33470520973205564, "bits_per_byte": 0.482877546240484, "num_chars": 20}, {"sum_logits": -5.949254512786865, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -17.060606002807617, "logits_per_token": -1.9830848375956218, "logits_per_char": -0.2832978339422317, "bits_per_byte": 0.4087123801233363, "num_chars": 21}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 159, "native_id": "Mercury_402145", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.518110275268555, "logits_per_token_corr": -3.903622055053711, "logits_per_char_corr": -0.8871868306940253, "bits_per_byte_corr": 1.2799400409851862}, "model_output": [{"sum_logits": -12.762104034423828, "num_tokens": 2, "num_tokens_all": 175, "is_greedy": false, "sum_logits_uncond": -20.656295776367188, "logits_per_token": -6.381052017211914, "logits_per_char": -0.8508069356282552, "bits_per_byte": 1.227454946785701, "num_chars": 15}, {"sum_logits": -19.518110275268555, "num_tokens": 5, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -30.455020904541016, "logits_per_token": -3.903622055053711, "logits_per_char": -0.8871868306940253, "bits_per_byte": 1.2799400409851862, "num_chars": 22}, {"sum_logits": -10.827515602111816, "num_tokens": 5, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -25.28559684753418, "logits_per_token": -2.1655031204223634, "logits_per_char": -0.4164429077735314, "bits_per_byte": 0.6008001178586853, "num_chars": 26}, {"sum_logits": -18.259571075439453, "num_tokens": 6, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -32.61328125, "logits_per_token": -3.0432618459065757, "logits_per_char": -0.5370462081011604, "bits_per_byte": 0.7747939011563231, "num_chars": 34}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 160, "native_id": "AIMS_2009_4_5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.322540283203125, "logits_per_token_corr": -4.830635070800781, "logits_per_char_corr": -0.6440846761067708, "bits_per_byte_corr": 0.92921776813248}, "model_output": [{"sum_logits": -19.322540283203125, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -29.370994567871094, "logits_per_token": -4.830635070800781, "logits_per_char": -0.6440846761067708, "bits_per_byte": 0.92921776813248, "num_chars": 30}, {"sum_logits": -19.44985580444336, "num_tokens": 7, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -34.379127502441406, "logits_per_token": -2.778550829206194, "logits_per_char": -0.5893895698316169, "bits_per_byte": 0.8503094095483641, "num_chars": 33}, {"sum_logits": -21.59630012512207, "num_tokens": 8, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -36.53820037841797, "logits_per_token": -2.699537515640259, "logits_per_char": -0.6170371464320592, "bits_per_byte": 0.8901964312024485, "num_chars": 35}, {"sum_logits": -27.61441421508789, "num_tokens": 7, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -37.27241897583008, "logits_per_token": -3.944916316441127, "logits_per_char": -0.7080619029509716, "bits_per_byte": 1.021517396030503, "num_chars": 39}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 161, "native_id": "TIMSS_2003_4_pg7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.939417839050293, "logits_per_token_corr": -3.313139279683431, "logits_per_char_corr": -0.7099584170750209, "bits_per_byte_corr": 1.0242534875522469}, "model_output": [{"sum_logits": -10.13391399383545, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -16.027559280395508, "logits_per_token": -3.377971331278483, "logits_per_char": -0.6755942662556966, "bits_per_byte": 0.9746764975808118, "num_chars": 15}, {"sum_logits": -11.343531608581543, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -16.32836151123047, "logits_per_token": -3.781177202860514, "logits_per_char": -0.9452943007151285, "bits_per_byte": 1.3637713998232963, "num_chars": 12}, {"sum_logits": -8.314708709716797, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -14.900470733642578, "logits_per_token": -2.771569569905599, "logits_per_char": -0.48910051233628216, "bits_per_byte": 0.7056228836443125, "num_chars": 17}, {"sum_logits": -9.939417839050293, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -18.2957763671875, "logits_per_token": -3.313139279683431, "logits_per_char": -0.7099584170750209, "bits_per_byte": 1.0242534875522469, "num_chars": 14}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 162, "native_id": "Mercury_7142415", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.683235168457031, "logits_per_token_corr": -1.5261764526367188, "logits_per_char_corr": -0.28113776759097453, "bits_per_byte_corr": 0.40559606311038426}, "model_output": [{"sum_logits": -10.683235168457031, "num_tokens": 7, "num_tokens_all": 240, "is_greedy": false, "sum_logits_uncond": -30.35871124267578, "logits_per_token": -1.5261764526367188, "logits_per_char": -0.28113776759097453, "bits_per_byte": 0.40559606311038426, "num_chars": 38}, {"sum_logits": -15.888883590698242, "num_tokens": 10, "num_tokens_all": 243, "is_greedy": false, "sum_logits_uncond": -34.255455017089844, "logits_per_token": -1.5888883590698242, "logits_per_char": -0.2787523436964604, "bits_per_byte": 0.40215462388734824, "num_chars": 57}, {"sum_logits": -17.93718147277832, "num_tokens": 9, "num_tokens_all": 242, "is_greedy": false, "sum_logits_uncond": -34.4309196472168, "logits_per_token": -1.9930201636420355, "logits_per_char": -0.43749223104337365, "bits_per_byte": 0.6311678721541772, "num_chars": 41}, {"sum_logits": -15.502342224121094, "num_tokens": 8, "num_tokens_all": 241, "is_greedy": false, "sum_logits_uncond": -31.666141510009766, "logits_per_token": -1.9377927780151367, "logits_per_char": -0.3875585556030273, "bits_per_byte": 0.5591288062229788, "num_chars": 40}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 163, "native_id": "Mercury_7212818", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -27.555896759033203, "logits_per_token_corr": -3.4444870948791504, "logits_per_char_corr": -0.5862956757241107, "bits_per_byte_corr": 0.845845863862426}, "model_output": [{"sum_logits": -23.001693725585938, "num_tokens": 10, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -35.24664306640625, "logits_per_token": -2.3001693725585937, "logits_per_char": -0.40353848641378837, "bits_per_byte": 0.5821829731574291, "num_chars": 57}, {"sum_logits": -27.555896759033203, "num_tokens": 8, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -36.69851303100586, "logits_per_token": -3.4444870948791504, "logits_per_char": -0.5862956757241107, "bits_per_byte": 0.845845863862426, "num_chars": 47}, {"sum_logits": -30.81549072265625, "num_tokens": 10, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -46.28364562988281, "logits_per_token": -3.081549072265625, "logits_per_char": -0.5706572356047453, "bits_per_byte": 0.8232843638549624, "num_chars": 54}, {"sum_logits": -27.156291961669922, "num_tokens": 7, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -39.07571029663086, "logits_per_token": -3.8794702802385603, "logits_per_char": -0.6171884536743164, "bits_per_byte": 0.8904147214105037, "num_chars": 44}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 164, "native_id": "Mercury_SC_413299", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.8979387283325195, "logits_per_token_corr": -2.4489693641662598, "logits_per_char_corr": -0.6122423410415649, "bits_per_byte_corr": 0.8832789892435498}, "model_output": [{"sum_logits": -4.8979387283325195, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -16.815589904785156, "logits_per_token": -2.4489693641662598, "logits_per_char": -0.6122423410415649, "bits_per_byte": 0.8832789892435498, "num_chars": 8}, {"sum_logits": -6.256534576416016, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -19.72254753112793, "logits_per_token": -2.0855115254720054, "logits_per_char": -0.36803144567153034, "bits_per_byte": 0.5309571415618942, "num_chars": 17}, {"sum_logits": -6.576302528381348, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -18.522991180419922, "logits_per_token": -2.1921008427937827, "logits_per_char": -0.41101890802383423, "bits_per_byte": 0.5929749403180087, "num_chars": 16}, {"sum_logits": -6.47550630569458, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -18.12837028503418, "logits_per_token": -3.23775315284729, "logits_per_char": -0.40471914410591125, "bits_per_byte": 0.5838863021548434, "num_chars": 16}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 165, "native_id": "Mercury_7132020", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.054723739624023, "logits_per_token_corr": -4.054723739624023, "logits_per_char_corr": -0.45052485995822483, "bits_per_byte_corr": 0.6499699812593926}, "model_output": [{"sum_logits": -4.054723739624023, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -14.433614730834961, "logits_per_token": -4.054723739624023, "logits_per_char": -0.45052485995822483, "bits_per_byte": 0.6499699812593926, "num_chars": 9}, {"sum_logits": -4.068977355957031, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -16.651010513305664, "logits_per_token": -4.068977355957031, "logits_per_char": -0.5086221694946289, "bits_per_byte": 0.7337866816166141, "num_chars": 8}, {"sum_logits": -5.343851089477539, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -16.689517974853516, "logits_per_token": -5.343851089477539, "logits_per_char": -0.6679813861846924, "bits_per_byte": 0.9636934332554836, "num_chars": 8}, {"sum_logits": -5.43690299987793, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -14.843673706054688, "logits_per_token": -5.43690299987793, "logits_per_char": -0.7767004285539899, "bits_per_byte": 1.120541856531979, "num_chars": 7}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 166, "native_id": "MEA_2014_8_10", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.95947551727295, "logits_per_token_corr": -2.7398688793182373, "logits_per_char_corr": -0.4764989355336065, "bits_per_byte_corr": 0.6874426512836979}, "model_output": [{"sum_logits": -7.505856990814209, "num_tokens": 3, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -23.705585479736328, "logits_per_token": -2.501952330271403, "logits_per_char": -0.41699205504523384, "bits_per_byte": 0.6015923699042888, "num_chars": 18}, {"sum_logits": -7.477197647094727, "num_tokens": 4, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -17.941205978393555, "logits_per_token": -1.8692994117736816, "logits_per_char": -0.2875845248882587, "bits_per_byte": 0.41489676789299756, "num_chars": 26}, {"sum_logits": -10.95947551727295, "num_tokens": 4, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -29.654748916625977, "logits_per_token": -2.7398688793182373, "logits_per_char": -0.4764989355336065, "bits_per_byte": 0.6874426512836979, "num_chars": 23}, {"sum_logits": -14.429464340209961, "num_tokens": 4, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -28.095966339111328, "logits_per_token": -3.6073660850524902, "logits_per_char": -0.721473217010498, "bits_per_byte": 1.0408658323160003, "num_chars": 20}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 167, "native_id": "TIMSS_1995_8_N2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.034513473510742, "logits_per_token_corr": -2.6724189122517905, "logits_per_char_corr": -0.5726611954825265, "bits_per_byte_corr": 0.8261754668327798}, "model_output": [{"sum_logits": -27.863510131835938, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -37.693016052246094, "logits_per_token": -3.095945570203993, "logits_per_char": -0.7530678414009713, "bits_per_byte": 1.0864472402429182, "num_chars": 37}, {"sum_logits": -16.034513473510742, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -23.964921951293945, "logits_per_token": -2.6724189122517905, "logits_per_char": -0.5726611954825265, "bits_per_byte": 0.8261754668327798, "num_chars": 28}, {"sum_logits": -16.425537109375, "num_tokens": 7, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -21.958873748779297, "logits_per_token": -2.3465053013392856, "logits_per_char": -0.5475179036458333, "bits_per_byte": 0.7899013643883325, "num_chars": 30}, {"sum_logits": -15.501051902770996, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -25.61397361755371, "logits_per_token": -2.5835086504618325, "logits_per_char": -0.7045932683077726, "bits_per_byte": 1.0165132140321007, "num_chars": 22}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 168, "native_id": "Mercury_7024465", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.972063064575195, "logits_per_token_corr": -1.6573543548583984, "logits_per_char_corr": -0.29247429791618795, "bits_per_byte_corr": 0.4219512191914688}, "model_output": [{"sum_logits": -4.972063064575195, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -22.312292098999023, "logits_per_token": -1.6573543548583984, "logits_per_char": -0.29247429791618795, "bits_per_byte": 0.4219512191914688, "num_chars": 17}, {"sum_logits": -13.939146041870117, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -24.20321273803711, "logits_per_token": -4.646382013956706, "logits_per_char": -0.6637688591366723, "bits_per_byte": 0.9576160413736899, "num_chars": 21}, {"sum_logits": -12.413881301879883, "num_tokens": 5, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -23.493019104003906, "logits_per_token": -2.4827762603759767, "logits_per_char": -0.591137204851423, "bits_per_byte": 0.8528307139247239, "num_chars": 21}, {"sum_logits": -5.9294586181640625, "num_tokens": 5, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -25.185611724853516, "logits_per_token": -1.1858917236328126, "logits_per_char": -0.29647293090820315, "bits_per_byte": 0.42772002717938823, "num_chars": 20}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 169, "native_id": "Mercury_SC_415762", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -32.55470275878906, "logits_per_token_corr": -2.325335911342076, "logits_per_char_corr": -0.4459548323121789, "bits_per_byte_corr": 0.643376825037712}, "model_output": [{"sum_logits": -22.632511138916016, "num_tokens": 6, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -32.89379119873047, "logits_per_token": -3.772085189819336, "logits_per_char": -0.7072659730911255, "bits_per_byte": 1.0203691119688068, "num_chars": 32}, {"sum_logits": -20.92432403564453, "num_tokens": 6, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -31.590450286865234, "logits_per_token": -3.4873873392740884, "logits_per_char": -0.6154212951660156, "bits_per_byte": 0.8878652505941116, "num_chars": 34}, {"sum_logits": -27.39900779724121, "num_tokens": 12, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -41.500732421875, "logits_per_token": -2.283250649770101, "logits_per_char": -0.4419194806006647, "bits_per_byte": 0.6375550431352635, "num_chars": 62}, {"sum_logits": -32.55470275878906, "num_tokens": 14, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -47.40915298461914, "logits_per_token": -2.325335911342076, "logits_per_char": -0.4459548323121789, "bits_per_byte": 0.643376825037712, "num_chars": 73}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 170, "native_id": "Mercury_415093", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -37.64117431640625, "logits_per_token_corr": -1.344327654157366, "logits_per_char_corr": -0.8962184361049107, "bits_per_byte_corr": 1.2929698933227458}, "model_output": [{"sum_logits": -22.80239486694336, "num_tokens": 27, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -47.9421501159668, "logits_per_token": -0.8445331432201244, "logits_per_char": -0.5561559723644722, "bits_per_byte": 0.8023634632915799, "num_chars": 41}, {"sum_logits": -25.420799255371094, "num_tokens": 27, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -49.064353942871094, "logits_per_token": -0.9415110835322628, "logits_per_char": -0.6200194940334414, "bits_per_byte": 0.8944990492971727, "num_chars": 41}, {"sum_logits": -38.719886779785156, "num_tokens": 28, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -58.64363098144531, "logits_per_token": -1.3828530992780412, "logits_per_char": -0.9219020661853609, "bits_per_byte": 1.3300235390718647, "num_chars": 42}, {"sum_logits": -37.64117431640625, "num_tokens": 28, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -57.685638427734375, "logits_per_token": -1.344327654157366, "logits_per_char": -0.8962184361049107, "bits_per_byte": 1.2929698933227458, "num_chars": 42}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 171, "native_id": "LEAP_2005_8_10404", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -32.06749725341797, "logits_per_token_corr": -2.915227023037997, "logits_per_char_corr": -0.6050471179890182, "bits_per_byte_corr": 0.8728984766275433}, "model_output": [{"sum_logits": -35.04844665527344, "num_tokens": 9, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -43.9452018737793, "logits_per_token": -3.8942718505859375, "logits_per_char": -0.8150801547738009, "bits_per_byte": 1.175912097220016, "num_chars": 43}, {"sum_logits": -32.037296295166016, "num_tokens": 11, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -45.81754684448242, "logits_per_token": -2.9124814813787285, "logits_per_char": -0.525201578609279, "bits_per_byte": 0.7577057129272062, "num_chars": 61}, {"sum_logits": -35.779659271240234, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -48.65796661376953, "logits_per_token": -4.472457408905029, "logits_per_char": -0.7301971279844945, "bits_per_byte": 1.0534517754153507, "num_chars": 49}, {"sum_logits": -32.06749725341797, "num_tokens": 11, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -46.032005310058594, "logits_per_token": -2.915227023037997, "logits_per_char": -0.6050471179890182, "bits_per_byte": 0.8728984766275433, "num_chars": 53}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 172, "native_id": "AIMS_2008_8_6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.677637100219727, "logits_per_token_corr": -2.4677637100219725, "logits_per_char_corr": -0.5608553886413574, "bits_per_byte_corr": 0.8091432878493199}, "model_output": [{"sum_logits": -28.391929626464844, "num_tokens": 9, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -35.43864822387695, "logits_per_token": -3.1546588473849826, "logits_per_char": -0.660277433173601, "bits_per_byte": 0.9525789784511325, "num_chars": 43}, {"sum_logits": -24.677637100219727, "num_tokens": 10, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -32.5671272277832, "logits_per_token": -2.4677637100219725, "logits_per_char": -0.5608553886413574, "bits_per_byte": 0.8091432878493199, "num_chars": 44}, {"sum_logits": -33.6939811706543, "num_tokens": 11, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -51.36674499511719, "logits_per_token": -3.063089197332209, "logits_per_char": -0.6239626142713759, "bits_per_byte": 0.9001877693100738, "num_chars": 54}, {"sum_logits": -29.77121353149414, "num_tokens": 13, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -41.69708251953125, "logits_per_token": -2.2900933485764723, "logits_per_char": -0.4580186697152945, "bits_per_byte": 0.6607812634332901, "num_chars": 65}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 173, "native_id": "Mercury_7057173", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.085058212280273, "logits_per_token_corr": -7.042529106140137, "logits_per_char_corr": -0.8285328360164866, "bits_per_byte_corr": 1.1953202137355128}, "model_output": [{"sum_logits": -12.588859558105469, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -21.28538703918457, "logits_per_token": -6.294429779052734, "logits_per_char": -0.5722208890047941, "bits_per_byte": 0.8255402388608835, "num_chars": 22}, {"sum_logits": -11.914679527282715, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -19.32310676574707, "logits_per_token": -5.957339763641357, "logits_per_char": -0.7943119684855143, "bits_per_byte": 1.1459499378536253, "num_chars": 15}, {"sum_logits": -15.831546783447266, "num_tokens": 3, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -25.410898208618164, "logits_per_token": -5.277182261149089, "logits_per_char": -0.510695057530557, "bits_per_byte": 0.7367772269063678, "num_chars": 31}, {"sum_logits": -14.085058212280273, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -22.709583282470703, "logits_per_token": -7.042529106140137, "logits_per_char": -0.8285328360164866, "bits_per_byte": 1.1953202137355128, "num_chars": 17}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 174, "native_id": "TIMSS_2007_8_pg60", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.835306167602539, "logits_per_token_corr": -7.835306167602539, "logits_per_char_corr": -1.5670612335205079, "bits_per_byte_corr": 2.260791470371003}, "model_output": [{"sum_logits": -4.436249732971191, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -10.031486511230469, "logits_per_token": -4.436249732971191, "logits_per_char": -0.6337499618530273, "bits_per_byte": 0.9143079271295892, "num_chars": 7}, {"sum_logits": -7.091584205627441, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -10.721514701843262, "logits_per_token": -7.091584205627441, "logits_per_char": -1.181930700937907, "bits_per_byte": 1.70516556091876, "num_chars": 6}, {"sum_logits": -7.835306167602539, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -9.838791847229004, "logits_per_token": -7.835306167602539, "logits_per_char": -1.5670612335205079, "bits_per_byte": 2.260791470371003, "num_chars": 5}, {"sum_logits": -7.603913307189941, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -12.844858169555664, "logits_per_token": -2.534637769063314, "logits_per_char": -0.844879256354438, "bits_per_byte": 1.2189031132933785, "num_chars": 9}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 175, "native_id": "AIMS_2009_8_14", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.605201721191406, "logits_per_token_corr": -1.7171001434326172, "logits_per_char_corr": -0.3377901921506788, "bits_per_byte_corr": 0.48732823507706446}, "model_output": [{"sum_logits": -20.605201721191406, "num_tokens": 12, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -39.591941833496094, "logits_per_token": -1.7171001434326172, "logits_per_char": -0.3377901921506788, "bits_per_byte": 0.48732823507706446, "num_chars": 61}, {"sum_logits": -21.715547561645508, "num_tokens": 13, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -40.78373718261719, "logits_per_token": -1.6704267355111928, "logits_per_char": -0.35025076712331465, "bits_per_byte": 0.5053050447967242, "num_chars": 62}, {"sum_logits": -21.36073112487793, "num_tokens": 11, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -39.81590270996094, "logits_per_token": -1.9418846477161755, "logits_per_char": -0.35017592007996606, "bits_per_byte": 0.5051970633384599, "num_chars": 61}, {"sum_logits": -21.20242691040039, "num_tokens": 12, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -41.15010070800781, "logits_per_token": -1.7668689092000325, "logits_per_char": -0.34197462758710306, "bits_per_byte": 0.49336509933011813, "num_chars": 62}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 176, "native_id": "Mercury_185010", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.942767143249512, "logits_per_token_corr": -4.971383571624756, "logits_per_char_corr": -0.9038879221135919, "bits_per_byte_corr": 1.3040346227536455}, "model_output": [{"sum_logits": -9.942767143249512, "num_tokens": 2, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -16.49663734436035, "logits_per_token": -4.971383571624756, "logits_per_char": -0.9038879221135919, "bits_per_byte": 1.3040346227536455, "num_chars": 11}, {"sum_logits": -10.431127548217773, "num_tokens": 4, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -28.454235076904297, "logits_per_token": -2.6077818870544434, "logits_per_char": -0.8023944267859826, "bits_per_byte": 1.1576104603619113, "num_chars": 13}, {"sum_logits": -6.421530246734619, "num_tokens": 2, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -17.792051315307617, "logits_per_token": -3.2107651233673096, "logits_per_char": -0.6421530246734619, "bits_per_byte": 0.9264309841889172, "num_chars": 10}, {"sum_logits": -8.89378547668457, "num_tokens": 3, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -19.378381729125977, "logits_per_token": -2.964595158894857, "logits_per_char": -0.6841373443603516, "bits_per_byte": 0.9870015539963334, "num_chars": 13}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 177, "native_id": "Mercury_7206938", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -35.080440521240234, "logits_per_token_corr": -4.385055065155029, "logits_per_char_corr": -0.661895104174344, "bits_per_byte_corr": 0.954912784381696}, "model_output": [{"sum_logits": -35.080440521240234, "num_tokens": 8, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -39.568607330322266, "logits_per_token": -4.385055065155029, "logits_per_char": -0.661895104174344, "bits_per_byte": 0.954912784381696, "num_chars": 53}, {"sum_logits": -18.677764892578125, "num_tokens": 6, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -37.239601135253906, "logits_per_token": -3.1129608154296875, "logits_per_char": -0.5188268025716146, "bits_per_byte": 0.7485088551508835, "num_chars": 36}, {"sum_logits": -24.74534797668457, "num_tokens": 8, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -39.20538330078125, "logits_per_token": -3.0931684970855713, "logits_per_char": -0.4949069595336914, "bits_per_byte": 0.7139998162212045, "num_chars": 50}, {"sum_logits": -12.841400146484375, "num_tokens": 8, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -35.298526763916016, "logits_per_token": -1.6051750183105469, "logits_per_char": -0.2568280029296875, "bits_per_byte": 0.3705244861883425, "num_chars": 50}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 178, "native_id": "Mercury_402501", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.382481575012207, "logits_per_token_corr": -1.4764963150024415, "logits_per_char_corr": -0.8202757305569119, "bits_per_byte_corr": 1.1834077286368787}, "model_output": [{"sum_logits": -0.8253471851348877, "num_tokens": 2, "num_tokens_all": 191, "is_greedy": true, "sum_logits_uncond": -20.74490737915039, "logits_per_token": -0.41267359256744385, "logits_per_char": -0.16506943702697754, "bits_per_byte": 0.23814485820132464, "num_chars": 5}, {"sum_logits": -7.382481575012207, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -31.495773315429688, "logits_per_token": -1.4764963150024415, "logits_per_char": -0.8202757305569119, "bits_per_byte": 1.1834077286368787, "num_chars": 9}, {"sum_logits": -12.127174377441406, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -36.514041900634766, "logits_per_token": -2.425434875488281, "logits_per_char": -1.3474638197157118, "bits_per_byte": 1.9439793704825543, "num_chars": 9}, {"sum_logits": -17.818437576293945, "num_tokens": 8, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -37.80691909790039, "logits_per_token": -2.227304697036743, "logits_per_char": -1.3706490443303034, "bits_per_byte": 1.9774285790559465, "num_chars": 13}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 179, "native_id": "MCAS_2011_8_15365", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.738584518432617, "logits_per_token_corr": -3.8692922592163086, "logits_per_char_corr": -0.7035076834938743, "bits_per_byte_corr": 1.014947046204624}, "model_output": [{"sum_logits": -9.141251564025879, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -18.105640411376953, "logits_per_token": -4.5706257820129395, "logits_per_char": -1.0156946182250977, "bits_per_byte": 1.4653375887720101, "num_chars": 9}, {"sum_logits": -7.971303462982178, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -15.142193794250488, "logits_per_token": -3.985651731491089, "logits_per_char": -0.7971303462982178, "bits_per_byte": 1.1500159975473672, "num_chars": 10}, {"sum_logits": -7.738584518432617, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -17.35232925415039, "logits_per_token": -3.8692922592163086, "logits_per_char": -0.7035076834938743, "bits_per_byte": 1.014947046204624, "num_chars": 11}, {"sum_logits": -12.359785079956055, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -17.129629135131836, "logits_per_token": -6.179892539978027, "logits_per_char": -1.2359785079956054, "bits_per_byte": 1.7831400641318809, "num_chars": 10}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 180, "native_id": "Mercury_SC_401766", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.618398666381836, "logits_per_token_corr": -2.2061328887939453, "logits_per_char_corr": -0.6016726060347124, "bits_per_byte_corr": 0.8680300849656422}, "model_output": [{"sum_logits": -6.618398666381836, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -15.564255714416504, "logits_per_token": -2.2061328887939453, "logits_per_char": -0.6016726060347124, "bits_per_byte": 0.8680300849656422, "num_chars": 11}, {"sum_logits": -6.352558135986328, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -14.56945514678955, "logits_per_token": -6.352558135986328, "logits_per_char": -0.794069766998291, "bits_per_byte": 1.1456005149691122, "num_chars": 8}, {"sum_logits": -14.6692533493042, "num_tokens": 2, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -22.050125122070312, "logits_per_token": -7.3346266746521, "logits_per_char": -1.2224377791086833, "bits_per_byte": 1.7636049217166825, "num_chars": 12}, {"sum_logits": -13.214860916137695, "num_tokens": 2, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -18.195470809936523, "logits_per_token": -6.607430458068848, "logits_per_char": -0.825928807258606, "bits_per_byte": 1.1915633943601833, "num_chars": 16}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 181, "native_id": "Mercury_7162400", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.123754501342773, "logits_per_token_corr": -1.4581949445936415, "logits_per_char_corr": -0.3200915732034823, "bits_per_byte_corr": 0.4617945252913423}, "model_output": [{"sum_logits": -11.562593460083008, "num_tokens": 4, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -20.202003479003906, "logits_per_token": -2.890648365020752, "logits_per_char": -0.4447151330801157, "bits_per_byte": 0.6415883171034192, "num_chars": 26}, {"sum_logits": -14.078402519226074, "num_tokens": 4, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -22.9373779296875, "logits_per_token": -3.5196006298065186, "logits_per_char": -0.5214223155268917, "bits_per_byte": 0.7522533888200273, "num_chars": 27}, {"sum_logits": -16.322280883789062, "num_tokens": 7, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -26.3458251953125, "logits_per_token": -2.331754411969866, "logits_per_char": -0.41852002266125804, "bits_per_byte": 0.6037967612065673, "num_chars": 39}, {"sum_logits": -13.123754501342773, "num_tokens": 9, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -31.205291748046875, "logits_per_token": -1.4581949445936415, "logits_per_char": -0.3200915732034823, "bits_per_byte": 0.4617945252913423, "num_chars": 41}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 182, "native_id": "Mercury_7086695", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.061086654663086, "logits_per_token_corr": -2.530543327331543, "logits_per_char_corr": -0.4600987867875533, "bits_per_byte_corr": 0.6637822380179085}, "model_output": [{"sum_logits": -4.157513618469238, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -13.043850898742676, "logits_per_token": -4.157513618469238, "logits_per_char": -0.5196892023086548, "bits_per_byte": 0.7497530349747762, "num_chars": 8}, {"sum_logits": -4.075909614562988, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -20.093048095703125, "logits_per_token": -1.3586365381876628, "logits_per_char": -0.22643942303127712, "bits_per_byte": 0.32668303266921633, "num_chars": 18}, {"sum_logits": -5.061086654663086, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -16.9829044342041, "logits_per_token": -2.530543327331543, "logits_per_char": -0.4600987867875533, "bits_per_byte": 0.6637822380179085, "num_chars": 11}, {"sum_logits": -3.0162193775177, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -16.235103607177734, "logits_per_token": -1.50810968875885, "logits_per_char": -0.2742017615925182, "bits_per_byte": 0.395589521652828, "num_chars": 11}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 183, "native_id": "Mercury_SC_402994", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.873870849609375, "logits_per_token_corr": -2.553410121372768, "logits_per_char_corr": -0.49649641248914933, "bits_per_byte_corr": 0.7162929121177716}, "model_output": [{"sum_logits": -24.70173454284668, "num_tokens": 8, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -37.990379333496094, "logits_per_token": -3.087716817855835, "logits_per_char": -0.7265216042013729, "bits_per_byte": 1.048149115480768, "num_chars": 34}, {"sum_logits": -16.642114639282227, "num_tokens": 7, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -34.42803955078125, "logits_per_token": -2.3774449484688893, "logits_per_char": -0.4622809622022841, "bits_per_byte": 0.6669304516670927, "num_chars": 36}, {"sum_logits": -17.873870849609375, "num_tokens": 7, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -35.32854461669922, "logits_per_token": -2.553410121372768, "logits_per_char": -0.49649641248914933, "bits_per_byte": 0.7162929121177716, "num_chars": 36}, {"sum_logits": -19.143556594848633, "num_tokens": 7, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -36.75153350830078, "logits_per_token": -2.7347937992640903, "logits_per_char": -0.5037778051275956, "bits_per_byte": 0.726797741168031, "num_chars": 38}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 184, "native_id": "Mercury_7056298", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.71514129638672, "logits_per_token_corr": -2.21439266204834, "logits_per_char_corr": -0.38511176731275476, "bits_per_byte_corr": 0.5555988368904948}, "model_output": [{"sum_logits": -17.71514129638672, "num_tokens": 8, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -36.686492919921875, "logits_per_token": -2.21439266204834, "logits_per_char": -0.38511176731275476, "bits_per_byte": 0.5555988368904948, "num_chars": 46}, {"sum_logits": -19.064950942993164, "num_tokens": 8, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -36.63367462158203, "logits_per_token": -2.3831188678741455, "logits_per_char": -0.4144554552824601, "bits_per_byte": 0.5979328300058123, "num_chars": 46}, {"sum_logits": -14.861730575561523, "num_tokens": 8, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -35.85961151123047, "logits_per_token": -1.8577163219451904, "logits_per_char": -0.3162070335225856, "bits_per_byte": 0.4561903191575722, "num_chars": 47}, {"sum_logits": -14.220976829528809, "num_tokens": 8, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -36.03665542602539, "logits_per_token": -1.777622103691101, "logits_per_char": -0.30257397509635764, "bits_per_byte": 0.4365219733738895, "num_chars": 47}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 185, "native_id": "Mercury_409115", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.695560455322266, "logits_per_token_corr": -2.1883956061469183, "logits_per_char_corr": -0.32287804025118466, "bits_per_byte_corr": 0.4658145474826659}, "model_output": [{"sum_logits": -15.958768844604492, "num_tokens": 8, "num_tokens_all": 241, "is_greedy": false, "sum_logits_uncond": -38.90150451660156, "logits_per_token": -1.9948461055755615, "logits_per_char": -0.33247435092926025, "bits_per_byte": 0.47965909730876527, "num_chars": 48}, {"sum_logits": -14.704971313476562, "num_tokens": 8, "num_tokens_all": 241, "is_greedy": false, "sum_logits_uncond": -35.88121795654297, "logits_per_token": -1.8381214141845703, "logits_per_char": -0.3063535690307617, "bits_per_byte": 0.4419747747996322, "num_chars": 48}, {"sum_logits": -19.695560455322266, "num_tokens": 9, "num_tokens_all": 242, "is_greedy": false, "sum_logits_uncond": -43.50560760498047, "logits_per_token": -2.1883956061469183, "logits_per_char": -0.32287804025118466, "bits_per_byte": 0.4658145474826659, "num_chars": 61}, {"sum_logits": -17.948558807373047, "num_tokens": 9, "num_tokens_all": 242, "is_greedy": false, "sum_logits_uncond": -38.264923095703125, "logits_per_token": -1.9942843119303386, "logits_per_char": -0.2942386689733286, "bits_per_byte": 0.42449666856589546, "num_chars": 61}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 186, "native_id": "Mercury_409647", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -32.28501892089844, "logits_per_token_corr": -2.9350017200816763, "logits_per_char_corr": -0.6457003784179688, "bits_per_byte_corr": 0.9315487338443998}, "model_output": [{"sum_logits": -18.61166000366211, "num_tokens": 9, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -30.970212936401367, "logits_per_token": -2.067962222629123, "logits_per_char": -0.43282930241074674, "bits_per_byte": 0.6244406881398624, "num_chars": 43}, {"sum_logits": -12.145492553710938, "num_tokens": 8, "num_tokens_all": 230, "is_greedy": false, "sum_logits_uncond": -23.241500854492188, "logits_per_token": -1.5181865692138672, "logits_per_char": -0.2760339216752486, "bits_per_byte": 0.3982327699182998, "num_chars": 44}, {"sum_logits": -22.13386344909668, "num_tokens": 9, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -45.66482925415039, "logits_per_token": -2.459318161010742, "logits_per_char": -0.5030423511158336, "bits_per_byte": 0.7257367053124593, "num_chars": 44}, {"sum_logits": -32.28501892089844, "num_tokens": 11, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -46.66790771484375, "logits_per_token": -2.9350017200816763, "logits_per_char": -0.6457003784179688, "bits_per_byte": 0.9315487338443998, "num_chars": 50}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 187, "native_id": "Mercury_414352", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.802654266357422, "logits_per_token_corr": -2.7605308532714843, "logits_per_char_corr": -0.6572692507789248, "bits_per_byte_corr": 0.9482390886282406}, "model_output": [{"sum_logits": -13.802654266357422, "num_tokens": 5, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -25.629798889160156, "logits_per_token": -2.7605308532714843, "logits_per_char": -0.6572692507789248, "bits_per_byte": 0.9482390886282406, "num_chars": 21}, {"sum_logits": -19.00313949584961, "num_tokens": 9, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -44.50470733642578, "logits_per_token": -2.1114599439832897, "logits_per_char": -0.7601255798339843, "bits_per_byte": 1.0966294044801248, "num_chars": 25}, {"sum_logits": -14.07619571685791, "num_tokens": 6, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -32.8818359375, "logits_per_token": -2.3460326194763184, "logits_per_char": -0.5865081548690796, "bits_per_byte": 0.8461524064711652, "num_chars": 24}, {"sum_logits": -10.776805877685547, "num_tokens": 11, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -30.50576400756836, "logits_per_token": -0.9797096252441406, "logits_per_char": -0.33677518367767334, "bits_per_byte": 0.48586388738659814, "num_chars": 32}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 188, "native_id": "Mercury_185325", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.73201322555542, "logits_per_token_corr": -1.86600661277771, "logits_per_char_corr": -0.33927392959594727, "bits_per_byte_corr": 0.4894688157313361}, "model_output": [{"sum_logits": -8.15488338470459, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -19.071847915649414, "logits_per_token": -4.077441692352295, "logits_per_char": -0.815488338470459, "bits_per_byte": 1.176500981814957, "num_chars": 10}, {"sum_logits": -9.546143531799316, "num_tokens": 5, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -19.030513763427734, "logits_per_token": -1.9092287063598632, "logits_per_char": -0.734318733215332, "bits_per_byte": 1.0593979948423864, "num_chars": 13}, {"sum_logits": -3.73201322555542, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -17.26346206665039, "logits_per_token": -1.86600661277771, "logits_per_char": -0.33927392959594727, "bits_per_byte": 0.4894688157313361, "num_chars": 11}, {"sum_logits": -15.400678634643555, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -17.145355224609375, "logits_per_token": -7.700339317321777, "logits_per_char": -1.2833898862202961, "bits_per_byte": 1.8515402243784025, "num_chars": 12}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 189, "native_id": "Mercury_SC_412374", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.439685821533203, "logits_per_token_corr": -4.359921455383301, "logits_per_char_corr": -0.9178782011333265, "bits_per_byte_corr": 1.324218328916084}, "model_output": [{"sum_logits": -11.405649185180664, "num_tokens": 3, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -24.011150360107422, "logits_per_token": -3.801883061726888, "logits_per_char": -0.7603766123453776, "bits_per_byte": 1.096991567839414, "num_chars": 15}, {"sum_logits": -13.301265716552734, "num_tokens": 4, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -27.330415725708008, "logits_per_token": -3.3253164291381836, "logits_per_char": -0.7824273950913373, "bits_per_byte": 1.128804122754753, "num_chars": 17}, {"sum_logits": -17.439685821533203, "num_tokens": 4, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -25.475772857666016, "logits_per_token": -4.359921455383301, "logits_per_char": -0.9178782011333265, "bits_per_byte": 1.324218328916084, "num_chars": 19}, {"sum_logits": -22.627416610717773, "num_tokens": 4, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -28.289344787597656, "logits_per_token": -5.656854152679443, "logits_per_char": -1.4142135381698608, "bits_per_byte": 2.040278858277159, "num_chars": 16}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 190, "native_id": "Mercury_SC_401818", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.790553092956543, "logits_per_token_corr": -4.8952765464782715, "logits_per_char_corr": -0.8158794244130453, "bits_per_byte_corr": 1.177065199564888}, "model_output": [{"sum_logits": -5.715550899505615, "num_tokens": 2, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -17.995222091674805, "logits_per_token": -2.8577754497528076, "logits_per_char": -0.5715550899505615, "bits_per_byte": 0.8245796938671128, "num_chars": 10}, {"sum_logits": -3.9649407863616943, "num_tokens": 2, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -18.732772827148438, "logits_per_token": -1.9824703931808472, "logits_per_char": -0.36044916239651764, "bits_per_byte": 0.5200182190824102, "num_chars": 11}, {"sum_logits": -9.790553092956543, "num_tokens": 2, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -19.88128089904785, "logits_per_token": -4.8952765464782715, "logits_per_char": -0.8158794244130453, "bits_per_byte": 1.177065199564888, "num_chars": 12}, {"sum_logits": -13.517080307006836, "num_tokens": 3, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -17.565086364746094, "logits_per_token": -4.505693435668945, "logits_per_char": -0.965505736214774, "bits_per_byte": 1.3929303375879027, "num_chars": 14}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 191, "native_id": "Mercury_SC_413549", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.8221435546875, "logits_per_token_corr": -2.8036905924479165, "logits_per_char_corr": -0.7313975458559783, "bits_per_byte_corr": 1.055183612325536}, "model_output": [{"sum_logits": -14.833600997924805, "num_tokens": 6, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -25.09038543701172, "logits_per_token": -2.4722668329874673, "logits_per_char": -0.7416800498962403, "bits_per_byte": 1.0700181299123535, "num_chars": 20}, {"sum_logits": -16.8221435546875, "num_tokens": 6, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -27.47616958618164, "logits_per_token": -2.8036905924479165, "logits_per_char": -0.7313975458559783, "bits_per_byte": 1.055183612325536, "num_chars": 23}, {"sum_logits": -15.222232818603516, "num_tokens": 6, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -26.983867645263672, "logits_per_token": -2.537038803100586, "logits_per_char": -0.6919196735728871, "bits_per_byte": 0.9982290817578318, "num_chars": 22}, {"sum_logits": -20.607887268066406, "num_tokens": 6, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -26.907073974609375, "logits_per_token": -3.434647878011068, "logits_per_char": -1.0303943634033204, "bits_per_byte": 1.4865448382429787, "num_chars": 20}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 192, "native_id": "Mercury_7093958", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.718682289123535, "logits_per_token_corr": -2.5728940963745117, "logits_per_char_corr": -0.4540401346543256, "bits_per_byte_corr": 0.6550414506308233}, "model_output": [{"sum_logits": -7.718682289123535, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -23.52758026123047, "logits_per_token": -2.5728940963745117, "logits_per_char": -0.4540401346543256, "bits_per_byte": 0.6550414506308233, "num_chars": 17}, {"sum_logits": -9.163325309753418, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -20.943069458007812, "logits_per_token": -3.054441769917806, "logits_per_char": -0.43634882427397226, "bits_per_byte": 0.6295182848782418, "num_chars": 21}, {"sum_logits": -14.930553436279297, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -24.969402313232422, "logits_per_token": -3.732638359069824, "logits_per_char": -0.7465276718139648, "bits_per_byte": 1.0770117700131645, "num_chars": 20}, {"sum_logits": -11.170622825622559, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -27.543964385986328, "logits_per_token": -2.7926557064056396, "logits_per_char": -0.5319344202677408, "bits_per_byte": 0.7674191501989668, "num_chars": 21}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 193, "native_id": "Mercury_7102323", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.4972267150878906, "logits_per_token_corr": -0.8324089050292969, "logits_per_char_corr": -0.14689568912281709, "bits_per_byte_corr": 0.21192568222560731}, "model_output": [{"sum_logits": -12.085939407348633, "num_tokens": 3, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -19.769004821777344, "logits_per_token": -4.028646469116211, "logits_per_char": -0.863281386239188, "bits_per_byte": 1.2454517748199212, "num_chars": 14}, {"sum_logits": -7.2550048828125, "num_tokens": 3, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -17.86338233947754, "logits_per_token": -2.4183349609375, "logits_per_char": -0.45343780517578125, "bits_per_byte": 0.6541724728791455, "num_chars": 16}, {"sum_logits": -5.77995491027832, "num_tokens": 3, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -20.784330368041992, "logits_per_token": -1.9266516367594402, "logits_per_char": -0.3042081531725432, "bits_per_byte": 0.43887959398033355, "num_chars": 19}, {"sum_logits": -2.4972267150878906, "num_tokens": 3, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -20.805538177490234, "logits_per_token": -0.8324089050292969, "logits_per_char": -0.14689568912281709, "bits_per_byte": 0.21192568222560731, "num_chars": 17}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 194, "native_id": "Mercury_7222793", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -23.114704132080078, "logits_per_token_corr": -2.5683004591200085, "logits_per_char_corr": -0.48155633608500165, "bits_per_byte_corr": 0.69473893797899}, "model_output": [{"sum_logits": -25.551292419433594, "num_tokens": 7, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -44.913482666015625, "logits_per_token": -3.6501846313476562, "logits_per_char": -0.6905754707955025, "bits_per_byte": 0.9962898070769485, "num_chars": 37}, {"sum_logits": -23.114704132080078, "num_tokens": 9, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -46.823814392089844, "logits_per_token": -2.5683004591200085, "logits_per_char": -0.48155633608500165, "bits_per_byte": 0.69473893797899, "num_chars": 48}, {"sum_logits": -23.423900604248047, "num_tokens": 7, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -37.80818176269531, "logits_per_token": -3.346271514892578, "logits_per_char": -0.459292168710746, "bits_per_byte": 0.6626185341186065, "num_chars": 51}, {"sum_logits": -23.891040802001953, "num_tokens": 9, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -39.65195083618164, "logits_per_token": -2.654560089111328, "logits_per_char": -0.497730016708374, "bits_per_byte": 0.718072626807268, "num_chars": 48}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 195, "native_id": "Mercury_SC_400701", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.24836540222168, "logits_per_token_corr": -2.464052200317383, "logits_per_char_corr": -0.42069183907857755, "bits_per_byte_corr": 0.6069300299815577}, "model_output": [{"sum_logits": -17.24836540222168, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -36.22628402709961, "logits_per_token": -2.464052200317383, "logits_per_char": -0.42069183907857755, "bits_per_byte": 0.6069300299815577, "num_chars": 41}, {"sum_logits": -24.441837310791016, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -40.85960388183594, "logits_per_token": -3.055229663848877, "logits_per_char": -0.5961423734339272, "bits_per_byte": 0.8600516458175211, "num_chars": 41}, {"sum_logits": -26.312313079833984, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -38.09397506713867, "logits_per_token": -3.7589018685477122, "logits_per_char": -0.6417637336544875, "bits_per_byte": 0.9258693559663798, "num_chars": 41}, {"sum_logits": -16.338420867919922, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -37.71678924560547, "logits_per_token": -2.0423026084899902, "logits_per_char": -0.3984980699492664, "bits_per_byte": 0.5749111893200429, "num_chars": 41}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 196, "native_id": "Mercury_409301", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.439908981323242, "logits_per_token_corr": -2.2711009979248047, "logits_per_char_corr": -0.5378923416137695, "bits_per_byte_corr": 0.776014613778895}, "model_output": [{"sum_logits": -19.940105438232422, "num_tokens": 9, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -40.77137756347656, "logits_per_token": -2.2155672709147134, "logits_per_char": -0.5112847548264724, "bits_per_byte": 0.7376279802708111, "num_chars": 39}, {"sum_logits": -20.439908981323242, "num_tokens": 9, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -39.60894775390625, "logits_per_token": -2.2711009979248047, "logits_per_char": -0.5378923416137695, "bits_per_byte": 0.776014613778895, "num_chars": 38}, {"sum_logits": -24.68466567993164, "num_tokens": 7, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -35.99470520019531, "logits_per_token": -3.526380811418806, "logits_per_char": -0.7052761622837611, "bits_per_byte": 1.017498421784713, "num_chars": 35}, {"sum_logits": -24.566020965576172, "num_tokens": 7, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -34.20378494262695, "logits_per_token": -3.5094315665108815, "logits_per_char": -0.7018863133021763, "bits_per_byte": 1.0126079034696145, "num_chars": 35}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 197, "native_id": "Mercury_SC_400383", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.166830062866211, "logits_per_token_corr": -3.5417075157165527, "logits_per_char_corr": -0.5059582165309361, "bits_per_byte_corr": 0.7299434098867302}, "model_output": [{"sum_logits": -32.89295196533203, "num_tokens": 8, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -45.352725982666016, "logits_per_token": -4.111618995666504, "logits_per_char": -0.865603999087685, "bits_per_byte": 1.2488025968583552, "num_chars": 38}, {"sum_logits": -19.819721221923828, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -32.06256103515625, "logits_per_token": -2.8313887459891185, "logits_per_char": -0.5662777491978237, "bits_per_byte": 0.8169661005340514, "num_chars": 35}, {"sum_logits": -14.166830062866211, "num_tokens": 4, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -28.28828239440918, "logits_per_token": -3.5417075157165527, "logits_per_char": -0.5059582165309361, "bits_per_byte": 0.7299434098867302, "num_chars": 28}, {"sum_logits": -22.648788452148438, "num_tokens": 3, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -30.421878814697266, "logits_per_token": -7.5495961507161455, "logits_per_char": -1.0294903841885654, "bits_per_byte": 1.4852406719127842, "num_chars": 22}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 198, "native_id": "CSZ_2005_5_CSZ10021", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -26.978029251098633, "logits_per_token_corr": -2.2481691042582193, "logits_per_char_corr": -0.48175052234104704, "bits_per_byte_corr": 0.6950190895275956}, "model_output": [{"sum_logits": -16.39080810546875, "num_tokens": 9, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -30.80097007751465, "logits_per_token": -1.8212009006076388, "logits_per_char": -0.38118158384811046, "bits_per_byte": 0.5499287806962646, "num_chars": 43}, {"sum_logits": -26.978029251098633, "num_tokens": 12, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -45.24592590332031, "logits_per_token": -2.2481691042582193, "logits_per_char": -0.48175052234104704, "bits_per_byte": 0.6950190895275956, "num_chars": 56}, {"sum_logits": -30.862960815429688, "num_tokens": 12, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -45.69171905517578, "logits_per_token": -2.571913401285807, "logits_per_char": -0.505950177302126, "bits_per_byte": 0.7299318117311934, "num_chars": 61}, {"sum_logits": -22.086400985717773, "num_tokens": 9, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -40.068702697753906, "logits_per_token": -2.4540445539686413, "logits_per_char": -0.501963658766313, "bits_per_byte": 0.7241804812091599, "num_chars": 44}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 199, "native_id": "Mercury_SC_407070", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -23.88797950744629, "logits_per_token_corr": -2.388797950744629, "logits_per_char_corr": -0.6635549863179525, "bits_per_byte_corr": 0.9573074881187418}, "model_output": [{"sum_logits": -16.458606719970703, "num_tokens": 6, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -29.450639724731445, "logits_per_token": -2.743101119995117, "logits_per_char": -0.5486202239990234, "bits_per_byte": 0.791491676495352, "num_chars": 30}, {"sum_logits": -23.88797950744629, "num_tokens": 10, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -48.26023864746094, "logits_per_token": -2.388797950744629, "logits_per_char": -0.6635549863179525, "bits_per_byte": 0.9573074881187418, "num_chars": 36}, {"sum_logits": -19.346416473388672, "num_tokens": 9, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -28.653348922729492, "logits_per_token": -2.149601830376519, "logits_per_char": -0.5091162229839125, "bits_per_byte": 0.7344994501355379, "num_chars": 38}, {"sum_logits": -41.03010940551758, "num_tokens": 8, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -45.50782775878906, "logits_per_token": -5.128763675689697, "logits_per_char": -0.9325024864890359, "bits_per_byte": 1.3453167128753263, "num_chars": 44}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 200, "native_id": "Mercury_SC_400708", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -23.65278434753418, "logits_per_token_corr": -2.62808714972602, "logits_per_char_corr": -0.4927663405736287, "bits_per_byte_corr": 0.7109115558630869}, "model_output": [{"sum_logits": -21.074459075927734, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -32.568634033203125, "logits_per_token": -3.010637010846819, "logits_per_char": -0.6798212605137979, "bits_per_byte": 0.9807747612348449, "num_chars": 31}, {"sum_logits": -28.35199546813965, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -39.57756042480469, "logits_per_token": -4.050285066877093, "logits_per_char": -0.885999858379364, "bits_per_byte": 1.2782276019131507, "num_chars": 32}, {"sum_logits": -23.65278434753418, "num_tokens": 9, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -38.60723114013672, "logits_per_token": -2.62808714972602, "logits_per_char": -0.4927663405736287, "bits_per_byte": 0.7109115558630869, "num_chars": 48}, {"sum_logits": -24.66592025756836, "num_tokens": 8, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -32.0711669921875, "logits_per_token": -3.083240032196045, "logits_per_char": -0.7047405787876674, "bits_per_byte": 1.016725738130916, "num_chars": 35}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 201, "native_id": "Mercury_7075040", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.072006225585938, "logits_per_token_corr": -2.8453343709309897, "logits_per_char_corr": -0.3970234005950218, "bits_per_byte_corr": 0.5727836911557218}, "model_output": [{"sum_logits": -18.401336669921875, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -32.62037658691406, "logits_per_token": -3.680267333984375, "logits_per_char": -0.6133778889973959, "bits_per_byte": 0.8849172386481199, "num_chars": 30}, {"sum_logits": -22.909927368164062, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -36.306114196777344, "logits_per_token": -3.8183212280273438, "logits_per_char": -0.47729015350341797, "bits_per_byte": 0.6885841375250079, "num_chars": 48}, {"sum_logits": -17.072006225585938, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -32.397705078125, "logits_per_token": -2.8453343709309897, "logits_per_char": -0.3970234005950218, "bits_per_byte": 0.5727836911557218, "num_chars": 43}, {"sum_logits": -21.620216369628906, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -39.1962890625, "logits_per_token": -3.0886023385184154, "logits_per_char": -0.42392581116919426, "bits_per_byte": 0.6115956654790671, "num_chars": 51}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 202, "native_id": "Mercury_7137165", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.6300101280212402, "logits_per_token_corr": -1.3150050640106201, "logits_per_char_corr": -0.29222334755791557, "bits_per_byte_corr": 0.42158917435407967}, "model_output": [{"sum_logits": -2.6300101280212402, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": true, "sum_logits_uncond": -14.222358703613281, "logits_per_token": -1.3150050640106201, "logits_per_char": -0.29222334755791557, "bits_per_byte": 0.42158917435407967, "num_chars": 9}, {"sum_logits": -3.8291354179382324, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -13.537635803222656, "logits_per_token": -1.9145677089691162, "logits_per_char": -0.38291354179382325, "bits_per_byte": 0.5524274678355745, "num_chars": 10}, {"sum_logits": -10.421004295349121, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -15.598295211791992, "logits_per_token": -5.2105021476745605, "logits_per_char": -0.8684170246124268, "bits_per_byte": 1.252860934832797, "num_chars": 12}, {"sum_logits": -6.685487270355225, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -13.713146209716797, "logits_per_token": -3.3427436351776123, "logits_per_char": -0.6077715700322931, "bits_per_byte": 0.8768290300795185, "num_chars": 11}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 203, "native_id": "Mercury_SC_400046", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.519742965698242, "logits_per_token_corr": -3.3149678707122803, "logits_per_char_corr": -0.6314224515642438, "bits_per_byte_corr": 0.9109500395783408}, "model_output": [{"sum_logits": -19.47873878479004, "num_tokens": 6, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -28.858322143554688, "logits_per_token": -3.2464564641316733, "logits_per_char": -0.9275589897519066, "bits_per_byte": 1.338184754648014, "num_chars": 21}, {"sum_logits": -26.519742965698242, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -38.97453689575195, "logits_per_token": -3.3149678707122803, "logits_per_char": -0.6314224515642438, "bits_per_byte": 0.9109500395783408, "num_chars": 42}, {"sum_logits": -23.2066650390625, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -33.89345169067383, "logits_per_token": -3.3152378627232144, "logits_per_char": -0.6630475725446429, "bits_per_byte": 0.9565754447843087, "num_chars": 35}, {"sum_logits": -18.32575798034668, "num_tokens": 11, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -33.29452133178711, "logits_per_token": -1.6659779982133345, "logits_per_char": -0.38178662459055585, "bits_per_byte": 0.550801669974927, "num_chars": 48}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 204, "native_id": "Mercury_7099330", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.185531616210938, "logits_per_token_corr": -6.092765808105469, "logits_per_char_corr": -0.5802634102957589, "bits_per_byte_corr": 0.8371431444436107}, "model_output": [{"sum_logits": -10.967428207397461, "num_tokens": 2, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -17.585660934448242, "logits_per_token": -5.4837141036987305, "logits_per_char": -0.6451428357292625, "bits_per_byte": 0.9307443697723188, "num_chars": 17}, {"sum_logits": -15.999459266662598, "num_tokens": 2, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -22.12740707397461, "logits_per_token": -7.999729633331299, "logits_per_char": -0.7999729633331298, "bits_per_byte": 1.1541170270467842, "num_chars": 20}, {"sum_logits": -7.297170162200928, "num_tokens": 2, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -17.547393798828125, "logits_per_token": -3.648585081100464, "logits_per_char": -0.3648585081100464, "bits_per_byte": 0.5263795602768877, "num_chars": 20}, {"sum_logits": -12.185531616210938, "num_tokens": 2, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -16.429866790771484, "logits_per_token": -6.092765808105469, "logits_per_char": -0.5802634102957589, "bits_per_byte": 0.8371431444436107, "num_chars": 21}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 205, "native_id": "MDSA_2007_5_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.147666931152344, "logits_per_token_corr": -1.536916732788086, "logits_per_char_corr": -0.5123055775960287, "bits_per_byte_corr": 0.7391007162180776}, "model_output": [{"sum_logits": -9.393122673034668, "num_tokens": 3, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -22.28094482421875, "logits_per_token": -3.131040891011556, "logits_per_char": -0.9393122673034668, "bits_per_byte": 1.3551411498858537, "num_chars": 10}, {"sum_logits": -3.326037883758545, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": true, "sum_logits_uncond": -18.56544303894043, "logits_per_token": -0.8315094709396362, "logits_per_char": -0.2771698236465454, "bits_per_byte": 0.3998715300592269, "num_chars": 12}, {"sum_logits": -6.147666931152344, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -24.16274642944336, "logits_per_token": -1.536916732788086, "logits_per_char": -0.5123055775960287, "bits_per_byte": 0.7391007162180776, "num_chars": 12}, {"sum_logits": -5.328301906585693, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -21.698043823242188, "logits_per_token": -1.3320754766464233, "logits_per_char": -0.4440251588821411, "bits_per_byte": 0.6405928947496593, "num_chars": 12}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 206, "native_id": "Mercury_7271758", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -21.164976119995117, "logits_per_token_corr": -2.116497611999512, "logits_per_char_corr": -0.38481774763627485, "bits_per_byte_corr": 0.5551746561613132}, "model_output": [{"sum_logits": -20.076581954956055, "num_tokens": 8, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -41.413265228271484, "logits_per_token": -2.509572744369507, "logits_per_char": -0.42716131819055436, "bits_per_byte": 0.6162635154135481, "num_chars": 47}, {"sum_logits": -21.164976119995117, "num_tokens": 10, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -43.614234924316406, "logits_per_token": -2.116497611999512, "logits_per_char": -0.38481774763627485, "bits_per_byte": 0.5551746561613132, "num_chars": 55}, {"sum_logits": -20.804412841796875, "num_tokens": 8, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -40.317710876464844, "logits_per_token": -2.6005516052246094, "logits_per_char": -0.4079296635646446, "bits_per_byte": 0.5885181026566388, "num_chars": 51}, {"sum_logits": -20.5631103515625, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -37.88136291503906, "logits_per_token": -2.937587193080357, "logits_per_char": -0.4673434170809659, "bits_per_byte": 0.6742340302152964, "num_chars": 44}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 207, "native_id": "MCAS_2003_8_31", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.98637580871582, "logits_per_token_corr": -1.6986375808715821, "logits_per_char_corr": -0.2739738033663842, "bits_per_byte_corr": 0.39526064745045447}, "model_output": [{"sum_logits": -13.61648178100586, "num_tokens": 10, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -42.029937744140625, "logits_per_token": -1.361648178100586, "logits_per_char": -0.252157070018627, "bits_per_byte": 0.3637857544412257, "num_chars": 54}, {"sum_logits": -22.267847061157227, "num_tokens": 10, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -52.200645446777344, "logits_per_token": -2.2267847061157227, "logits_per_char": -0.42014805775768355, "bits_per_byte": 0.6061455193665753, "num_chars": 53}, {"sum_logits": -16.98637580871582, "num_tokens": 10, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -43.86644744873047, "logits_per_token": -1.6986375808715821, "logits_per_char": -0.2739738033663842, "bits_per_byte": 0.39526064745045447, "num_chars": 62}, {"sum_logits": -28.268550872802734, "num_tokens": 10, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -62.38211441040039, "logits_per_token": -2.8268550872802733, "logits_per_char": -0.5333688843925044, "bits_per_byte": 0.7694886444780977, "num_chars": 53}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 208, "native_id": "AKDE&ED_2008_8_53", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -38.1552619934082, "logits_per_token_corr": -3.8155261993408205, "logits_per_char_corr": -0.646699355820478, "bits_per_byte_corr": 0.9329899535889611}, "model_output": [{"sum_logits": -36.1418571472168, "num_tokens": 10, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -40.23255157470703, "logits_per_token": -3.6141857147216796, "logits_per_char": -0.6453903062002999, "bits_per_byte": 0.9311013941936512, "num_chars": 56}, {"sum_logits": -38.289154052734375, "num_tokens": 10, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -49.01182556152344, "logits_per_token": -3.8289154052734373, "logits_per_char": -0.6276910500448258, "bits_per_byte": 0.905566765110707, "num_chars": 61}, {"sum_logits": -38.1552619934082, "num_tokens": 10, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -46.565547943115234, "logits_per_token": -3.8155261993408205, "logits_per_char": -0.646699355820478, "bits_per_byte": 0.9329899535889611, "num_chars": 59}, {"sum_logits": -40.916282653808594, "num_tokens": 10, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -54.18467712402344, "logits_per_token": -4.0916282653808596, "logits_per_char": -0.6393169164657593, "bits_per_byte": 0.9223393449422372, "num_chars": 64}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 209, "native_id": "TIMSS_2007_8_pg109", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.554975509643555, "logits_per_token_corr": -3.8887438774108887, "logits_per_char_corr": -0.7407131195068359, "bits_per_byte_corr": 1.068623144234674}, "model_output": [{"sum_logits": -14.238085746765137, "num_tokens": 5, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -24.908926010131836, "logits_per_token": -2.8476171493530273, "logits_per_char": -0.5273365091394495, "bits_per_byte": 0.7607857666157279, "num_chars": 27}, {"sum_logits": -17.604354858398438, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -25.056705474853516, "logits_per_token": -4.401088714599609, "logits_per_char": -0.6520131429036459, "bits_per_byte": 0.9406561278621928, "num_chars": 27}, {"sum_logits": -15.554975509643555, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -25.06862449645996, "logits_per_token": -3.8887438774108887, "logits_per_char": -0.7407131195068359, "bits_per_byte": 1.068623144234674, "num_chars": 21}, {"sum_logits": -12.868340492248535, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -25.84186553955078, "logits_per_token": -3.217085123062134, "logits_per_char": -0.45958358900887625, "bits_per_byte": 0.6630389647375337, "num_chars": 28}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 210, "native_id": "Mercury_175385", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.841205596923828, "logits_per_token_corr": -2.6841205596923827, "logits_per_char_corr": -0.5368241119384766, "bits_per_byte_corr": 0.7744734841238183}, "model_output": [{"sum_logits": -11.401605606079102, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -21.34010887145996, "logits_per_token": -2.2803211212158203, "logits_per_char": -0.4072002002171108, "bits_per_byte": 0.5874657095026408, "num_chars": 28}, {"sum_logits": -22.011554718017578, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -35.660675048828125, "logits_per_token": -2.7514443397521973, "logits_per_char": -0.511896621349246, "bits_per_byte": 0.7385107170689031, "num_chars": 43}, {"sum_logits": -26.841205596923828, "num_tokens": 10, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -39.125404357910156, "logits_per_token": -2.6841205596923827, "logits_per_char": -0.5368241119384766, "bits_per_byte": 0.7744734841238183, "num_chars": 50}, {"sum_logits": -32.276588439941406, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -40.59487533569336, "logits_per_token": -4.034573554992676, "logits_per_char": -0.7016649660856827, "bits_per_byte": 1.0122885669380643, "num_chars": 46}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 211, "native_id": "Mercury_410669", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.437233924865723, "logits_per_token_corr": -1.6796542406082153, "logits_per_char_corr": -1.1197694937388103, "bits_per_byte_corr": 1.6154858955568874}, "model_output": [{"sum_logits": -10.815298080444336, "num_tokens": 8, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -28.350181579589844, "logits_per_token": -1.351912260055542, "logits_per_char": -0.9832089164040305, "bits_per_byte": 1.4184706278549253, "num_chars": 11}, {"sum_logits": -11.603919982910156, "num_tokens": 8, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -27.580474853515625, "logits_per_token": -1.4504899978637695, "logits_per_char": -1.054901816628196, "bits_per_byte": 1.5219016194753505, "num_chars": 11}, {"sum_logits": -11.778193473815918, "num_tokens": 8, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -28.46094512939453, "logits_per_token": -1.4722741842269897, "logits_per_char": -1.0707448612559924, "bits_per_byte": 1.5447583013924713, "num_chars": 11}, {"sum_logits": -13.437233924865723, "num_tokens": 8, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -30.69891357421875, "logits_per_token": -1.6796542406082153, "logits_per_char": -1.1197694937388103, "bits_per_byte": 1.6154858955568874, "num_chars": 12}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 212, "native_id": "MEAP_2005_8_39", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.35097312927246, "logits_per_token_corr": -2.7251621882120767, "logits_per_char_corr": -0.6288835818950946, "bits_per_byte_corr": 0.9072872248971932}, "model_output": [{"sum_logits": -27.569368362426758, "num_tokens": 7, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -37.029502868652344, "logits_per_token": -3.938481194632394, "logits_per_char": -0.835435404922023, "bits_per_byte": 1.2052785156649315, "num_chars": 33}, {"sum_logits": -19.234895706176758, "num_tokens": 6, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -38.593955993652344, "logits_per_token": -3.2058159510294595, "logits_per_char": -0.663272265730233, "bits_per_byte": 0.9568996085288813, "num_chars": 29}, {"sum_logits": -16.35097312927246, "num_tokens": 6, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -29.12133026123047, "logits_per_token": -2.7251621882120767, "logits_per_char": -0.6288835818950946, "bits_per_byte": 0.9072872248971932, "num_chars": 26}, {"sum_logits": -12.43099594116211, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -28.601919174194336, "logits_per_token": -2.4861991882324217, "logits_per_char": -0.5404780843983525, "bits_per_byte": 0.77974505207123, "num_chars": 23}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 213, "native_id": "Mercury_SC_408568", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.645559310913086, "logits_per_token_corr": -1.6075932184855144, "logits_per_char_corr": -0.2836929209092084, "bits_per_byte_corr": 0.4092823701313139}, "model_output": [{"sum_logits": -5.684550762176514, "num_tokens": 7, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -29.098819732666016, "logits_per_token": -0.8120786803109306, "logits_per_char": -0.18337260523150045, "bits_per_byte": 0.26455074820256536, "num_chars": 31}, {"sum_logits": -9.645559310913086, "num_tokens": 6, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -37.089744567871094, "logits_per_token": -1.6075932184855144, "logits_per_char": -0.2836929209092084, "bits_per_byte": 0.4092823701313139, "num_chars": 34}, {"sum_logits": -10.350045204162598, "num_tokens": 8, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -33.53080749511719, "logits_per_token": -1.2937556505203247, "logits_per_char": -0.2875012556711833, "bits_per_byte": 0.41477663580646407, "num_chars": 36}, {"sum_logits": -8.829988479614258, "num_tokens": 8, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -24.001312255859375, "logits_per_token": -1.1037485599517822, "logits_per_char": -0.23864833728687182, "bits_per_byte": 0.34429677272041403, "num_chars": 37}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 214, "native_id": "AKDE&ED_2008_8_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -42.467979431152344, "logits_per_token_corr": -3.0334271022251675, "logits_per_char_corr": -0.5242960423599055, "bits_per_byte_corr": 0.7563993002708889}, "model_output": [{"sum_logits": -50.153900146484375, "num_tokens": 14, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -59.935508728027344, "logits_per_token": -3.582421439034598, "logits_per_char": -0.7063929598096391, "bits_per_byte": 1.0191096200369754, "num_chars": 71}, {"sum_logits": -42.467979431152344, "num_tokens": 14, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -63.13752746582031, "logits_per_token": -3.0334271022251675, "logits_per_char": -0.5242960423599055, "bits_per_byte": 0.7563993002708889, "num_chars": 81}, {"sum_logits": -57.835304260253906, "num_tokens": 18, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -72.38482666015625, "logits_per_token": -3.2130724589029946, "logits_per_char": -0.6804153442382812, "bits_per_byte": 0.9816318428780304, "num_chars": 85}, {"sum_logits": -51.161956787109375, "num_tokens": 15, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -65.66029357910156, "logits_per_token": -3.410797119140625, "logits_per_char": -0.5748534470461728, "bits_per_byte": 0.8293382172920357, "num_chars": 89}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 215, "native_id": "Mercury_7082845", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.7433857917785645, "logits_per_token_corr": -1.148677158355713, "logits_per_char_corr": -0.22973543167114258, "bits_per_byte_corr": 0.3314381679886808}, "model_output": [{"sum_logits": -16.000473022460938, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -36.704708099365234, "logits_per_token": -2.2857818603515625, "logits_per_char": -0.48486281886245264, "bits_per_byte": 0.6995091842848067, "num_chars": 33}, {"sum_logits": -5.7433857917785645, "num_tokens": 5, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -31.2071475982666, "logits_per_token": -1.148677158355713, "logits_per_char": -0.22973543167114258, "bits_per_byte": 0.3314381679886808, "num_chars": 25}, {"sum_logits": -17.27512550354004, "num_tokens": 5, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -29.198772430419922, "logits_per_token": -3.4550251007080077, "logits_per_char": -0.5234886516224254, "bits_per_byte": 0.755234481657866, "num_chars": 33}, {"sum_logits": -17.8782901763916, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -38.40321350097656, "logits_per_token": -2.5540414537702287, "logits_per_char": -0.5417663689815637, "bits_per_byte": 0.7816036538506839, "num_chars": 33}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 216, "native_id": "Mercury_SC_405726", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.937349319458008, "logits_per_token_corr": -3.8228915532430015, "logits_per_char_corr": -0.6036144557752108, "bits_per_byte_corr": 0.8708315819564127}, "model_output": [{"sum_logits": -17.46148681640625, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -23.254390716552734, "logits_per_token": -4.3653717041015625, "logits_per_char": -0.582049560546875, "bits_per_byte": 0.8397200145531802, "num_chars": 30}, {"sum_logits": -29.79384994506836, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -36.0044059753418, "logits_per_token": -5.958769989013672, "logits_per_char": -0.9310578107833862, "bits_per_byte": 1.3432324863990912, "num_chars": 32}, {"sum_logits": -18.15927505493164, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -36.205322265625, "logits_per_token": -3.026545842488607, "logits_per_char": -0.47787565934030635, "bits_per_byte": 0.689428843892299, "num_chars": 38}, {"sum_logits": -22.937349319458008, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -36.498046875, "logits_per_token": -3.8228915532430015, "logits_per_char": -0.6036144557752108, "bits_per_byte": 0.8708315819564127, "num_chars": 38}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 217, "native_id": "Mercury_SC_415407", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -41.610572814941406, "logits_per_token_corr": -4.1610572814941404, "logits_per_char_corr": -0.9246793958875869, "bits_per_byte_corr": 1.3340303788601826}, "model_output": [{"sum_logits": -41.610572814941406, "num_tokens": 10, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -51.571834564208984, "logits_per_token": -4.1610572814941404, "logits_per_char": -0.9246793958875869, "bits_per_byte": 1.3340303788601826, "num_chars": 45}, {"sum_logits": -38.70101547241211, "num_tokens": 11, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -54.298927307128906, "logits_per_token": -3.5182741338556465, "logits_per_char": -0.9000236156374909, "bits_per_byte": 1.2984596069640955, "num_chars": 43}, {"sum_logits": -39.56885528564453, "num_tokens": 11, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -52.9211311340332, "logits_per_token": -3.597168662331321, "logits_per_char": -0.9202059368754543, "bits_per_byte": 1.327576541727754, "num_chars": 43}, {"sum_logits": -40.895572662353516, "num_tokens": 10, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -50.22951126098633, "logits_per_token": -4.089557266235351, "logits_per_char": -0.9087905036078558, "bits_per_byte": 1.3111075527629792, "num_chars": 45}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 218, "native_id": "Mercury_SC_401792", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.9715018272399902, "logits_per_token_corr": -3.9715018272399902, "logits_per_char_corr": -0.661916971206665, "bits_per_byte_corr": 0.9549443318407845}, "model_output": [{"sum_logits": -2.7677512168884277, "num_tokens": 1, "num_tokens_all": 174, "is_greedy": false, "sum_logits_uncond": -12.446762084960938, "logits_per_token": -2.7677512168884277, "logits_per_char": -0.6919378042221069, "bits_per_byte": 0.9982552387555493, "num_chars": 4}, {"sum_logits": -2.3029189109802246, "num_tokens": 1, "num_tokens_all": 174, "is_greedy": false, "sum_logits_uncond": -12.599719047546387, "logits_per_token": -2.3029189109802246, "logits_per_char": -0.4605837821960449, "bits_per_byte": 0.6644819384885938, "num_chars": 5}, {"sum_logits": -3.9715018272399902, "num_tokens": 1, "num_tokens_all": 174, "is_greedy": false, "sum_logits_uncond": -13.36142349243164, "logits_per_token": -3.9715018272399902, "logits_per_char": -0.661916971206665, "bits_per_byte": 0.9549443318407845, "num_chars": 6}, {"sum_logits": -7.964725017547607, "num_tokens": 1, "num_tokens_all": 174, "is_greedy": false, "sum_logits_uncond": -13.445572853088379, "logits_per_token": -7.964725017547607, "logits_per_char": -0.8849694463941786, "bits_per_byte": 1.27674103165205, "num_chars": 9}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 219, "native_id": "LEAP_2000_8_4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.416278839111328, "logits_per_token_corr": -2.630896977015904, "logits_per_char_corr": -0.4846389168187192, "bits_per_byte_corr": 0.6991861619166673}, "model_output": [{"sum_logits": -15.681896209716797, "num_tokens": 6, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -40.62063980102539, "logits_per_token": -2.613649368286133, "logits_per_char": -0.5058676196682838, "bits_per_byte": 0.7298127062422616, "num_chars": 31}, {"sum_logits": -18.416278839111328, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -38.50529479980469, "logits_per_token": -2.630896977015904, "logits_per_char": -0.4846389168187192, "bits_per_byte": 0.6991861619166673, "num_chars": 38}, {"sum_logits": -12.307474136352539, "num_tokens": 8, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -27.54348373413086, "logits_per_token": -1.5384342670440674, "logits_per_char": -0.3326344361176362, "bits_per_byte": 0.4798900514161552, "num_chars": 37}, {"sum_logits": -11.557537078857422, "num_tokens": 8, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -31.040863037109375, "logits_per_token": -1.4446921348571777, "logits_per_char": -0.31236586699614655, "bits_per_byte": 0.45064868725864593, "num_chars": 37}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 220, "native_id": "Mercury_SC_413439", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -26.485506057739258, "logits_per_token_corr": -3.3106882572174072, "logits_per_char_corr": -0.7158244880470069, "bits_per_byte_corr": 1.03271643905304}, "model_output": [{"sum_logits": -26.485506057739258, "num_tokens": 8, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -40.381065368652344, "logits_per_token": -3.3106882572174072, "logits_per_char": -0.7158244880470069, "bits_per_byte": 1.03271643905304, "num_chars": 37}, {"sum_logits": -29.19595718383789, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -43.036705017089844, "logits_per_token": -4.1708510262625555, "logits_per_char": -0.6346947213877803, "bits_per_byte": 0.9156709270252107, "num_chars": 46}, {"sum_logits": -27.471956253051758, "num_tokens": 8, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -37.99738693237305, "logits_per_token": -3.4339945316314697, "logits_per_char": -0.6867989063262939, "bits_per_byte": 0.9908413762456199, "num_chars": 40}, {"sum_logits": -25.97045135498047, "num_tokens": 8, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -35.35148620605469, "logits_per_token": -3.2463064193725586, "logits_per_char": -0.6834329303942228, "bits_per_byte": 0.9859852994606657, "num_chars": 38}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 221, "native_id": "ACTAAP_2014_7_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.023908615112305, "logits_per_token_corr": -1.4567189650102095, "logits_per_char_corr": -0.3560868581136068, "bits_per_byte_corr": 0.5137247443266015}, "model_output": [{"sum_logits": -19.564655303955078, "num_tokens": 11, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -30.031444549560547, "logits_per_token": -1.7786050276322798, "logits_per_char": -0.4347701178656684, "bits_per_byte": 0.6272406929719605, "num_chars": 45}, {"sum_logits": -16.023908615112305, "num_tokens": 11, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -27.559051513671875, "logits_per_token": -1.4567189650102095, "logits_per_char": -0.3560868581136068, "bits_per_byte": 0.5137247443266015, "num_chars": 45}, {"sum_logits": -22.668027877807617, "num_tokens": 10, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -32.77216720581055, "logits_per_token": -2.2668027877807617, "logits_per_char": -0.48229846548526845, "bits_per_byte": 0.6958096043844536, "num_chars": 47}, {"sum_logits": -19.269800186157227, "num_tokens": 10, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -28.262569427490234, "logits_per_token": -1.9269800186157227, "logits_per_char": -0.4099957486416431, "bits_per_byte": 0.5914988333512814, "num_chars": 47}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 222, "native_id": "Mercury_SC_402638", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.640377044677734, "logits_per_token_corr": -3.5467923482259116, "logits_per_char_corr": -0.8184905418982873, "bits_per_byte_corr": 1.1808322458120277}, "model_output": [{"sum_logits": -10.640377044677734, "num_tokens": 3, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -28.46862030029297, "logits_per_token": -3.5467923482259116, "logits_per_char": -0.8184905418982873, "bits_per_byte": 1.1808322458120277, "num_chars": 13}, {"sum_logits": -9.533313751220703, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -17.186378479003906, "logits_per_token": -4.766656875610352, "logits_per_char": -0.7333318270169772, "bits_per_byte": 1.0579741901641961, "num_chars": 13}, {"sum_logits": -6.6612653732299805, "num_tokens": 6, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -19.51083755493164, "logits_per_token": -1.11021089553833, "logits_per_char": -0.39183913960176353, "bits_per_byte": 0.5653043835300686, "num_chars": 17}, {"sum_logits": -15.673523902893066, "num_tokens": 4, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -29.226272583007812, "logits_per_token": -3.9183809757232666, "logits_per_char": -0.7836761951446534, "bits_per_byte": 1.1306057603987352, "num_chars": 20}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 223, "native_id": "Mercury_SC_406725", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.838130950927734, "logits_per_token_corr": -2.139688491821289, "logits_per_char_corr": -0.4937742673433744, "bits_per_byte_corr": 0.7123656868153793}, "model_output": [{"sum_logits": -12.838130950927734, "num_tokens": 6, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -31.176082611083984, "logits_per_token": -2.139688491821289, "logits_per_char": -0.4937742673433744, "bits_per_byte": 0.7123656868153793, "num_chars": 26}, {"sum_logits": -13.965171813964844, "num_tokens": 7, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -30.79562759399414, "logits_per_token": -1.9950245448521204, "logits_per_char": -0.3774370760531039, "bits_per_byte": 0.5445265978698347, "num_chars": 37}, {"sum_logits": -13.290914535522461, "num_tokens": 7, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -26.929763793945312, "logits_per_token": -1.8987020765032088, "logits_per_char": -0.40275498592492304, "bits_per_byte": 0.5810526208876081, "num_chars": 33}, {"sum_logits": -25.607219696044922, "num_tokens": 8, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -43.66949462890625, "logits_per_token": -3.2009024620056152, "logits_per_char": -0.5955167371173238, "bits_per_byte": 0.8591490434061567, "num_chars": 43}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 224, "native_id": "NYSEDREGENTS_2015_4_29", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.2580997943878174, "logits_per_token_corr": -2.2580997943878174, "logits_per_char_corr": -0.2508999771542019, "bits_per_byte_corr": 0.3619721527997814}, "model_output": [{"sum_logits": -2.2580997943878174, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -14.924777030944824, "logits_per_token": -2.2580997943878174, "logits_per_char": -0.2508999771542019, "bits_per_byte": 0.3619721527997814, "num_chars": 9}, {"sum_logits": -7.150601387023926, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -7.150601387023926, "logits_per_char": -1.191766897837321, "bits_per_byte": 1.7193561934067623, "num_chars": 6}, {"sum_logits": -6.964765548706055, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -17.23819351196289, "logits_per_token": -3.4823827743530273, "logits_per_char": -0.5357511960543119, "bits_per_byte": 0.7729255936984418, "num_chars": 13}, {"sum_logits": -8.303295135498047, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -20.52342987060547, "logits_per_token": -2.7677650451660156, "logits_per_char": -0.6919412612915039, "bits_per_byte": 0.9982602262524243, "num_chars": 12}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 225, "native_id": "Mercury_406136", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.6781063079834, "logits_per_token_corr": -2.7796843846639, "logits_per_char_corr": -0.45075962994549723, "bits_per_byte_corr": 0.6503086827557804}, "model_output": [{"sum_logits": -16.6781063079834, "num_tokens": 6, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -29.760393142700195, "logits_per_token": -2.7796843846639, "logits_per_char": -0.45075962994549723, "bits_per_byte": 0.6503086827557804, "num_chars": 37}, {"sum_logits": -25.548309326171875, "num_tokens": 6, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -38.14698028564453, "logits_per_token": -4.2580515543619795, "logits_per_char": -0.672323929636102, "bits_per_byte": 0.9699583991576816, "num_chars": 38}, {"sum_logits": -27.879436492919922, "num_tokens": 7, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -36.08968734741211, "logits_per_token": -3.982776641845703, "logits_per_char": -0.7534982835924303, "bits_per_byte": 1.087068237057926, "num_chars": 37}, {"sum_logits": -21.40180206298828, "num_tokens": 6, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -32.69432067871094, "logits_per_token": -3.566967010498047, "logits_per_char": -0.6294647665584788, "bits_per_byte": 0.9081256971288989, "num_chars": 34}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 226, "native_id": "MSA_2012_5_23", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.144218444824219, "logits_per_token_corr": -1.3144218444824218, "logits_per_char_corr": -0.31295758201962426, "bits_per_byte_corr": 0.45150235158863733}, "model_output": [{"sum_logits": -8.730415344238281, "num_tokens": 4, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -19.685440063476562, "logits_per_token": -2.1826038360595703, "logits_per_char": -0.4365207672119141, "bits_per_byte": 0.6297663461021265, "num_chars": 20}, {"sum_logits": -10.215827941894531, "num_tokens": 6, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -24.064655303955078, "logits_per_token": -1.7026379903157551, "logits_per_char": -0.444166432256284, "bits_per_byte": 0.640796709145945, "num_chars": 23}, {"sum_logits": -13.144218444824219, "num_tokens": 10, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -33.208251953125, "logits_per_token": -1.3144218444824218, "logits_per_char": -0.31295758201962426, "bits_per_byte": 0.45150235158863733, "num_chars": 42}, {"sum_logits": -14.86780071258545, "num_tokens": 10, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -34.42810821533203, "logits_per_token": -1.4867800712585448, "logits_per_char": -0.3539952550615583, "bits_per_byte": 0.5107071989759008, "num_chars": 42}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 227, "native_id": "Mercury_405873", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -21.194290161132812, "logits_per_token_corr": -2.119429016113281, "logits_per_char_corr": -0.3592252569683528, "bits_per_byte_corr": 0.5182524967906784}, "model_output": [{"sum_logits": -21.246612548828125, "num_tokens": 10, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -32.36069869995117, "logits_per_token": -2.1246612548828123, "logits_per_char": -0.39345578794126157, "bits_per_byte": 0.5676367140723255, "num_chars": 54}, {"sum_logits": -21.194290161132812, "num_tokens": 10, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -36.10969924926758, "logits_per_token": -2.119429016113281, "logits_per_char": -0.3592252569683528, "bits_per_byte": 0.5182524967906784, "num_chars": 59}, {"sum_logits": -19.573036193847656, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -31.728740692138672, "logits_per_token": -2.1747817993164062, "logits_per_char": -0.48932590484619143, "bits_per_byte": 0.7059480563006123, "num_chars": 40}, {"sum_logits": -28.448192596435547, "num_tokens": 10, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -38.09898376464844, "logits_per_token": -2.844819259643555, "logits_per_char": -0.45884181607154106, "bits_per_byte": 0.6619688125993738, "num_chars": 62}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 228, "native_id": "Mercury_7043820", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.220716953277588, "logits_per_token_corr": -1.305179238319397, "logits_per_char_corr": -0.40159361179058367, "bits_per_byte_corr": 0.5793771121833788}, "model_output": [{"sum_logits": -3.6551380157470703, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -18.270633697509766, "logits_per_token": -0.9137845039367676, "logits_per_char": -0.3045948346455892, "bits_per_byte": 0.43943745742390106, "num_chars": 12}, {"sum_logits": -5.220716953277588, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -23.1387882232666, "logits_per_token": -1.305179238319397, "logits_per_char": -0.40159361179058367, "bits_per_byte": 0.5793771121833788, "num_chars": 13}, {"sum_logits": -12.042059898376465, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -16.66040802001953, "logits_per_token": -6.021029949188232, "logits_per_char": -1.003504991531372, "bits_per_byte": 1.4477516747906718, "num_chars": 12}, {"sum_logits": -8.421070098876953, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -19.763370513916016, "logits_per_token": -2.807023366292318, "logits_per_char": -0.6015050070626395, "bits_per_byte": 0.8677882907597744, "num_chars": 14}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 229, "native_id": "MCAS_2005_5_34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.099802017211914, "logits_per_token_corr": -3.219960403442383, "logits_per_char_corr": -0.487872788400361, "bits_per_byte_corr": 0.7038516524103771}, "model_output": [{"sum_logits": -23.89885711669922, "num_tokens": 5, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -29.613235473632812, "logits_per_token": -4.779771423339843, "logits_per_char": -0.9191868121807392, "bits_per_byte": 1.3261062555846401, "num_chars": 26}, {"sum_logits": -16.099802017211914, "num_tokens": 5, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -28.6278076171875, "logits_per_token": -3.219960403442383, "logits_per_char": -0.487872788400361, "bits_per_byte": 0.7038516524103771, "num_chars": 33}, {"sum_logits": -26.622478485107422, "num_tokens": 9, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -34.34490966796875, "logits_per_token": -2.9580531650119357, "logits_per_char": -0.6191274066304051, "bits_per_byte": 0.8932120392247719, "num_chars": 43}, {"sum_logits": -42.30121612548828, "num_tokens": 10, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -45.53486251831055, "logits_per_token": -4.230121612548828, "logits_per_char": -0.8812753359476725, "bits_per_byte": 1.2714115568303757, "num_chars": 48}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 230, "native_id": "Mercury_7182245", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -31.299041748046875, "logits_per_token_corr": -2.407618596003606, "logits_per_char_corr": -0.504823254000756, "bits_per_byte_corr": 0.7283060050728435}, "model_output": [{"sum_logits": -30.45450782775879, "num_tokens": 12, "num_tokens_all": 255, "is_greedy": false, "sum_logits_uncond": -48.14201354980469, "logits_per_token": -2.5378756523132324, "logits_per_char": -0.5161780987755727, "bits_per_byte": 0.7446875833195473, "num_chars": 59}, {"sum_logits": -27.124584197998047, "num_tokens": 11, "num_tokens_all": 254, "is_greedy": false, "sum_logits_uncond": -33.477752685546875, "logits_per_token": -2.465871290727095, "logits_per_char": -0.475869898210492, "bits_per_byte": 0.6865351422571059, "num_chars": 57}, {"sum_logits": -31.876323699951172, "num_tokens": 13, "num_tokens_all": 256, "is_greedy": false, "sum_logits_uncond": -48.56732940673828, "logits_per_token": -2.452024899996244, "logits_per_char": -0.5402766728805284, "bits_per_byte": 0.779454476673287, "num_chars": 59}, {"sum_logits": -31.299041748046875, "num_tokens": 13, "num_tokens_all": 256, "is_greedy": false, "sum_logits_uncond": -51.934410095214844, "logits_per_token": -2.407618596003606, "logits_per_char": -0.504823254000756, "bits_per_byte": 0.7283060050728435, "num_chars": 62}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 231, "native_id": "MSA_2012_8_30", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.083824157714844, "logits_per_token_corr": -5.541912078857422, "logits_per_char_corr": -0.8526018582857572, "bits_per_byte_corr": 1.2300444728024604}, "model_output": [{"sum_logits": -11.083824157714844, "num_tokens": 2, "num_tokens_all": 333, "is_greedy": false, "sum_logits_uncond": -22.84111976623535, "logits_per_token": -5.541912078857422, "logits_per_char": -0.8526018582857572, "bits_per_byte": 1.2300444728024604, "num_chars": 13}, {"sum_logits": -8.207656860351562, "num_tokens": 2, "num_tokens_all": 333, "is_greedy": false, "sum_logits_uncond": -17.77920913696289, "logits_per_token": -4.103828430175781, "logits_per_char": -0.5862612043108258, "bits_per_byte": 0.8457961321254275, "num_chars": 14}, {"sum_logits": -12.413793563842773, "num_tokens": 2, "num_tokens_all": 333, "is_greedy": false, "sum_logits_uncond": -15.94287395477295, "logits_per_token": -6.206896781921387, "logits_per_char": -0.8275862375895182, "bits_per_byte": 1.193954560879211, "num_chars": 15}, {"sum_logits": -7.789217472076416, "num_tokens": 2, "num_tokens_all": 333, "is_greedy": false, "sum_logits_uncond": -18.80341339111328, "logits_per_token": -3.894608736038208, "logits_per_char": -0.43273430400424534, "bits_per_byte": 0.6243036344099103, "num_chars": 18}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 232, "native_id": "Mercury_7252753", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.323298454284668, "logits_per_token_corr": -2.661649227142334, "logits_per_char_corr": -0.5323298454284668, "bits_per_byte_corr": 0.7679896281173892}, "model_output": [{"sum_logits": -5.323298454284668, "num_tokens": 2, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -17.014938354492188, "logits_per_token": -2.661649227142334, "logits_per_char": -0.5323298454284668, "bits_per_byte": 0.7679896281173892, "num_chars": 10}, {"sum_logits": -4.4635090827941895, "num_tokens": 1, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -12.696199417114258, "logits_per_token": -4.4635090827941895, "logits_per_char": -0.6376441546848842, "bits_per_byte": 0.9199260598163784, "num_chars": 7}, {"sum_logits": -4.2385125160217285, "num_tokens": 1, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -11.467022895812988, "logits_per_token": -4.2385125160217285, "logits_per_char": -0.7064187526702881, "bits_per_byte": 1.019146831269124, "num_chars": 6}, {"sum_logits": -5.485467433929443, "num_tokens": 1, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -12.272923469543457, "logits_per_token": -5.485467433929443, "logits_per_char": -1.0970934867858886, "bits_per_byte": 1.58277133277872, "num_chars": 5}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 233, "native_id": "TAKS_2009_8_36", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -2.9997129440307617, "logits_per_token_corr": -2.9997129440307617, "logits_per_char_corr": -1.4998564720153809, "bits_per_byte_corr": 2.163835494223361}, "model_output": [{"sum_logits": -2.4264841079711914, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": true, "sum_logits_uncond": -6.3205461502075195, "logits_per_token": -2.4264841079711914, "logits_per_char": -1.2132420539855957, "bits_per_byte": 1.7503382946842165, "num_chars": 2}, {"sum_logits": -2.9997129440307617, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -6.575865745544434, "logits_per_token": -2.9997129440307617, "logits_per_char": -1.4998564720153809, "bits_per_byte": 2.163835494223361, "num_chars": 2}, {"sum_logits": -3.3446779251098633, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -6.875624656677246, "logits_per_token": -3.3446779251098633, "logits_per_char": -1.6723389625549316, "bits_per_byte": 2.412675127965127, "num_chars": 2}, {"sum_logits": -3.50003719329834, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -6.782100677490234, "logits_per_token": -3.50003719329834, "logits_per_char": -1.75001859664917, "bits_per_byte": 2.5247431508510343, "num_chars": 2}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 234, "native_id": "Mercury_SC_415473", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -2.0364584922790527, "logits_per_token_corr": -2.0364584922790527, "logits_per_char_corr": -1.0182292461395264, "bits_per_byte_corr": 1.4689942838946577}, "model_output": [{"sum_logits": -3.2648119926452637, "num_tokens": 1, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -6.3205461502075195, "logits_per_token": -3.2648119926452637, "logits_per_char": -1.6324059963226318, "bits_per_byte": 2.3550640356137604, "num_chars": 2}, {"sum_logits": -3.008338451385498, "num_tokens": 1, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -6.875624656677246, "logits_per_token": -3.008338451385498, "logits_per_char": -1.504169225692749, "bits_per_byte": 2.17005748256628, "num_chars": 2}, {"sum_logits": -2.0364584922790527, "num_tokens": 1, "num_tokens_all": 195, "is_greedy": true, "sum_logits_uncond": -7.313617706298828, "logits_per_token": -2.0364584922790527, "logits_per_char": -1.0182292461395264, "bits_per_byte": 1.4689942838946577, "num_chars": 2}, {"sum_logits": -3.15250825881958, "num_tokens": 1, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -8.77786922454834, "logits_per_token": -3.15250825881958, "logits_per_char": -1.0508360862731934, "bits_per_byte": 1.5160360104545922, "num_chars": 3}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 235, "native_id": "Mercury_SC_413624", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.77670669555664, "logits_per_token_corr": -2.7529674106174045, "logits_per_char_corr": -0.5899215879894438, "bits_per_byte_corr": 0.8510769495063243}, "model_output": [{"sum_logits": -26.45113754272461, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -44.154170989990234, "logits_per_token": -4.408522923787435, "logits_per_char": -0.8015496225068064, "bits_per_byte": 1.156391665417821, "num_chars": 33}, {"sum_logits": -24.77670669555664, "num_tokens": 9, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -39.691688537597656, "logits_per_token": -2.7529674106174045, "logits_per_char": -0.5899215879894438, "bits_per_byte": 0.8510769495063243, "num_chars": 42}, {"sum_logits": -29.58800506591797, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -44.046539306640625, "logits_per_token": -3.698500633239746, "logits_per_char": -0.704476311093285, "bits_per_byte": 1.0163444804387631, "num_chars": 42}, {"sum_logits": -24.08259391784668, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -43.76608657836914, "logits_per_token": -4.013765652974446, "logits_per_char": -0.6880741119384766, "bits_per_byte": 0.9926811090584308, "num_chars": 35}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 236, "native_id": "Mercury_7016800", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -18.18886947631836, "logits_per_token_corr": -2.0209854973687067, "logits_per_char_corr": -0.39541020600692084, "bits_per_byte_corr": 0.5704563433234779}, "model_output": [{"sum_logits": -18.702369689941406, "num_tokens": 9, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -35.845645904541016, "logits_per_token": -2.0780410766601562, "logits_per_char": -0.5054694510794975, "bits_per_byte": 0.7292382703937814, "num_chars": 37}, {"sum_logits": -25.86606216430664, "num_tokens": 8, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -38.537147521972656, "logits_per_token": -3.23325777053833, "logits_per_char": -0.6466515541076661, "bits_per_byte": 0.9329209902949412, "num_chars": 40}, {"sum_logits": -16.49627685546875, "num_tokens": 9, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -29.044879913330078, "logits_per_token": -1.8329196506076388, "logits_per_char": -0.41240692138671875, "bits_per_byte": 0.5949774203133312, "num_chars": 40}, {"sum_logits": -18.18886947631836, "num_tokens": 9, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -35.757354736328125, "logits_per_token": -2.0209854973687067, "logits_per_char": -0.39541020600692084, "bits_per_byte": 0.5704563433234779, "num_chars": 46}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 237, "native_id": "Mercury_SC_407228", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -27.582843780517578, "logits_per_token_corr": -3.4478554725646973, "logits_per_char_corr": -0.5408400741277957, "bits_per_byte_corr": 0.7802672928587507}, "model_output": [{"sum_logits": -14.349313735961914, "num_tokens": 3, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -24.98918914794922, "logits_per_token": -4.783104578653972, "logits_per_char": -0.5739725494384765, "bits_per_byte": 0.8280673506818804, "num_chars": 25}, {"sum_logits": -16.651409149169922, "num_tokens": 5, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -35.31804656982422, "logits_per_token": -3.3302818298339845, "logits_per_char": -0.6404388134296124, "bits_per_byte": 0.9239579001283776, "num_chars": 26}, {"sum_logits": -13.024323463439941, "num_tokens": 8, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -27.125717163085938, "logits_per_token": -1.6280404329299927, "logits_per_char": -0.32560808658599855, "bits_per_byte": 0.46975317179130177, "num_chars": 40}, {"sum_logits": -27.582843780517578, "num_tokens": 8, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -41.48371505737305, "logits_per_token": -3.4478554725646973, "logits_per_char": -0.5408400741277957, "bits_per_byte": 0.7802672928587507, "num_chars": 51}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 238, "native_id": "Mercury_414504", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.664958953857422, "logits_per_token_corr": -2.1387465794881186, "logits_per_char_corr": -0.5346866448720297, "bits_per_byte_corr": 0.7713897709869897}, "model_output": [{"sum_logits": -37.45040512084961, "num_tokens": 11, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -60.114784240722656, "logits_per_token": -3.404582283713601, "logits_per_char": -0.6809164567427202, "bits_per_byte": 0.9823547954031125, "num_chars": 55}, {"sum_logits": -25.664958953857422, "num_tokens": 12, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -46.71757507324219, "logits_per_token": -2.1387465794881186, "logits_per_char": -0.5346866448720297, "bits_per_byte": 0.7713897709869897, "num_chars": 48}, {"sum_logits": -26.266054153442383, "num_tokens": 11, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -47.560150146484375, "logits_per_token": -2.3878231048583984, "logits_per_char": -0.5710011772487474, "bits_per_byte": 0.8237805667591198, "num_chars": 46}, {"sum_logits": -18.39678192138672, "num_tokens": 7, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -44.25986099243164, "logits_per_token": -2.6281117030552457, "logits_per_char": -0.49721032219964106, "bits_per_byte": 0.7173228661167412, "num_chars": 37}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 239, "native_id": "TIMSS_2011_4_pg27", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.148599624633789, "logits_per_token_corr": -0.8297199249267578, "logits_per_char_corr": -0.2304777569240994, "bits_per_byte_corr": 0.332509116949849}, "model_output": [{"sum_logits": -4.148599624633789, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -24.630828857421875, "logits_per_token": -0.8297199249267578, "logits_per_char": -0.2304777569240994, "bits_per_byte": 0.332509116949849, "num_chars": 18}, {"sum_logits": -5.731100082397461, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -27.228208541870117, "logits_per_token": -1.1462200164794922, "logits_per_char": -0.31839444902208114, "bits_per_byte": 0.45934609265106036, "num_chars": 18}, {"sum_logits": -6.53120231628418, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -27.18503761291504, "logits_per_token": -1.3062404632568358, "logits_per_char": -0.3628445731268989, "bits_per_byte": 0.5234740662640259, "num_chars": 18}, {"sum_logits": -5.6897101402282715, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -25.431846618652344, "logits_per_token": -1.1379420280456543, "logits_per_char": -0.3160950077904595, "bits_per_byte": 0.4560287001893818, "num_chars": 18}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 240, "native_id": "Mercury_SC_402029", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.903246879577637, "logits_per_token_corr": -1.7670274310641818, "logits_per_char_corr": -0.3878840702336009, "bits_per_byte_corr": 0.5595984245662444}, "model_output": [{"sum_logits": -16.183391571044922, "num_tokens": 5, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -23.537817001342773, "logits_per_token": -3.2366783142089846, "logits_per_char": -0.8517574511076275, "bits_per_byte": 1.2288262507540806, "num_chars": 19}, {"sum_logits": -19.77684783935547, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -28.608097076416016, "logits_per_token": -4.944211959838867, "logits_per_char": -1.0987137688530817, "bits_per_byte": 1.5851089056819025, "num_chars": 18}, {"sum_logits": -11.402955055236816, "num_tokens": 3, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -18.052772521972656, "logits_per_token": -3.800985018412272, "logits_per_char": -0.633497503068712, "bits_per_byte": 0.9139437060934282, "num_chars": 18}, {"sum_logits": -15.903246879577637, "num_tokens": 9, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -32.69916915893555, "logits_per_token": -1.7670274310641818, "logits_per_char": -0.3878840702336009, "bits_per_byte": 0.5595984245662444, "num_chars": 41}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 241, "native_id": "Mercury_7131845", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.811599731445312, "logits_per_token_corr": -3.3019332885742188, "logits_per_char_corr": -0.5826941097483915, "bits_per_byte_corr": 0.8406499024898179}, "model_output": [{"sum_logits": -19.77647590637207, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -27.866790771484375, "logits_per_token": -3.2960793177286782, "logits_per_char": -0.6180148720741272, "bits_per_byte": 0.891606991137611, "num_chars": 32}, {"sum_logits": -20.908618927001953, "num_tokens": 7, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -33.53611373901367, "logits_per_token": -2.986945561000279, "logits_per_char": -0.6149593802059398, "bits_per_byte": 0.8871988481718973, "num_chars": 34}, {"sum_logits": -19.811599731445312, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -28.50514793395996, "logits_per_token": -3.3019332885742188, "logits_per_char": -0.5826941097483915, "bits_per_byte": 0.8406499024898179, "num_chars": 34}, {"sum_logits": -16.791152954101562, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -33.895790100097656, "logits_per_token": -2.7985254923502603, "logits_per_char": -0.47974722726004465, "bits_per_byte": 0.6921289456487942, "num_chars": 35}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 242, "native_id": "Mercury_SC_405533", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.573204040527344, "logits_per_token_corr": -2.595534006754557, "logits_per_char_corr": -0.4866626262664795, "bits_per_byte_corr": 0.7021057575011533}, "model_output": [{"sum_logits": -12.915841102600098, "num_tokens": 5, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -28.05780792236328, "logits_per_token": -2.5831682205200197, "logits_per_char": -0.4453738311241413, "bits_per_byte": 0.642538617504979, "num_chars": 29}, {"sum_logits": -15.573204040527344, "num_tokens": 6, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -28.13775062561035, "logits_per_token": -2.595534006754557, "logits_per_char": -0.4866626262664795, "bits_per_byte": 0.7021057575011533, "num_chars": 32}, {"sum_logits": -24.682811737060547, "num_tokens": 11, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -42.03529357910156, "logits_per_token": -2.2438919760964136, "logits_per_char": -0.5485069274902343, "bits_per_byte": 0.7913282241839719, "num_chars": 45}, {"sum_logits": -16.99930191040039, "num_tokens": 8, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -34.23216247558594, "logits_per_token": -2.124912738800049, "logits_per_char": -0.3541521231333415, "bits_per_byte": 0.5109335117651364, "num_chars": 48}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 243, "native_id": "Mercury_7086748", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.884690761566162, "logits_per_token_corr": -3.442345380783081, "logits_per_char_corr": -0.764965640174018, "bits_per_byte_corr": 1.1036121355303}, "model_output": [{"sum_logits": -6.884690761566162, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -19.1248779296875, "logits_per_token": -3.442345380783081, "logits_per_char": -0.764965640174018, "bits_per_byte": 1.1036121355303, "num_chars": 9}, {"sum_logits": -8.416595458984375, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -16.30130958557129, "logits_per_token": -4.2082977294921875, "logits_per_char": -0.7013829549153646, "bits_per_byte": 1.0118817108211708, "num_chars": 12}, {"sum_logits": -11.817296028137207, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -21.0715274810791, "logits_per_token": -5.9086480140686035, "logits_per_char": -0.984774669011434, "bits_per_byte": 1.4207295313768868, "num_chars": 12}, {"sum_logits": -14.873926162719727, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -19.81066131591797, "logits_per_token": -7.436963081359863, "logits_per_char": -1.144148166363056, "bits_per_byte": 1.6506568856553674, "num_chars": 13}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 244, "native_id": "MDSA_2007_8_17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.607200622558594, "logits_per_token_corr": -0.7576571873256138, "logits_per_char_corr": -0.17388853479604252, "bits_per_byte_corr": 0.25086812681787873}, "model_output": [{"sum_logits": -29.369632720947266, "num_tokens": 9, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -44.215606689453125, "logits_per_token": -3.263292524549696, "logits_per_char": -0.5993802596111687, "bits_per_byte": 0.8647229281483938, "num_chars": 49}, {"sum_logits": -10.607200622558594, "num_tokens": 14, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -40.96195983886719, "logits_per_token": -0.7576571873256138, "logits_per_char": -0.17388853479604252, "bits_per_byte": 0.25086812681787873, "num_chars": 61}, {"sum_logits": -14.259794235229492, "num_tokens": 15, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -42.73530578613281, "logits_per_token": -0.9506529490152995, "logits_per_char": -0.22634594024173796, "bits_per_byte": 0.3265481655123396, "num_chars": 63}, {"sum_logits": -10.503076553344727, "num_tokens": 14, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -41.04914093017578, "logits_per_token": -0.7502197538103376, "logits_per_char": -0.16411057114601135, "bits_per_byte": 0.2367615071499761, "num_chars": 64}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 245, "native_id": "Mercury_7210473", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.054582595825195, "logits_per_token_corr": -4.027291297912598, "logits_per_char_corr": -0.5034114122390747, "bits_per_byte_corr": 0.7262691479647445}, "model_output": [{"sum_logits": -11.215303421020508, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -19.361520767211914, "logits_per_token": -5.607651710510254, "logits_per_char": -0.7009564638137817, "bits_per_byte": 1.0112664142239336, "num_chars": 16}, {"sum_logits": -10.862043380737305, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -17.28662109375, "logits_per_token": -5.431021690368652, "logits_per_char": -0.638943728278665, "bits_per_byte": 0.9218009481953975, "num_chars": 17}, {"sum_logits": -8.054582595825195, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -22.072322845458984, "logits_per_token": -4.027291297912598, "logits_per_char": -0.5034114122390747, "bits_per_byte": 0.7262691479647445, "num_chars": 16}, {"sum_logits": -9.441360473632812, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -18.390764236450195, "logits_per_token": -4.720680236816406, "logits_per_char": -0.5900850296020508, "bits_per_byte": 0.8513127459103075, "num_chars": 16}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 246, "native_id": "Mercury_7214340", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.790539741516113, "logits_per_token_corr": -5.790539741516113, "logits_per_char_corr": -0.5790539741516113, "bits_per_byte_corr": 0.8353982969161758}, "model_output": [{"sum_logits": -5.790539741516113, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -15.136021614074707, "logits_per_token": -5.790539741516113, "logits_per_char": -0.5790539741516113, "bits_per_byte": 0.8353982969161758, "num_chars": 10}, {"sum_logits": -5.577472686767578, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -13.832594871520996, "logits_per_token": -5.577472686767578, "logits_per_char": -0.5070429715243253, "bits_per_byte": 0.7315083805362735, "num_chars": 11}, {"sum_logits": -4.466116905212402, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -16.116777420043945, "logits_per_token": -4.466116905212402, "logits_per_char": -0.4962352116902669, "bits_per_byte": 0.7159160790205473, "num_chars": 9}, {"sum_logits": -7.999354362487793, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -15.491503715515137, "logits_per_token": -7.999354362487793, "logits_per_char": -0.7999354362487793, "bits_per_byte": 1.1540628869082925, "num_chars": 10}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 247, "native_id": "MCAS_2005_9_17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.911534547805786, "logits_per_token_corr": -3.911534547805786, "logits_per_char_corr": -0.3008872729081374, "bits_per_byte_corr": 0.4340885764914858}, "model_output": [{"sum_logits": -3.855181932449341, "num_tokens": 1, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -13.868720054626465, "logits_per_token": -3.855181932449341, "logits_per_char": -0.2409488707780838, "bits_per_byte": 0.3476157409795869, "num_chars": 16}, {"sum_logits": -8.75989818572998, "num_tokens": 1, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -12.272923469543457, "logits_per_token": -8.75989818572998, "logits_per_char": -1.7519796371459961, "bits_per_byte": 2.52757233425079, "num_chars": 5}, {"sum_logits": -3.911534547805786, "num_tokens": 1, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -15.784364700317383, "logits_per_token": -3.911534547805786, "logits_per_char": -0.3008872729081374, "bits_per_byte": 0.4340885764914858, "num_chars": 13}, {"sum_logits": -3.1584322452545166, "num_tokens": 1, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -14.865165710449219, "logits_per_token": -3.1584322452545166, "logits_per_char": -0.28713020411404694, "bits_per_byte": 0.414241321565069, "num_chars": 11}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 248, "native_id": "MEA_2016_8_12", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.135285377502441, "logits_per_token_corr": -5.567642688751221, "logits_per_char_corr": -0.8565604136540339, "bits_per_byte_corr": 1.2357554610013617}, "model_output": [{"sum_logits": -7.045827388763428, "num_tokens": 2, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -18.707500457763672, "logits_per_token": -3.522913694381714, "logits_per_char": -0.39143485493130153, "bits_per_byte": 0.5647211240408853, "num_chars": 18}, {"sum_logits": -11.135285377502441, "num_tokens": 2, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -18.282508850097656, "logits_per_token": -5.567642688751221, "logits_per_char": -0.8565604136540339, "bits_per_byte": 1.2357554610013617, "num_chars": 13}, {"sum_logits": -9.126611709594727, "num_tokens": 2, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -17.623241424560547, "logits_per_token": -4.563305854797363, "logits_per_char": -0.7605509757995605, "bits_per_byte": 1.0972431211300764, "num_chars": 12}, {"sum_logits": -8.858305931091309, "num_tokens": 2, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -18.500301361083984, "logits_per_token": -4.429152965545654, "logits_per_char": -0.8858305931091308, "bits_per_byte": 1.2779834037471904, "num_chars": 10}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 249, "native_id": "Mercury_SC_401278", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.340009689331055, "logits_per_token_corr": -3.1133365631103516, "logits_per_char_corr": -0.5837506055831909, "bits_per_byte_corr": 0.8421741037914038}, "model_output": [{"sum_logits": -8.015501022338867, "num_tokens": 3, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -17.0362491607666, "logits_per_token": -2.6718336741129556, "logits_per_char": -0.572535787309919, "bits_per_byte": 0.825994541084072, "num_chars": 14}, {"sum_logits": -9.340009689331055, "num_tokens": 3, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -18.321815490722656, "logits_per_token": -3.1133365631103516, "logits_per_char": -0.5837506055831909, "bits_per_byte": 0.8421741037914038, "num_chars": 16}, {"sum_logits": -10.660429000854492, "num_tokens": 3, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -20.184967041015625, "logits_per_token": -3.553476333618164, "logits_per_char": -0.5922460556030273, "bits_per_byte": 0.8544304474051507, "num_chars": 18}, {"sum_logits": -15.406227111816406, "num_tokens": 3, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -24.29659080505371, "logits_per_token": -5.135409037272136, "logits_per_char": -1.027081807454427, "bits_per_byte": 1.4817658302028396, "num_chars": 15}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 250, "native_id": "Mercury_SC_407689", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.433048248291016, "logits_per_token_corr": -7.433048248291016, "logits_per_char_corr": -1.486609649658203, "bits_per_byte_corr": 2.1447243693011098}, "model_output": [{"sum_logits": -8.012686729431152, "num_tokens": 1, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -15.029439926147461, "logits_per_token": -8.012686729431152, "logits_per_char": -0.728426066311923, "bits_per_byte": 1.0508966735232215, "num_chars": 11}, {"sum_logits": -9.276449203491211, "num_tokens": 3, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -17.519977569580078, "logits_per_token": -3.0921497344970703, "logits_per_char": -0.927644920349121, "bits_per_byte": 1.338308726294476, "num_chars": 10}, {"sum_logits": -4.712510108947754, "num_tokens": 1, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -14.54318904876709, "logits_per_token": -4.712510108947754, "logits_per_char": -0.5890637636184692, "bits_per_byte": 0.8498393705403647, "num_chars": 8}, {"sum_logits": -7.433048248291016, "num_tokens": 1, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -13.27519702911377, "logits_per_token": -7.433048248291016, "logits_per_char": -1.486609649658203, "bits_per_byte": 2.1447243693011098, "num_chars": 5}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 251, "native_id": "Mercury_7230405", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -31.211212158203125, "logits_per_token_corr": -4.458744594029018, "logits_per_char_corr": -0.8669781155056424, "bits_per_byte_corr": 1.2507850278001478}, "model_output": [{"sum_logits": -26.741546630859375, "num_tokens": 8, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -43.36954116821289, "logits_per_token": -3.342693328857422, "logits_per_char": -0.6522328446551067, "bits_per_byte": 0.9409730904895001, "num_chars": 41}, {"sum_logits": -31.211212158203125, "num_tokens": 7, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -46.28588104248047, "logits_per_token": -4.458744594029018, "logits_per_char": -0.8669781155056424, "bits_per_byte": 1.2507850278001478, "num_chars": 36}, {"sum_logits": -21.537416458129883, "num_tokens": 7, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -37.71045684814453, "logits_per_token": -3.0767737797328403, "logits_per_char": -0.39884104552092375, "bits_per_byte": 0.5754059984764194, "num_chars": 54}, {"sum_logits": -31.359241485595703, "num_tokens": 9, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -46.20245361328125, "logits_per_token": -3.4843601650661893, "logits_per_char": -0.6968720330132379, "bits_per_byte": 1.0053738261631306, "num_chars": 45}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 252, "native_id": "Mercury_SC_405640", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.963116645812988, "logits_per_token_corr": -2.5926233291625977, "logits_per_char_corr": -0.7201731469896104, "bits_per_byte_corr": 1.038990227744056}, "model_output": [{"sum_logits": -12.963116645812988, "num_tokens": 5, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -27.681617736816406, "logits_per_token": -2.5926233291625977, "logits_per_char": -0.7201731469896104, "bits_per_byte": 1.038990227744056, "num_chars": 18}, {"sum_logits": -14.594372749328613, "num_tokens": 5, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -24.364604949951172, "logits_per_token": -2.9188745498657225, "logits_per_char": -0.7297186374664306, "bits_per_byte": 1.0527614595178272, "num_chars": 20}, {"sum_logits": -16.043468475341797, "num_tokens": 5, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -27.489845275878906, "logits_per_token": -3.2086936950683596, "logits_per_char": -0.7639746893019903, "bits_per_byte": 1.1021824956214599, "num_chars": 21}, {"sum_logits": -18.18202781677246, "num_tokens": 5, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -29.54788589477539, "logits_per_token": -3.6364055633544923, "logits_per_char": -0.6734084376582393, "bits_per_byte": 0.9715230135030245, "num_chars": 27}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 253, "native_id": "Mercury_7201775", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.358556747436523, "logits_per_token_corr": -6.358556747436523, "logits_per_char_corr": -0.7948195934295654, "bits_per_byte_corr": 1.1466822858430399}, "model_output": [{"sum_logits": -7.486730575561523, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -15.550889015197754, "logits_per_token": -7.486730575561523, "logits_per_char": -0.575902351966271, "bits_per_byte": 0.8308514672186265, "num_chars": 13}, {"sum_logits": -5.979748725891113, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -17.22313690185547, "logits_per_token": -5.979748725891113, "logits_per_char": -0.5979748725891113, "bits_per_byte": 0.8626953832611404, "num_chars": 10}, {"sum_logits": -4.714831829071045, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -18.293819427490234, "logits_per_token": -2.3574159145355225, "logits_per_char": -0.36267937146700346, "bits_per_byte": 0.523235730648548, "num_chars": 13}, {"sum_logits": -6.358556747436523, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -11.888246536254883, "logits_per_token": -6.358556747436523, "logits_per_char": -0.7948195934295654, "bits_per_byte": 1.1466822858430399, "num_chars": 8}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 254, "native_id": "Mercury_7177398", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.898006439208984, "logits_per_token_corr": -3.237250804901123, "logits_per_char_corr": -0.8354195625551285, "bits_per_byte_corr": 1.2052556599607769}, "model_output": [{"sum_logits": -24.857437133789062, "num_tokens": 8, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -34.786415100097656, "logits_per_token": -3.107179641723633, "logits_per_char": -0.6214359283447266, "bits_per_byte": 0.8965425320538104, "num_chars": 40}, {"sum_logits": -15.142843246459961, "num_tokens": 8, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -28.91041374206543, "logits_per_token": -1.8928554058074951, "logits_per_char": -0.44537774254294005, "bits_per_byte": 0.6425442604894828, "num_chars": 34}, {"sum_logits": -21.908466339111328, "num_tokens": 7, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -27.702110290527344, "logits_per_token": -3.1297809055873325, "logits_per_char": -0.7067247206164945, "bits_per_byte": 1.0195882497077873, "num_chars": 31}, {"sum_logits": -25.898006439208984, "num_tokens": 8, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -32.325653076171875, "logits_per_token": -3.237250804901123, "logits_per_char": -0.8354195625551285, "bits_per_byte": 1.2052556599607769, "num_chars": 31}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 255, "native_id": "Mercury_7041423", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.617363929748535, "logits_per_token_corr": -6.308681964874268, "logits_per_char_corr": -0.7421978782205021, "bits_per_byte_corr": 1.0707651982677984}, "model_output": [{"sum_logits": -12.617363929748535, "num_tokens": 2, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -23.599403381347656, "logits_per_token": -6.308681964874268, "logits_per_char": -0.7421978782205021, "bits_per_byte": 1.0707651982677984, "num_chars": 17}, {"sum_logits": -15.588827133178711, "num_tokens": 3, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -26.788206100463867, "logits_per_token": -5.19627571105957, "logits_per_char": -0.9169898313634536, "bits_per_byte": 1.3229366822546118, "num_chars": 17}, {"sum_logits": -10.2245512008667, "num_tokens": 2, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -17.630815505981445, "logits_per_token": -5.11227560043335, "logits_per_char": -0.5680306222703722, "bits_per_byte": 0.8194949618231266, "num_chars": 18}, {"sum_logits": -11.8607177734375, "num_tokens": 3, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -24.74371337890625, "logits_per_token": -3.9535725911458335, "logits_per_char": -0.593035888671875, "bits_per_byte": 0.855569935656708, "num_chars": 20}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 256, "native_id": "Mercury_7004743", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.72169303894043, "logits_per_token_corr": -1.9072310129801433, "logits_per_char_corr": -0.44013023376464844, "bits_per_byte_corr": 0.6349737055980147}, "model_output": [{"sum_logits": -5.137452602386475, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -18.42009925842285, "logits_per_token": -1.7124842007954915, "logits_per_char": -0.5137452602386474, "bits_per_byte": 0.7411777392270391, "num_chars": 10}, {"sum_logits": -7.0608415603637695, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -19.362831115722656, "logits_per_token": -2.35361385345459, "logits_per_char": -0.6418946873057972, "bits_per_byte": 0.9260582821497109, "num_chars": 11}, {"sum_logits": -10.886528015136719, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -20.29461097717285, "logits_per_token": -3.6288426717122397, "logits_per_char": -0.989684365012429, "bits_per_byte": 1.4278127254497999, "num_chars": 11}, {"sum_logits": -5.72169303894043, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -17.945898056030273, "logits_per_token": -1.9072310129801433, "logits_per_char": -0.44013023376464844, "bits_per_byte": 0.6349737055980147, "num_chars": 13}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 257, "native_id": "Mercury_7198468", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -31.66900634765625, "logits_per_token_corr": -3.166900634765625, "logits_per_char_corr": -0.5026826404389881, "bits_per_byte_corr": 0.7252177525028191}, "model_output": [{"sum_logits": -31.66900634765625, "num_tokens": 10, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -47.427730560302734, "logits_per_token": -3.166900634765625, "logits_per_char": -0.5026826404389881, "bits_per_byte": 0.7252177525028191, "num_chars": 63}, {"sum_logits": -34.109130859375, "num_tokens": 8, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -42.8642692565918, "logits_per_token": -4.263641357421875, "logits_per_char": -0.6090916224888393, "bits_per_byte": 0.8787334632122924, "num_chars": 56}, {"sum_logits": -26.70918083190918, "num_tokens": 8, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -35.80146026611328, "logits_per_token": -3.3386476039886475, "logits_per_char": -0.5136380929213303, "bits_per_byte": 0.7410231294698002, "num_chars": 52}, {"sum_logits": -26.684864044189453, "num_tokens": 9, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -36.59027862548828, "logits_per_token": -2.964984893798828, "logits_per_char": -0.5445890621263154, "bits_per_byte": 0.7856759392525713, "num_chars": 49}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 258, "native_id": "MEA_2014_5_11", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -34.86766052246094, "logits_per_token_corr": -4.358457565307617, "logits_per_char_corr": -0.64569741708261, "bits_per_byte_corr": 0.9315444615405633}, "model_output": [{"sum_logits": -16.06158447265625, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -30.542816162109375, "logits_per_token": -2.0076980590820312, "logits_per_char": -0.34173583984375, "bits_per_byte": 0.49302060143695736, "num_chars": 47}, {"sum_logits": -20.488563537597656, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -30.650571823120117, "logits_per_token": -2.561070442199707, "logits_per_char": -0.5122140884399414, "bits_per_byte": 0.7389687252662953, "num_chars": 40}, {"sum_logits": -34.86766052246094, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -45.80579376220703, "logits_per_token": -4.358457565307617, "logits_per_char": -0.64569741708261, "bits_per_byte": 0.9315444615405633, "num_chars": 54}, {"sum_logits": -23.315677642822266, "num_tokens": 10, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -45.20952606201172, "logits_per_token": -2.3315677642822266, "logits_per_char": -0.44837841620812047, "bits_per_byte": 0.6468733175055678, "num_chars": 52}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 259, "native_id": "Mercury_410602", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.5689311027526855, "logits_per_token_corr": -2.7844655513763428, "logits_per_char_corr": -0.5062664638866078, "bits_per_byte_corr": 0.7303881168181253}, "model_output": [{"sum_logits": -5.5689311027526855, "num_tokens": 2, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -11.962862014770508, "logits_per_token": -2.7844655513763428, "logits_per_char": -0.5062664638866078, "bits_per_byte": 0.7303881168181253, "num_chars": 11}, {"sum_logits": -5.950006484985352, "num_tokens": 2, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -13.296392440795898, "logits_per_token": -2.975003242492676, "logits_per_char": -0.5409096804532137, "bits_per_byte": 0.780367713559246, "num_chars": 11}, {"sum_logits": -3.745713710784912, "num_tokens": 2, "num_tokens_all": 199, "is_greedy": true, "sum_logits_uncond": -11.815290451049805, "logits_per_token": -1.872856855392456, "logits_per_char": -0.468214213848114, "bits_per_byte": 0.675490324392884, "num_chars": 8}, {"sum_logits": -4.465099811553955, "num_tokens": 2, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -12.385896682739258, "logits_per_token": -2.2325499057769775, "logits_per_char": -0.5581374764442444, "bits_per_byte": 0.8052221694009705, "num_chars": 8}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 260, "native_id": "Mercury_7108868", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.582340240478516, "logits_per_token_corr": -3.116468048095703, "logits_per_char_corr": -0.6774930539338485, "bits_per_byte_corr": 0.9774158691477844}, "model_output": [{"sum_logits": -11.238906860351562, "num_tokens": 3, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -16.9565372467041, "logits_per_token": -3.746302286783854, "logits_per_char": -0.535186040969122, "bits_per_byte": 0.7721102472597047, "num_chars": 21}, {"sum_logits": -11.989139556884766, "num_tokens": 4, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -21.488122940063477, "logits_per_token": -2.9972848892211914, "logits_per_char": -0.4440422058105469, "bits_per_byte": 0.6406174882687327, "num_chars": 27}, {"sum_logits": -15.582340240478516, "num_tokens": 5, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -24.064699172973633, "logits_per_token": -3.116468048095703, "logits_per_char": -0.6774930539338485, "bits_per_byte": 0.9774158691477844, "num_chars": 23}, {"sum_logits": -22.05825424194336, "num_tokens": 3, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -27.44350242614746, "logits_per_token": -7.35275141398112, "logits_per_char": -1.102912712097168, "bits_per_byte": 1.5911667002771246, "num_chars": 20}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 261, "native_id": "Mercury_7033828", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.789973020553589, "logits_per_token_corr": -3.789973020553589, "logits_per_char_corr": -0.7579946041107177, "bits_per_byte_corr": 1.0935550563719112}, "model_output": [{"sum_logits": -5.706958770751953, "num_tokens": 1, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -12.446762084960938, "logits_per_token": -5.706958770751953, "logits_per_char": -1.4267396926879883, "bits_per_byte": 2.0583502792818833, "num_chars": 4}, {"sum_logits": -4.778205871582031, "num_tokens": 1, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -11.783369064331055, "logits_per_token": -4.778205871582031, "logits_per_char": -1.1945514678955078, "bits_per_byte": 1.7233734788207191, "num_chars": 4}, {"sum_logits": -2.4500415325164795, "num_tokens": 1, "num_tokens_all": 189, "is_greedy": true, "sum_logits_uncond": -12.599719047546387, "logits_per_token": -2.4500415325164795, "logits_per_char": -0.4900083065032959, "bits_per_byte": 0.7069325537872121, "num_chars": 5}, {"sum_logits": -3.789973020553589, "num_tokens": 1, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -13.27519702911377, "logits_per_token": -3.789973020553589, "logits_per_char": -0.7579946041107177, "bits_per_byte": 1.0935550563719112, "num_chars": 5}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 262, "native_id": "TIMSS_2007_4_pg19", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.859206199645996, "logits_per_token_corr": -1.979886599949428, "logits_per_char_corr": -0.38497794999016655, "bits_per_byte_corr": 0.5554057793028117}, "model_output": [{"sum_logits": -13.859206199645996, "num_tokens": 7, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -35.44118118286133, "logits_per_token": -1.979886599949428, "logits_per_char": -0.38497794999016655, "bits_per_byte": 0.5554057793028117, "num_chars": 36}, {"sum_logits": -22.55991554260254, "num_tokens": 9, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -40.75873565673828, "logits_per_token": -2.506657282511393, "logits_per_char": -0.46040643964494976, "bits_per_byte": 0.6642260872695901, "num_chars": 49}, {"sum_logits": -12.483992576599121, "num_tokens": 3, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -21.095056533813477, "logits_per_token": -4.161330858866374, "logits_per_char": -0.7343525045058307, "bits_per_byte": 1.0594467165157133, "num_chars": 17}, {"sum_logits": -10.539373397827148, "num_tokens": 3, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -20.502666473388672, "logits_per_token": -3.513124465942383, "logits_per_char": -0.6199631410486558, "bits_per_byte": 0.8944177491254832, "num_chars": 17}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 263, "native_id": "Mercury_400828", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.824884414672852, "logits_per_token_corr": -1.7649768829345702, "logits_per_char_corr": -0.6303488867623466, "bits_per_byte_corr": 0.9094012129625695}, "model_output": [{"sum_logits": -14.328184127807617, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -30.2528018951416, "logits_per_token": -2.8656368255615234, "logits_per_char": -1.194015343983968, "bits_per_byte": 1.722600015512238, "num_chars": 12}, {"sum_logits": -14.241498947143555, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -28.156585693359375, "logits_per_token": -2.8482997894287108, "logits_per_char": -1.1867915789286296, "bits_per_byte": 1.7121783254903469, "num_chars": 12}, {"sum_logits": -6.633608818054199, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -32.45671844482422, "logits_per_token": -1.32672176361084, "logits_per_char": -0.47382920128958567, "bits_per_byte": 0.6835910389293548, "num_chars": 14}, {"sum_logits": -8.824884414672852, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -31.136539459228516, "logits_per_token": -1.7649768829345702, "logits_per_char": -0.6303488867623466, "bits_per_byte": 0.9094012129625695, "num_chars": 14}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 264, "native_id": "VASoL_2008_3_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.332551956176758, "logits_per_token_corr": -2.3332217080252513, "logits_per_char_corr": -0.48036917518166933, "bits_per_byte_corr": 0.693026226831014}, "model_output": [{"sum_logits": -26.33725357055664, "num_tokens": 7, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -24.879426956176758, "logits_per_token": -3.762464795793806, "logits_per_char": -0.7118176640690984, "bits_per_byte": 1.0269358139703921, "num_chars": 37}, {"sum_logits": -16.332551956176758, "num_tokens": 7, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -31.099367141723633, "logits_per_token": -2.3332217080252513, "logits_per_char": -0.48036917518166933, "bits_per_byte": 0.693026226831014, "num_chars": 34}, {"sum_logits": -15.160653114318848, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -24.49289894104004, "logits_per_token": -3.0321306228637694, "logits_per_char": -0.561505670900698, "bits_per_byte": 0.8100814468400493, "num_chars": 27}, {"sum_logits": -14.987133979797363, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -19.015151977539062, "logits_per_token": -2.9974267959594725, "logits_per_char": -0.5994853591918945, "bits_per_byte": 0.8648745547923065, "num_chars": 25}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 265, "native_id": "LEAP__5_10315", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.943263053894043, "logits_per_token_corr": -1.3943263053894044, "logits_per_char_corr": -0.3031144142150879, "bits_per_byte_corr": 0.4373016622103846}, "model_output": [{"sum_logits": -17.234439849853516, "num_tokens": 10, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -34.99097442626953, "logits_per_token": -1.7234439849853516, "logits_per_char": -0.40080092674077944, "bits_per_byte": 0.5782335093930386, "num_chars": 43}, {"sum_logits": -14.343480110168457, "num_tokens": 10, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -34.16766357421875, "logits_per_token": -1.4343480110168456, "logits_per_char": -0.3259881843220104, "bits_per_byte": 0.47030153691009957, "num_chars": 44}, {"sum_logits": -13.943263053894043, "num_tokens": 10, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -35.99482345581055, "logits_per_token": -1.3943263053894044, "logits_per_char": -0.3031144142150879, "bits_per_byte": 0.4373016622103846, "num_chars": 46}, {"sum_logits": -15.743097305297852, "num_tokens": 10, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -34.66140365600586, "logits_per_token": -1.574309730529785, "logits_per_char": -0.3422412457673446, "bits_per_byte": 0.49374974805656374, "num_chars": 46}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 266, "native_id": "Mercury_SC_415471", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.301265716552734, "logits_per_token_corr": -2.7168776194254556, "logits_per_char_corr": -0.4794489916633157, "bits_per_byte_corr": 0.6916986826323765}, "model_output": [{"sum_logits": -16.301265716552734, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -31.63313102722168, "logits_per_token": -2.7168776194254556, "logits_per_char": -0.4794489916633157, "bits_per_byte": 0.6916986826323765, "num_chars": 34}, {"sum_logits": -10.797565460205078, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -23.140817642211914, "logits_per_token": -1.799594243367513, "logits_per_char": -0.3483085632324219, "bits_per_byte": 0.502503036874936, "num_chars": 31}, {"sum_logits": -17.838123321533203, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -23.289081573486328, "logits_per_token": -4.459530830383301, "logits_per_char": -0.7755705791970958, "bits_per_byte": 1.118911828467835, "num_chars": 23}, {"sum_logits": -14.241852760314941, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -22.05545425415039, "logits_per_token": -3.5604631900787354, "logits_per_char": -0.7120926380157471, "bits_per_byte": 1.0273325175195962, "num_chars": 20}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 267, "native_id": "Mercury_7247065", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -36.50006103515625, "logits_per_token_corr": -3.650006103515625, "logits_per_char_corr": -0.869049072265625, "bits_per_byte_corr": 1.2537727868476722}, "model_output": [{"sum_logits": -29.651752471923828, "num_tokens": 9, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -35.595306396484375, "logits_per_token": -3.294639163547092, "logits_per_char": -0.823659790886773, "bits_per_byte": 1.1882898956928418, "num_chars": 36}, {"sum_logits": -31.9895076751709, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -40.64198303222656, "logits_per_token": -5.331584612528483, "logits_per_char": -0.7616549446469262, "bits_per_byte": 1.0988358115114678, "num_chars": 42}, {"sum_logits": -36.50006103515625, "num_tokens": 10, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -40.74968719482422, "logits_per_token": -3.650006103515625, "logits_per_char": -0.869049072265625, "bits_per_byte": 1.2537727868476722, "num_chars": 42}, {"sum_logits": -39.13630294799805, "num_tokens": 8, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -41.38805389404297, "logits_per_token": -4.892037868499756, "logits_per_char": -0.7673784891764323, "bits_per_byte": 1.1070931408204994, "num_chars": 51}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 268, "native_id": "MDSA_2011_5_3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.246507167816162, "logits_per_token_corr": -2.246507167816162, "logits_per_char_corr": -0.18720893065134683, "bits_per_byte_corr": 0.270085395861018}, "model_output": [{"sum_logits": -2.8467326164245605, "num_tokens": 1, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -16.48602294921875, "logits_per_token": -2.8467326164245605, "logits_per_char": -0.21897943203265852, "bits_per_byte": 0.31592054065042524, "num_chars": 13}, {"sum_logits": -2.246507167816162, "num_tokens": 1, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -16.763668060302734, "logits_per_token": -2.246507167816162, "logits_per_char": -0.18720893065134683, "bits_per_byte": 0.270085395861018, "num_chars": 12}, {"sum_logits": -5.110611438751221, "num_tokens": 1, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -14.220553398132324, "logits_per_token": -5.110611438751221, "logits_per_char": -0.5678457154168023, "bits_per_byte": 0.8192281976224548, "num_chars": 9}, {"sum_logits": -6.8810296058654785, "num_tokens": 1, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -15.890351295471191, "logits_per_token": -6.8810296058654785, "logits_per_char": -0.8601287007331848, "bits_per_byte": 1.2409034110749246, "num_chars": 8}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 269, "native_id": "MDSA_2009_5_39", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.109190940856934, "logits_per_token_corr": -2.221838188171387, "logits_per_char_corr": -0.31740545545305526, "bits_per_byte_corr": 0.4579192765335546}, "model_output": [{"sum_logits": -8.3439359664917, "num_tokens": 5, "num_tokens_all": 310, "is_greedy": false, "sum_logits_uncond": -30.378952026367188, "logits_per_token": -1.6687871932983398, "logits_per_char": -0.30903466542561847, "bits_per_byte": 0.4458427792726401, "num_chars": 27}, {"sum_logits": -18.404251098632812, "num_tokens": 4, "num_tokens_all": 309, "is_greedy": false, "sum_logits_uncond": -25.114803314208984, "logits_per_token": -4.601062774658203, "logits_per_char": -0.5936855193107359, "bits_per_byte": 0.8565071545578029, "num_chars": 31}, {"sum_logits": -11.109190940856934, "num_tokens": 5, "num_tokens_all": 310, "is_greedy": false, "sum_logits_uncond": -33.37052917480469, "logits_per_token": -2.221838188171387, "logits_per_char": -0.31740545545305526, "bits_per_byte": 0.4579192765335546, "num_chars": 35}, {"sum_logits": -15.551034927368164, "num_tokens": 6, "num_tokens_all": 311, "is_greedy": false, "sum_logits_uncond": -34.061859130859375, "logits_per_token": -2.591839154561361, "logits_per_char": -0.42029824128022064, "bits_per_byte": 0.606362188389763, "num_chars": 37}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 270, "native_id": "Mercury_187198", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.860523223876953, "logits_per_token_corr": -2.317835913764106, "logits_per_char_corr": -0.474102800542658, "bits_per_byte_corr": 0.6839857592149535}, "model_output": [{"sum_logits": -21.233314514160156, "num_tokens": 9, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -43.39037322998047, "logits_per_token": -2.3592571682400174, "logits_per_char": -0.5178857198575648, "bits_per_byte": 0.7471511597862565, "num_chars": 41}, {"sum_logits": -17.11929702758789, "num_tokens": 7, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -45.5698356628418, "logits_per_token": -2.4456138610839844, "logits_per_char": -0.41754382994116807, "bits_per_byte": 0.6023884128103406, "num_chars": 41}, {"sum_logits": -20.860523223876953, "num_tokens": 9, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -39.99679946899414, "logits_per_token": -2.317835913764106, "logits_per_char": -0.474102800542658, "bits_per_byte": 0.6839857592149535, "num_chars": 44}, {"sum_logits": -12.9765625, "num_tokens": 7, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -34.44963455200195, "logits_per_token": -1.8537946428571428, "logits_per_char": -0.294921875, "bits_per_byte": 0.4254823265124804, "num_chars": 44}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 271, "native_id": "MCAS_2000_4_36", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.274751663208008, "logits_per_token_corr": -10.137375831604004, "logits_per_char_corr": -1.6895626386006672, "bits_per_byte_corr": 2.4375236399822056}, "model_output": [{"sum_logits": -20.274751663208008, "num_tokens": 2, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -19.871131896972656, "logits_per_token": -10.137375831604004, "logits_per_char": -1.6895626386006672, "bits_per_byte": 2.4375236399822056, "num_chars": 12}, {"sum_logits": -5.375843048095703, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -12.766148567199707, "logits_per_token": -5.375843048095703, "logits_per_char": -1.0751686096191406, "bits_per_byte": 1.5511404212181303, "num_chars": 5}, {"sum_logits": -9.898523330688477, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -16.13360023498535, "logits_per_token": -9.898523330688477, "logits_per_char": -1.4140747615269251, "bits_per_byte": 2.040078645902604, "num_chars": 7}, {"sum_logits": -7.791378974914551, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -16.11587905883789, "logits_per_token": -7.791378974914551, "logits_per_char": -0.8657087749905057, "bits_per_byte": 1.2489537565337594, "num_chars": 9}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 272, "native_id": "Mercury_184100", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.54387855529785, "logits_per_token_corr": -3.9087757110595702, "logits_per_char_corr": -0.8497338502303414, "bits_per_byte_corr": 1.2259068118036796}, "model_output": [{"sum_logits": -20.505455017089844, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -34.07471466064453, "logits_per_token": -4.101091003417968, "logits_per_char": -0.7594612969292535, "bits_per_byte": 1.0956710468277218, "num_chars": 27}, {"sum_logits": -19.28459930419922, "num_tokens": 7, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -30.676532745361328, "logits_per_token": -2.7549427577427457, "logits_per_char": -0.7713839721679687, "bits_per_byte": 1.1128718312687582, "num_chars": 25}, {"sum_logits": -19.54387855529785, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -37.77337646484375, "logits_per_token": -3.9087757110595702, "logits_per_char": -0.8497338502303414, "bits_per_byte": 1.2259068118036796, "num_chars": 23}, {"sum_logits": -15.250435829162598, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -30.563444137573242, "logits_per_token": -3.0500871658325197, "logits_per_char": -0.49194954287621284, "bits_per_byte": 0.7097331658756146, "num_chars": 31}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 273, "native_id": "Mercury_LBS10814", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.414919853210449, "logits_per_token_corr": -1.4716399510701497, "logits_per_char_corr": -0.2597011678359088, "bits_per_byte_corr": 0.3746695869502072}, "model_output": [{"sum_logits": -8.214216232299805, "num_tokens": 2, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -15.550552368164062, "logits_per_token": -4.107108116149902, "logits_per_char": -0.5867297308785575, "bits_per_byte": 0.8464720730812192, "num_chars": 14}, {"sum_logits": -4.414919853210449, "num_tokens": 3, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -18.15780258178711, "logits_per_token": -1.4716399510701497, "logits_per_char": -0.2597011678359088, "bits_per_byte": 0.3746695869502072, "num_chars": 17}, {"sum_logits": -7.762055397033691, "num_tokens": 2, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -14.288697242736816, "logits_per_token": -3.8810276985168457, "logits_per_char": -0.9702569246292114, "bits_per_byte": 1.3997848535517456, "num_chars": 8}, {"sum_logits": -5.422048568725586, "num_tokens": 2, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -15.733524322509766, "logits_per_token": -2.711024284362793, "logits_per_char": -0.5422048568725586, "bits_per_byte": 0.7822362581565124, "num_chars": 10}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 274, "native_id": "Mercury_SC_408384", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.682456970214844, "logits_per_token_corr": -3.3364913940429686, "logits_per_char_corr": -0.490660499123966, "bits_per_byte_corr": 0.7078734688467578}, "model_output": [{"sum_logits": -11.62050724029541, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -31.372726440429688, "logits_per_token": -1.4525634050369263, "logits_per_char": -0.3748550722675939, "bits_per_byte": 0.5408015538129203, "num_chars": 31}, {"sum_logits": -16.682456970214844, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -28.616683959960938, "logits_per_token": -3.3364913940429686, "logits_per_char": -0.490660499123966, "bits_per_byte": 0.7078734688467578, "num_chars": 34}, {"sum_logits": -31.272489547729492, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -40.015533447265625, "logits_per_token": -4.467498506818499, "logits_per_char": -0.8229602512560392, "bits_per_byte": 1.187280673336676, "num_chars": 38}, {"sum_logits": -23.716230392456055, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -34.17020034790039, "logits_per_token": -3.388032913208008, "logits_per_char": -0.527027342054579, "bits_per_byte": 0.7603397327955789, "num_chars": 45}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 275, "native_id": "Mercury_7043068", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.809062957763672, "logits_per_token_corr": -2.544151851109096, "logits_per_char_corr": -0.5088303702218192, "bits_per_byte_corr": 0.7340870517732413}, "model_output": [{"sum_logits": -14.38182258605957, "num_tokens": 5, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -24.87392234802246, "logits_per_token": -2.876364517211914, "logits_per_char": -0.5992426077524821, "bits_per_byte": 0.8645243384944973, "num_chars": 24}, {"sum_logits": -18.663057327270508, "num_tokens": 9, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -41.589996337890625, "logits_per_token": -2.07367303636339, "logits_per_char": -0.6912243454544632, "bits_per_byte": 0.9972259353295903, "num_chars": 27}, {"sum_logits": -12.91848373413086, "num_tokens": 6, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -46.64217758178711, "logits_per_token": -2.153080622355143, "logits_per_char": -0.44546495634933997, "bits_per_byte": 0.642670083415473, "num_chars": 29}, {"sum_logits": -17.809062957763672, "num_tokens": 7, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -39.1238899230957, "logits_per_token": -2.544151851109096, "logits_per_char": -0.5088303702218192, "bits_per_byte": 0.7340870517732413, "num_chars": 35}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 276, "native_id": "Mercury_411071", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.19896125793457, "logits_per_token_corr": -1.5497403144836426, "logits_per_char_corr": -0.8855658939906529, "bits_per_byte_corr": 1.2776015236416343}, "model_output": [{"sum_logits": -9.526785850524902, "num_tokens": 4, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -16.443084716796875, "logits_per_token": -2.3816964626312256, "logits_per_char": -1.360969407217843, "bits_per_byte": 1.9634638145961851, "num_chars": 7}, {"sum_logits": -6.19896125793457, "num_tokens": 4, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -15.150472640991211, "logits_per_token": -1.5497403144836426, "logits_per_char": -0.8855658939906529, "bits_per_byte": 1.2776015236416343, "num_chars": 7}, {"sum_logits": -3.784449338912964, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -15.227701187133789, "logits_per_token": -1.892224669456482, "logits_per_char": -0.630741556485494, "bits_per_byte": 0.909967715624862, "num_chars": 6}, {"sum_logits": -5.460989952087402, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -17.322948455810547, "logits_per_token": -2.730494976043701, "logits_per_char": -0.910164992014567, "bits_per_byte": 1.3130905203711023, "num_chars": 6}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 277, "native_id": "NYSEDREGENTS_2010_4_24", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.128597259521484, "logits_per_token_corr": -3.1880995432535806, "logits_per_char_corr": -0.617051524500693, "bits_per_byte_corr": 0.8902171743707641}, "model_output": [{"sum_logits": -20.389114379882812, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -36.95521926879883, "logits_per_token": -3.3981857299804688, "logits_per_char": -0.5510571454022382, "bits_per_byte": 0.7950074109188086, "num_chars": 37}, {"sum_logits": -19.128597259521484, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -33.62405014038086, "logits_per_token": -3.1880995432535806, "logits_per_char": -0.617051524500693, "bits_per_byte": 0.8902171743707641, "num_chars": 31}, {"sum_logits": -15.089734077453613, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -34.97698974609375, "logits_per_token": -2.514955679575602, "logits_per_char": -0.41915927992926705, "bits_per_byte": 0.6047190144969768, "num_chars": 36}, {"sum_logits": -14.744219779968262, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -36.60773849487305, "logits_per_token": -2.4573699633280435, "logits_per_char": -0.4914739926656087, "bits_per_byte": 0.7090470919450819, "num_chars": 30}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 278, "native_id": "Mercury_SC_409673", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -34.245689392089844, "logits_per_token_corr": -2.6342837993915262, "logits_per_char_corr": -0.5435823713030133, "bits_per_byte_corr": 0.7842235913940839}, "model_output": [{"sum_logits": -16.60750961303711, "num_tokens": 9, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -28.309185028076172, "logits_per_token": -1.845278845893012, "logits_per_char": -0.4370397266588713, "bits_per_byte": 0.6305150463226747, "num_chars": 38}, {"sum_logits": -18.774768829345703, "num_tokens": 9, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -33.71862030029297, "logits_per_token": -2.086085425482856, "logits_per_char": -0.447018305460612, "bits_per_byte": 0.644911092475076, "num_chars": 42}, {"sum_logits": -32.701663970947266, "num_tokens": 12, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -46.63981628417969, "logits_per_token": -2.7251386642456055, "logits_per_char": -0.6055863698323568, "bits_per_byte": 0.8736764525877186, "num_chars": 54}, {"sum_logits": -34.245689392089844, "num_tokens": 13, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -44.50273513793945, "logits_per_token": -2.6342837993915262, "logits_per_char": -0.5435823713030133, "bits_per_byte": 0.7842235913940839, "num_chars": 63}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 279, "native_id": "Mercury_SC_400374", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.636545181274414, "logits_per_token_corr": -3.212181727091471, "logits_per_char_corr": -0.507186588488127, "bits_per_byte_corr": 0.731715576017738}, "model_output": [{"sum_logits": -14.822376251220703, "num_tokens": 4, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -26.652910232543945, "logits_per_token": -3.705594062805176, "logits_per_char": -0.8234653472900391, "bits_per_byte": 1.188009372880101, "num_chars": 18}, {"sum_logits": -9.636545181274414, "num_tokens": 3, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -26.27800178527832, "logits_per_token": -3.212181727091471, "logits_per_char": -0.507186588488127, "bits_per_byte": 0.731715576017738, "num_chars": 19}, {"sum_logits": -17.900169372558594, "num_tokens": 4, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -33.17048645019531, "logits_per_token": -4.475042343139648, "logits_per_char": -0.7458403905232748, "bits_per_byte": 1.0760202327033894, "num_chars": 24}, {"sum_logits": -15.866301536560059, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -35.069313049316406, "logits_per_token": -3.173260307312012, "logits_per_char": -0.4807970162593957, "bits_per_byte": 0.6936434710321389, "num_chars": 33}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 280, "native_id": "CSZ_2009_8_CSZ20740", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.891619682312012, "logits_per_token_corr": -2.945809841156006, "logits_per_char_corr": -1.1783239364624023, "bits_per_byte_corr": 1.6999620996962912}, "model_output": [{"sum_logits": -8.069183349609375, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -17.30475616455078, "logits_per_token": -2.0172958374023438, "logits_per_char": -1.152740478515625, "bits_per_byte": 1.6630529717876577, "num_chars": 7}, {"sum_logits": -5.891619682312012, "num_tokens": 2, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -17.31517791748047, "logits_per_token": -2.945809841156006, "logits_per_char": -1.1783239364624023, "bits_per_byte": 1.6999620996962912, "num_chars": 5}, {"sum_logits": -6.579619407653809, "num_tokens": 2, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -18.261821746826172, "logits_per_token": -3.2898097038269043, "logits_per_char": -1.096603234608968, "bits_per_byte": 1.5820640483942914, "num_chars": 6}, {"sum_logits": -6.586043357849121, "num_tokens": 2, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -18.398906707763672, "logits_per_token": -3.2930216789245605, "logits_per_char": -1.0976738929748535, "bits_per_byte": 1.5836086819092416, "num_chars": 6}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 281, "native_id": "Mercury_SC_406482", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.301117897033691, "logits_per_token_corr": -2.0501863161722818, "logits_per_char_corr": -0.3075279474258423, "bits_per_byte_corr": 0.4436690446863433}, "model_output": [{"sum_logits": -13.791476249694824, "num_tokens": 4, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -25.055641174316406, "logits_per_token": -3.447869062423706, "logits_per_char": -0.6268852840770375, "bits_per_byte": 0.9044042905448608, "num_chars": 22}, {"sum_logits": -18.048585891723633, "num_tokens": 4, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -26.210838317871094, "logits_per_token": -4.512146472930908, "logits_per_char": -0.6941763804509089, "bits_per_byte": 1.001484821579496, "num_chars": 26}, {"sum_logits": -26.885784149169922, "num_tokens": 6, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -33.06081771850586, "logits_per_token": -4.480964024861653, "logits_per_char": -0.8401807546615601, "bits_per_byte": 1.2121246082014503, "num_chars": 32}, {"sum_logits": -12.301117897033691, "num_tokens": 6, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -34.21109390258789, "logits_per_token": -2.0501863161722818, "logits_per_char": -0.3075279474258423, "bits_per_byte": 0.4436690446863433, "num_chars": 40}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 282, "native_id": "OHAT_2007_8_24", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -28.557819366455078, "logits_per_token_corr": -3.5697274208068848, "logits_per_char_corr": -0.7718329558501372, "bits_per_byte_corr": 1.1135195778004632}, "model_output": [{"sum_logits": -28.557819366455078, "num_tokens": 8, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -42.174232482910156, "logits_per_token": -3.5697274208068848, "logits_per_char": -0.7718329558501372, "bits_per_byte": 1.1135195778004632, "num_chars": 37}, {"sum_logits": -29.053407669067383, "num_tokens": 9, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -42.71470642089844, "logits_per_token": -3.228156407674154, "logits_per_char": -0.6603047197515314, "bits_per_byte": 0.9526183446617955, "num_chars": 44}, {"sum_logits": -28.302404403686523, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -48.71009063720703, "logits_per_token": -4.043200629098075, "logits_per_char": -0.6903025464313787, "bits_per_byte": 0.9958960604502889, "num_chars": 41}, {"sum_logits": -24.2938175201416, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -30.961040496826172, "logits_per_token": -3.4705453600202287, "logits_per_char": -0.60734543800354, "bits_per_byte": 0.8762142515148721, "num_chars": 40}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 283, "native_id": "Mercury_188335", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -28.703996658325195, "logits_per_token_corr": -3.5879995822906494, "logits_per_char_corr": -0.7359999143160306, "bits_per_byte_corr": 1.0618234264792021}, "model_output": [{"sum_logits": -34.351905822753906, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -45.37736511230469, "logits_per_token": -3.816878424750434, "logits_per_char": -0.7633756849500868, "bits_per_byte": 1.101318315013497, "num_chars": 45}, {"sum_logits": -28.703996658325195, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -46.266563415527344, "logits_per_token": -3.5879995822906494, "logits_per_char": -0.7359999143160306, "bits_per_byte": 1.0618234264792021, "num_chars": 39}, {"sum_logits": -23.6684513092041, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -36.47477722167969, "logits_per_token": -4.73369026184082, "logits_per_char": -0.8766093077483, "bits_per_byte": 1.2646799010864882, "num_chars": 27}, {"sum_logits": -18.204099655151367, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -32.57741165161133, "logits_per_token": -3.0340166091918945, "logits_per_char": -0.9102049827575683, "bits_per_byte": 1.3131482148177116, "num_chars": 20}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 284, "native_id": "Mercury_7128555", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.149005889892578, "logits_per_token_corr": -2.0149005889892577, "logits_per_char_corr": -0.3534913314016242, "bits_per_byte_corr": 0.5099801908107268}, "model_output": [{"sum_logits": -18.262210845947266, "num_tokens": 10, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -40.36830520629883, "logits_per_token": -1.8262210845947267, "logits_per_char": -0.326110907963344, "bits_per_byte": 0.4704785896988516, "num_chars": 56}, {"sum_logits": -20.149005889892578, "num_tokens": 10, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -41.475337982177734, "logits_per_token": -2.0149005889892577, "logits_per_char": -0.3534913314016242, "bits_per_byte": 0.5099801908107268, "num_chars": 57}, {"sum_logits": -18.18680763244629, "num_tokens": 12, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -45.49182891845703, "logits_per_token": -1.5155673027038574, "logits_per_char": -0.35660407122443705, "bits_per_byte": 0.5144709251166797, "num_chars": 51}, {"sum_logits": -19.09511375427246, "num_tokens": 11, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -36.085662841796875, "logits_per_token": -1.7359194322065874, "logits_per_char": -0.37441399518181295, "bits_per_byte": 0.5401652140886138, "num_chars": 51}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 285, "native_id": "Mercury_407517", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.582447052001953, "logits_per_token_corr": -7.582447052001953, "logits_per_char_corr": -2.527482350667318, "bits_per_byte_corr": 3.646386253244739}, "model_output": [{"sum_logits": -2.921017646789551, "num_tokens": 1, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -8.11905574798584, "logits_per_token": -2.921017646789551, "logits_per_char": -0.9736725489298502, "bits_per_byte": 1.4047125577918207, "num_chars": 3}, {"sum_logits": -4.654999732971191, "num_tokens": 1, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -8.761136054992676, "logits_per_token": -4.654999732971191, "logits_per_char": -1.5516665776570637, "bits_per_byte": 2.238581676700604, "num_chars": 3}, {"sum_logits": -7.582447052001953, "num_tokens": 1, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -9.764516830444336, "logits_per_token": -7.582447052001953, "logits_per_char": -2.527482350667318, "bits_per_byte": 3.646386253244739, "num_chars": 3}, {"sum_logits": -7.146437644958496, "num_tokens": 1, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -10.375563621520996, "logits_per_token": -7.146437644958496, "logits_per_char": -2.382145881652832, "bits_per_byte": 3.4367100501370773, "num_chars": 3}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 286, "native_id": "Mercury_405950", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.075634002685547, "logits_per_token_corr": -2.3844542503356934, "logits_per_char_corr": -0.41468769571055536, "bits_per_byte_corr": 0.5982678821197195}, "model_output": [{"sum_logits": -19.797719955444336, "num_tokens": 6, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -36.906211853027344, "logits_per_token": -3.299619992574056, "logits_per_char": -0.4828712184254716, "bits_per_byte": 0.6966359122109398, "num_chars": 41}, {"sum_logits": -26.990232467651367, "num_tokens": 9, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -35.43589782714844, "logits_per_token": -2.9989147186279297, "logits_per_char": -0.5742602652691781, "bits_per_byte": 0.828482436884019, "num_chars": 47}, {"sum_logits": -19.075634002685547, "num_tokens": 8, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -31.336334228515625, "logits_per_token": -2.3844542503356934, "logits_per_char": -0.41468769571055536, "bits_per_byte": 0.5982678821197195, "num_chars": 46}, {"sum_logits": -15.239822387695312, "num_tokens": 10, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -27.747451782226562, "logits_per_token": -1.5239822387695312, "logits_per_char": -0.29882004681755514, "bits_per_byte": 0.43110619966220437, "num_chars": 51}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 287, "native_id": "MCAS_2004_9_5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.493732452392578, "logits_per_token_corr": -1.4367165565490723, "logits_per_char_corr": -0.3482949227997751, "bits_per_byte_corr": 0.5024833578904009}, "model_output": [{"sum_logits": -10.79229736328125, "num_tokens": 7, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -29.341590881347656, "logits_per_token": -1.5417567661830358, "logits_per_char": -0.37214818494073276, "bits_per_byte": 0.5368963408902097, "num_chars": 29}, {"sum_logits": -7.931967735290527, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -27.94733428955078, "logits_per_token": -1.3219946225484211, "logits_per_char": -0.27351612880312165, "bits_per_byte": 0.39460036262769405, "num_chars": 29}, {"sum_logits": -11.493732452392578, "num_tokens": 8, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -30.57801055908203, "logits_per_token": -1.4367165565490723, "logits_per_char": -0.3482949227997751, "bits_per_byte": 0.5024833578904009, "num_chars": 33}, {"sum_logits": -9.019888877868652, "num_tokens": 7, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -27.14307403564453, "logits_per_token": -1.2885555539812361, "logits_per_char": -0.2818715274333954, "bits_per_byte": 0.40665465479624907, "num_chars": 32}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 288, "native_id": "NCEOGA_2013_8_28", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.812685012817383, "logits_per_token_corr": -2.302114168802897, "logits_per_char_corr": -0.600551522296408, "bits_per_byte_corr": 0.866412703015968}, "model_output": [{"sum_logits": -14.722160339355469, "num_tokens": 6, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -22.921510696411133, "logits_per_token": -2.453693389892578, "logits_per_char": -0.8660094317267922, "bits_per_byte": 1.2493875125162102, "num_chars": 17}, {"sum_logits": -13.812685012817383, "num_tokens": 6, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -21.32781982421875, "logits_per_token": -2.302114168802897, "logits_per_char": -0.600551522296408, "bits_per_byte": 0.866412703015968, "num_chars": 23}, {"sum_logits": -18.98276138305664, "num_tokens": 7, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -23.591806411743164, "logits_per_token": -2.711823054722377, "logits_per_char": -0.7909483909606934, "bits_per_byte": 1.141097321238917, "num_chars": 24}, {"sum_logits": -18.05457305908203, "num_tokens": 7, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -22.68181037902832, "logits_per_token": -2.5792247227260043, "logits_per_char": -0.5642054080963135, "bits_per_byte": 0.81397634430387, "num_chars": 32}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 289, "native_id": "Mercury_SC_406451", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.65837860107422, "logits_per_token_corr": -2.241670781915838, "logits_per_char_corr": -0.4251444586392107, "bits_per_byte_corr": 0.6133538021406529}, "model_output": [{"sum_logits": -18.803524017333984, "num_tokens": 8, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -29.896053314208984, "logits_per_token": -2.350440502166748, "logits_per_char": -0.4178560892740885, "bits_per_byte": 0.6028389078014166, "num_chars": 45}, {"sum_logits": -25.2966251373291, "num_tokens": 12, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -44.13301086425781, "logits_per_token": -2.1080520947774253, "logits_per_char": -0.4599386388605291, "bits_per_byte": 0.6635511933977819, "num_chars": 55}, {"sum_logits": -24.65837860107422, "num_tokens": 11, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -38.66419982910156, "logits_per_token": -2.241670781915838, "logits_per_char": -0.4251444586392107, "bits_per_byte": 0.6133538021406529, "num_chars": 58}, {"sum_logits": -27.342378616333008, "num_tokens": 12, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -42.278785705566406, "logits_per_token": -2.278531551361084, "logits_per_char": -0.4557063102722168, "bits_per_byte": 0.6574452339320068, "num_chars": 60}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 290, "native_id": "Mercury_7109323", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -41.796730041503906, "logits_per_token_corr": -3.483060836791992, "logits_per_char_corr": -0.6146577947279986, "bits_per_byte_corr": 0.8867637522984672}, "model_output": [{"sum_logits": -21.5961971282959, "num_tokens": 7, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -35.734130859375, "logits_per_token": -3.0851710183279857, "logits_per_char": -0.5267365153242902, "bits_per_byte": 0.7599201585140329, "num_chars": 41}, {"sum_logits": -35.71689987182617, "num_tokens": 10, "num_tokens_all": 235, "is_greedy": false, "sum_logits_uncond": -44.02074432373047, "logits_per_token": -3.571689987182617, "logits_per_char": -0.6739037711665316, "bits_per_byte": 0.9722376286990244, "num_chars": 53}, {"sum_logits": -32.83123016357422, "num_tokens": 11, "num_tokens_all": 236, "is_greedy": false, "sum_logits_uncond": -46.51573181152344, "logits_per_token": -2.9846572875976562, "logits_per_char": -0.5759864940977933, "bits_per_byte": 0.8309728586545037, "num_chars": 57}, {"sum_logits": -41.796730041503906, "num_tokens": 12, "num_tokens_all": 237, "is_greedy": false, "sum_logits_uncond": -54.43492126464844, "logits_per_token": -3.483060836791992, "logits_per_char": -0.6146577947279986, "bits_per_byte": 0.8867637522984672, "num_chars": 68}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 291, "native_id": "Mercury_404132", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -26.15327262878418, "logits_per_token_corr": -1.3764880330939042, "logits_per_char_corr": -0.8436539557672316, "bits_per_byte_corr": 1.2171353782126164}, "model_output": [{"sum_logits": -29.591064453125, "num_tokens": 13, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -47.555782318115234, "logits_per_token": -2.2762357271634617, "logits_per_char": -1.0203815328663792, "bits_per_byte": 1.4720993772820616, "num_chars": 29}, {"sum_logits": -26.15327262878418, "num_tokens": 19, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -44.501102447509766, "logits_per_token": -1.3764880330939042, "logits_per_char": -0.8436539557672316, "bits_per_byte": 1.2171353782126164, "num_chars": 31}, {"sum_logits": -25.104957580566406, "num_tokens": 12, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -38.61371612548828, "logits_per_token": -2.0920797983805337, "logits_per_char": -1.0460398991902669, "bits_per_byte": 1.5091165751348734, "num_chars": 24}, {"sum_logits": -16.30921173095703, "num_tokens": 15, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -32.50089645385742, "logits_per_token": -1.0872807820638022, "logits_per_char": -0.6795504887898763, "bits_per_byte": 0.9803841202115301, "num_chars": 24}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 292, "native_id": "Mercury_7210210", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.245726585388184, "logits_per_token_corr": -4.245726585388184, "logits_per_char_corr": -0.8491453170776367, "bits_per_byte_corr": 1.225057737942873}, "model_output": [{"sum_logits": -4.754141807556152, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -14.924777030944824, "logits_per_token": -4.754141807556152, "logits_per_char": -0.5282379786173502, "bits_per_byte": 0.762086312161009, "num_chars": 9}, {"sum_logits": -1.7360485792160034, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": true, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -1.7360485792160034, "logits_per_char": -0.2893414298693339, "bits_per_byte": 0.4174314459965097, "num_chars": 6}, {"sum_logits": -4.245726585388184, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -12.599719047546387, "logits_per_token": -4.245726585388184, "logits_per_char": -0.8491453170776367, "bits_per_byte": 1.225057737942873, "num_chars": 5}, {"sum_logits": -7.784414291381836, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -13.711965560913086, "logits_per_token": -7.784414291381836, "logits_per_char": -1.1120591844831194, "bits_per_byte": 1.6043622706299738, "num_chars": 7}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 293, "native_id": "Mercury_SC_408042", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.413299798965454, "logits_per_token_corr": -3.413299798965454, "logits_per_char_corr": -0.4876142569950649, "bits_per_byte_corr": 0.7034786704340421}, "model_output": [{"sum_logits": -5.7523193359375, "num_tokens": 1, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -12.779866218566895, "logits_per_token": -5.7523193359375, "logits_per_char": -0.7190399169921875, "bits_per_byte": 1.0373553224465861, "num_chars": 8}, {"sum_logits": -3.413299798965454, "num_tokens": 1, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -13.562169075012207, "logits_per_token": -3.413299798965454, "logits_per_char": -0.4876142569950649, "bits_per_byte": 0.7034786704340421, "num_chars": 7}, {"sum_logits": -5.227924346923828, "num_tokens": 1, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -13.92944049835205, "logits_per_token": -5.227924346923828, "logits_per_char": -0.8713207244873047, "bits_per_byte": 1.2570500882425164, "num_chars": 6}, {"sum_logits": -5.239562034606934, "num_tokens": 1, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -15.081374168395996, "logits_per_token": -5.239562034606934, "logits_per_char": -0.8732603391011556, "bits_per_byte": 1.2598483606271569, "num_chars": 6}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 294, "native_id": "MCAS_2004_8_14", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -2.0255050659179688, "logits_per_token_corr": -2.0255050659179688, "logits_per_char_corr": -0.3375841776529948, "bits_per_byte_corr": 0.48703101898290435}, "model_output": [{"sum_logits": -3.765488624572754, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -14.251230239868164, "logits_per_token": -1.882744312286377, "logits_per_char": -0.25103257497151693, "bits_per_byte": 0.3621634510132546, "num_chars": 15}, {"sum_logits": -3.9379377365112305, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -16.243860244750977, "logits_per_token": -1.9689688682556152, "logits_per_char": -0.32816147804260254, "bits_per_byte": 0.4734369369831953, "num_chars": 12}, {"sum_logits": -2.0255050659179688, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -12.671343803405762, "logits_per_token": -2.0255050659179688, "logits_per_char": -0.3375841776529948, "bits_per_byte": 0.48703101898290435, "num_chars": 6}, {"sum_logits": -8.210583686828613, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -10.223247528076172, "logits_per_token": -8.210583686828613, "logits_per_char": -2.0526459217071533, "bits_per_byte": 2.961342091949993, "num_chars": 4}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 295, "native_id": "TIMSS_2011_4_pg5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.117603302001953, "logits_per_token_corr": -4.117603302001953, "logits_per_char_corr": -0.8235206604003906, "bits_per_byte_corr": 1.1880891728301013}, "model_output": [{"sum_logits": -6.724538803100586, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -14.735373497009277, "logits_per_token": -6.724538803100586, "logits_per_char": -1.3449077606201172, "bits_per_byte": 1.9402917567011182, "num_chars": 5}, {"sum_logits": -4.117603302001953, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -15.196450233459473, "logits_per_token": -4.117603302001953, "logits_per_char": -0.8235206604003906, "bits_per_byte": 1.1880891728301013, "num_chars": 5}, {"sum_logits": -9.030838966369629, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -13.8252534866333, "logits_per_token": -9.030838966369629, "logits_per_char": -2.2577097415924072, "bits_per_byte": 3.257186647964409, "num_chars": 4}, {"sum_logits": -10.031065940856934, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -14.59682846069336, "logits_per_token": -10.031065940856934, "logits_per_char": -2.0062131881713867, "bits_per_byte": 2.8943538175429757, "num_chars": 5}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 296, "native_id": "Mercury_SC_406833", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -20.01251983642578, "logits_per_token_corr": -2.858931405203683, "logits_per_char_corr": -0.6455651560137349, "bits_per_byte_corr": 0.9313536491523944}, "model_output": [{"sum_logits": -25.43896484375, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -27.03475570678711, "logits_per_token": -3.6341378348214284, "logits_per_char": -0.9085344587053571, "bits_per_byte": 1.310738158051899, "num_chars": 28}, {"sum_logits": -18.326461791992188, "num_tokens": 6, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -20.594051361083984, "logits_per_token": -3.0544102986653647, "logits_per_char": -0.6545164925711495, "bits_per_byte": 0.9442676980131138, "num_chars": 28}, {"sum_logits": -20.01251983642578, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -24.598941802978516, "logits_per_token": -2.858931405203683, "logits_per_char": -0.6455651560137349, "bits_per_byte": 0.9313536491523944, "num_chars": 31}, {"sum_logits": -28.470008850097656, "num_tokens": 6, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -30.204517364501953, "logits_per_token": -4.745001475016276, "logits_per_char": -0.9183873822612147, "bits_per_byte": 1.3249529220042031, "num_chars": 31}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 297, "native_id": "Mercury_7029558", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.386402130126953, "logits_per_token_corr": -3.0966005325317383, "logits_per_char_corr": -0.5898286728631883, "bits_per_byte_corr": 0.8509429013144518}, "model_output": [{"sum_logits": -6.6234588623046875, "num_tokens": 1, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -15.006959915161133, "logits_per_token": -6.6234588623046875, "logits_per_char": -0.7359398735894097, "bits_per_byte": 1.061736806020655, "num_chars": 9}, {"sum_logits": -7.476171970367432, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -19.371938705444336, "logits_per_token": -3.738085985183716, "logits_per_char": -0.6796519973061301, "bits_per_byte": 0.9805305660445375, "num_chars": 11}, {"sum_logits": -12.386402130126953, "num_tokens": 4, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -28.577634811401367, "logits_per_token": -3.0966005325317383, "logits_per_char": -0.5898286728631883, "bits_per_byte": 0.8509429013144518, "num_chars": 21}, {"sum_logits": -13.408527374267578, "num_tokens": 4, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -29.420303344726562, "logits_per_token": -3.3521318435668945, "logits_per_char": -0.6094785170121626, "bits_per_byte": 0.8792916340224384, "num_chars": 22}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 298, "native_id": "Mercury_7138390", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.62542724609375, "logits_per_token_corr": -1.4531784057617188, "logits_per_char_corr": -0.3229285346137153, "bits_per_byte_corr": 0.4658873954490817}, "model_output": [{"sum_logits": -15.408851623535156, "num_tokens": 8, "num_tokens_all": 236, "is_greedy": false, "sum_logits_uncond": -27.6943359375, "logits_per_token": -1.9261064529418945, "logits_per_char": -0.4054960953561883, "bits_per_byte": 0.5850072058706314, "num_chars": 38}, {"sum_logits": -11.62542724609375, "num_tokens": 8, "num_tokens_all": 236, "is_greedy": false, "sum_logits_uncond": -26.705795288085938, "logits_per_token": -1.4531784057617188, "logits_per_char": -0.3229285346137153, "bits_per_byte": 0.4658873954490817, "num_chars": 36}, {"sum_logits": -10.345620155334473, "num_tokens": 7, "num_tokens_all": 235, "is_greedy": false, "sum_logits_uncond": -27.624555587768555, "logits_per_token": -1.4779457364763533, "logits_per_char": -0.2722531619824861, "bits_per_byte": 0.3927782866587546, "num_chars": 38}, {"sum_logits": -11.655543327331543, "num_tokens": 7, "num_tokens_all": 235, "is_greedy": false, "sum_logits_uncond": -28.201683044433594, "logits_per_token": -1.6650776181902205, "logits_per_char": -0.3237650924258762, "bits_per_byte": 0.4670942932561041, "num_chars": 36}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 299, "native_id": "MEAP_2005_5_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.630708694458008, "logits_per_token_corr": -2.181189854939779, "logits_per_char_corr": -0.4907677173614502, "bits_per_byte_corr": 0.7080281520662693}, "model_output": [{"sum_logits": -19.630708694458008, "num_tokens": 9, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -41.161293029785156, "logits_per_token": -2.181189854939779, "logits_per_char": -0.4907677173614502, "bits_per_byte": 0.7080281520662693, "num_chars": 40}, {"sum_logits": -16.898284912109375, "num_tokens": 10, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -35.33955764770508, "logits_per_token": -1.6898284912109376, "logits_per_char": -0.34486295738998723, "bits_per_byte": 0.4975320784131939, "num_chars": 49}, {"sum_logits": -27.92466163635254, "num_tokens": 17, "num_tokens_all": 230, "is_greedy": false, "sum_logits_uncond": -52.48216247558594, "logits_per_token": -1.642627155079561, "logits_per_char": -0.36742975837305974, "bits_per_byte": 0.5300890902802242, "num_chars": 76}, {"sum_logits": -25.78624153137207, "num_tokens": 20, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -49.98857116699219, "logits_per_token": -1.2893120765686035, "logits_per_char": -0.3144663601386838, "bits_per_byte": 0.45367905829880784, "num_chars": 82}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 300, "native_id": "MCAS_2000_4_30", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.140172958374023, "logits_per_token_corr": -3.785043239593506, "logits_per_char_corr": -1.261681079864502, "bits_per_byte_corr": 1.820221037105257}, "model_output": [{"sum_logits": -6.281868934631348, "num_tokens": 1, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -14.295531272888184, "logits_per_token": -6.281868934631348, "logits_per_char": -0.8974098478044782, "bits_per_byte": 1.2946887370733702, "num_chars": 7}, {"sum_logits": -5.906267166137695, "num_tokens": 1, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -11.50367259979248, "logits_per_token": -5.906267166137695, "logits_per_char": -0.6562519073486328, "bits_per_byte": 0.9467713723064762, "num_chars": 9}, {"sum_logits": -6.050206184387207, "num_tokens": 1, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -13.734278678894043, "logits_per_token": -6.050206184387207, "logits_per_char": -0.6050206184387207, "bits_per_byte": 0.8728602458577431, "num_chars": 10}, {"sum_logits": -15.140172958374023, "num_tokens": 4, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -17.49211311340332, "logits_per_token": -3.785043239593506, "logits_per_char": -1.261681079864502, "bits_per_byte": 1.820221037105257, "num_chars": 12}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 301, "native_id": "MCAS_1998_4_12", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.597347259521484, "logits_per_token_corr": -1.5088497508655896, "logits_per_char_corr": -0.38598481998887174, "bits_per_byte_corr": 0.5568583856567646}, "model_output": [{"sum_logits": -23.033742904663086, "num_tokens": 9, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -32.86443328857422, "logits_per_token": -2.5593047671847873, "logits_per_char": -0.8226336751665387, "bits_per_byte": 1.1868095236318805, "num_chars": 28}, {"sum_logits": -27.759349822998047, "num_tokens": 11, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -40.73014831542969, "logits_per_token": -2.523577256636186, "logits_per_char": -0.895462897516066, "bits_per_byte": 1.2918798815474186, "num_chars": 31}, {"sum_logits": -16.597347259521484, "num_tokens": 11, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -39.64264678955078, "logits_per_token": -1.5088497508655896, "logits_per_char": -0.38598481998887174, "bits_per_byte": 0.5568583856567646, "num_chars": 43}, {"sum_logits": -31.000551223754883, "num_tokens": 10, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -36.14435577392578, "logits_per_token": -3.100055122375488, "logits_per_char": -1.0333517074584961, "bits_per_byte": 1.4908113838455863, "num_chars": 30}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 302, "native_id": "Mercury_175840", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.997152328491211, "logits_per_token_corr": -2.9992880821228027, "logits_per_char_corr": -0.4443389751293041, "bits_per_byte_corr": 0.641045635893192}, "model_output": [{"sum_logits": -12.379805564880371, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -22.730287551879883, "logits_per_token": -3.0949513912200928, "logits_per_char": -0.45851131721779154, "bits_per_byte": 0.6614920035420495, "num_chars": 27}, {"sum_logits": -11.997152328491211, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -25.566326141357422, "logits_per_token": -2.9992880821228027, "logits_per_char": -0.4443389751293041, "bits_per_byte": 0.641045635893192, "num_chars": 27}, {"sum_logits": -16.9151668548584, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -35.04581832885742, "logits_per_token": -2.416452407836914, "logits_per_char": -0.5125808137835879, "bits_per_byte": 0.7394977981009427, "num_chars": 33}, {"sum_logits": -13.160097122192383, "num_tokens": 5, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -27.15297508239746, "logits_per_token": -2.6320194244384765, "logits_per_char": -0.4874110045256438, "bits_per_byte": 0.7031854391043596, "num_chars": 27}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 303, "native_id": "Mercury_7099190", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.733694076538086, "logits_per_token_corr": -2.0917117595672607, "logits_per_char_corr": -0.3281116485595703, "bits_per_byte_corr": 0.4733650482351346}, "model_output": [{"sum_logits": -18.75897216796875, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -34.51472473144531, "logits_per_token": -2.6798531668526784, "logits_per_char": -0.4362551666969477, "bits_per_byte": 0.6293831655563267, "num_chars": 43}, {"sum_logits": -19.69225311279297, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -36.099918365478516, "logits_per_token": -2.8131790161132812, "logits_per_char": -0.43760562472873266, "bits_per_byte": 0.6313314646617129, "num_chars": 45}, {"sum_logits": -16.733694076538086, "num_tokens": 8, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -40.40912628173828, "logits_per_token": -2.0917117595672607, "logits_per_char": -0.3281116485595703, "bits_per_byte": 0.4733650482351346, "num_chars": 51}, {"sum_logits": -17.126005172729492, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -37.357608795166016, "logits_per_token": -2.4465721675327847, "logits_per_char": -0.34252010345458983, "bits_per_byte": 0.4941520546590665, "num_chars": 50}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 304, "native_id": "Mercury_SC_401605", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.97825813293457, "logits_per_token_corr": -3.496376355489095, "logits_per_char_corr": -0.6767180042882119, "bits_per_byte_corr": 0.976297708867581}, "model_output": [{"sum_logits": -20.97825813293457, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -32.400516510009766, "logits_per_token": -3.496376355489095, "logits_per_char": -0.6767180042882119, "bits_per_byte": 0.976297708867581, "num_chars": 31}, {"sum_logits": -18.505529403686523, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -34.46227264404297, "logits_per_token": -3.0842549006144204, "logits_per_char": -0.5442802765790153, "bits_per_byte": 0.785230455874783, "num_chars": 34}, {"sum_logits": -19.635028839111328, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -31.08332061767578, "logits_per_token": -2.805004119873047, "logits_per_char": -0.5610008239746094, "bits_per_byte": 0.8093531066833727, "num_chars": 35}, {"sum_logits": -34.791236877441406, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -47.05701446533203, "logits_per_token": -5.798539479573567, "logits_per_char": -0.8697809219360352, "bits_per_byte": 1.25482862273785, "num_chars": 40}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 305, "native_id": "TAKS_2009_5_36", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.8627281188964844, "logits_per_token_corr": -2.8627281188964844, "logits_per_char_corr": -0.47712135314941406, "bits_per_byte_corr": 0.688340610091386}, "model_output": [{"sum_logits": -2.8627281188964844, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -13.454791069030762, "logits_per_token": -2.8627281188964844, "logits_per_char": -0.47712135314941406, "bits_per_byte": 0.688340610091386, "num_chars": 6}, {"sum_logits": -5.262542724609375, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -14.241479873657227, "logits_per_token": -5.262542724609375, "logits_per_char": -1.052508544921875, "bits_per_byte": 1.5184488582531386, "num_chars": 5}, {"sum_logits": -9.578584671020508, "num_tokens": 2, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -17.431182861328125, "logits_per_token": -4.789292335510254, "logits_per_char": -1.9157169342041016, "bits_per_byte": 2.7637953207252517, "num_chars": 5}, {"sum_logits": -6.709292411804199, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -14.653677940368652, "logits_per_token": -6.709292411804199, "logits_per_char": -0.8386615514755249, "bits_per_byte": 1.2099328612988531, "num_chars": 8}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 306, "native_id": "Mercury_7171570", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.1309614181518555, "logits_per_token_corr": -2.0654807090759277, "logits_per_char_corr": -0.2753974278767904, "bits_per_byte_corr": 0.3973145034717069}, "model_output": [{"sum_logits": -3.146749973297119, "num_tokens": 3, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -17.77805519104004, "logits_per_token": -1.0489166577657063, "logits_per_char": -0.1656184196472168, "bits_per_byte": 0.2389368727050786, "num_chars": 19}, {"sum_logits": -5.246869087219238, "num_tokens": 4, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -20.099563598632812, "logits_per_token": -1.3117172718048096, "logits_per_char": -0.3086393580717199, "bits_per_byte": 0.44527247131354325, "num_chars": 17}, {"sum_logits": -5.3214030265808105, "num_tokens": 2, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -15.7699556350708, "logits_per_token": -2.6607015132904053, "logits_per_char": -0.3801002161843436, "bits_per_byte": 0.5483686969303694, "num_chars": 14}, {"sum_logits": -4.1309614181518555, "num_tokens": 2, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -17.078739166259766, "logits_per_token": -2.0654807090759277, "logits_per_char": -0.2753974278767904, "bits_per_byte": 0.3973145034717069, "num_chars": 15}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 307, "native_id": "Mercury_SC_402057", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.976397514343262, "logits_per_token_corr": -1.9960662523905437, "logits_per_char_corr": -0.4435702783090097, "bits_per_byte_corr": 0.6399366408026054}, "model_output": [{"sum_logits": -6.75174617767334, "num_tokens": 2, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -15.688955307006836, "logits_per_token": -3.37587308883667, "logits_per_char": -0.7501940197414823, "bits_per_byte": 1.0823011919863712, "num_chars": 9}, {"sum_logits": -4.349607944488525, "num_tokens": 2, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -15.52852725982666, "logits_per_token": -2.1748039722442627, "logits_per_char": -0.43496079444885255, "bits_per_byte": 0.6275157811329342, "num_chars": 10}, {"sum_logits": -11.976397514343262, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -27.747013092041016, "logits_per_token": -1.9960662523905437, "logits_per_char": -0.4435702783090097, "bits_per_byte": 0.6399366408026054, "num_chars": 27}, {"sum_logits": -8.74923038482666, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -27.478912353515625, "logits_per_token": -2.187307596206665, "logits_per_char": -0.29164101282755533, "bits_per_byte": 0.42074904292645093, "num_chars": 30}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 308, "native_id": "Mercury_SC_413628", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.628232955932617, "logits_per_token_corr": -2.4380388259887695, "logits_per_char_corr": -0.5626243444589468, "bits_per_byte_corr": 0.8116953516349097}, "model_output": [{"sum_logits": -20.59139633178711, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -34.61212158203125, "logits_per_token": -3.431899388631185, "logits_per_char": -0.6434811353683472, "bits_per_byte": 0.9283470429021812, "num_chars": 32}, {"sum_logits": -16.397533416748047, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -31.68308448791504, "logits_per_token": -3.2795066833496094, "logits_per_char": -0.5856261934552874, "bits_per_byte": 0.8448800051132309, "num_chars": 28}, {"sum_logits": -14.628232955932617, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -27.722370147705078, "logits_per_token": -2.4380388259887695, "logits_per_char": -0.5626243444589468, "bits_per_byte": 0.8116953516349097, "num_chars": 26}, {"sum_logits": -12.826644897460938, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -26.029178619384766, "logits_per_token": -2.5653289794921874, "logits_per_char": -0.5576802129330842, "bits_per_byte": 0.8045624776010398, "num_chars": 23}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 309, "native_id": "Mercury_LBS10131", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -1.605199933052063, "logits_per_token_corr": -1.605199933052063, "logits_per_char_corr": -0.14592726664109665, "bits_per_byte_corr": 0.21052854391374284}, "model_output": [{"sum_logits": -3.5220203399658203, "num_tokens": 1, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -13.859761238098145, "logits_per_token": -3.5220203399658203, "logits_per_char": -0.352202033996582, "bits_per_byte": 0.50812012783824, "num_chars": 10}, {"sum_logits": -5.996901512145996, "num_tokens": 1, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -15.965056419372559, "logits_per_token": -5.996901512145996, "logits_per_char": -0.545172864740545, "bits_per_byte": 0.786518188388979, "num_chars": 11}, {"sum_logits": -1.605199933052063, "num_tokens": 1, "num_tokens_all": 204, "is_greedy": true, "sum_logits_uncond": -15.479575157165527, "logits_per_token": -1.605199933052063, "logits_per_char": -0.14592726664109665, "bits_per_byte": 0.21052854391374284, "num_chars": 11}, {"sum_logits": -9.110928535461426, "num_tokens": 2, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -17.724884033203125, "logits_per_token": -4.555464267730713, "logits_per_char": -0.7008406565739558, "bits_per_byte": 1.0110993396933374, "num_chars": 13}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 310, "native_id": "Mercury_7032428", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.729124069213867, "logits_per_token_corr": -1.9548540115356445, "logits_per_char_corr": -0.3909708023071289, "bits_per_byte_corr": 0.5640516376212794}, "model_output": [{"sum_logits": -11.404504776000977, "num_tokens": 6, "num_tokens_all": 278, "is_greedy": false, "sum_logits_uncond": -22.336402893066406, "logits_per_token": -1.9007507960001628, "logits_per_char": -0.42238906577781393, "bits_per_byte": 0.6093786105238121, "num_chars": 27}, {"sum_logits": -11.729124069213867, "num_tokens": 6, "num_tokens_all": 278, "is_greedy": false, "sum_logits_uncond": -18.92633819580078, "logits_per_token": -1.9548540115356445, "logits_per_char": -0.3909708023071289, "bits_per_byte": 0.5640516376212794, "num_chars": 30}, {"sum_logits": -16.668373107910156, "num_tokens": 6, "num_tokens_all": 278, "is_greedy": false, "sum_logits_uncond": -28.73818016052246, "logits_per_token": -2.778062184651693, "logits_per_char": -0.5208866596221924, "bits_per_byte": 0.7514806007026943, "num_chars": 32}, {"sum_logits": -17.656002044677734, "num_tokens": 9, "num_tokens_all": 281, "is_greedy": false, "sum_logits_uncond": -31.350276947021484, "logits_per_token": -1.9617780049641926, "logits_per_char": -0.44140005111694336, "bits_per_byte": 0.6368056647950067, "num_chars": 40}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 311, "native_id": "Mercury_7025008", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.211223602294922, "logits_per_token_corr": -2.6514029502868652, "logits_per_char_corr": -0.6628507375717163, "bits_per_byte_corr": 0.9562914719449939}, "model_output": [{"sum_logits": -21.211223602294922, "num_tokens": 8, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -31.50475311279297, "logits_per_token": -2.6514029502868652, "logits_per_char": -0.6628507375717163, "bits_per_byte": 0.9562914719449939, "num_chars": 32}, {"sum_logits": -15.557326316833496, "num_tokens": 8, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -27.515439987182617, "logits_per_token": -1.944665789604187, "logits_per_char": -0.3617982864379883, "bits_per_byte": 0.5219645936465854, "num_chars": 43}, {"sum_logits": -36.416648864746094, "num_tokens": 10, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -46.89115905761719, "logits_per_token": -3.6416648864746093, "logits_per_char": -0.7916662796683933, "bits_per_byte": 1.1421330157174268, "num_chars": 46}, {"sum_logits": -26.906551361083984, "num_tokens": 9, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -33.62228775024414, "logits_per_token": -2.9896168178982205, "logits_per_char": -0.5849250295887822, "bits_per_byte": 0.8438684394801725, "num_chars": 46}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 312, "native_id": "MEA_2011_8_19", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.857518196105957, "logits_per_token_corr": -6.857518196105957, "logits_per_char_corr": -1.7143795490264893, "bits_per_byte_corr": 2.4733268735837504}, "model_output": [{"sum_logits": -13.36708927154541, "num_tokens": 1, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -13.62519359588623, "logits_per_token": -13.36708927154541, "logits_per_char": -1.6708861589431763, "bits_per_byte": 2.4105791753990604, "num_chars": 8}, {"sum_logits": -6.474217414855957, "num_tokens": 1, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -11.467022895812988, "logits_per_token": -6.474217414855957, "logits_per_char": -1.0790362358093262, "bits_per_byte": 1.5567202263427273, "num_chars": 6}, {"sum_logits": -8.730277061462402, "num_tokens": 1, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -12.272923469543457, "logits_per_token": -8.730277061462402, "logits_per_char": -1.7460554122924805, "bits_per_byte": 2.519025484433506, "num_chars": 5}, {"sum_logits": -6.857518196105957, "num_tokens": 1, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -10.820269584655762, "logits_per_token": -6.857518196105957, "logits_per_char": -1.7143795490264893, "bits_per_byte": 2.4733268735837504, "num_chars": 4}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 313, "native_id": "NYSEDREGENTS_2008_8_27", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.349437713623047, "logits_per_token_corr": -4.174718856811523, "logits_per_char_corr": -1.6698875427246094, "bits_per_byte_corr": 2.4091384767327817}, "model_output": [{"sum_logits": -7.404027938842773, "num_tokens": 2, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -14.437345504760742, "logits_per_token": -3.7020139694213867, "logits_per_char": -1.8510069847106934, "bits_per_byte": 2.670438597494869, "num_chars": 4}, {"sum_logits": -5.541206359863281, "num_tokens": 2, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -15.24575424194336, "logits_per_token": -2.7706031799316406, "logits_per_char": -1.1082412719726562, "bits_per_byte": 1.5988541871845767, "num_chars": 5}, {"sum_logits": -8.349437713623047, "num_tokens": 2, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -16.530860900878906, "logits_per_token": -4.174718856811523, "logits_per_char": -1.6698875427246094, "bits_per_byte": 2.4091384767327817, "num_chars": 5}, {"sum_logits": -8.73172378540039, "num_tokens": 2, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -16.364322662353516, "logits_per_token": -4.365861892700195, "logits_per_char": -1.746344757080078, "bits_per_byte": 2.51944292072368, "num_chars": 5}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 314, "native_id": "VASoL_2007_5_22", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.426875114440918, "logits_per_token_corr": -1.5533593893051147, "logits_per_char_corr": -0.36549632689532113, "bits_per_byte_corr": 0.5272997382753901}, "model_output": [{"sum_logits": -16.26881980895996, "num_tokens": 8, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -38.64457321166992, "logits_per_token": -2.033602476119995, "logits_per_char": -0.49299453966545337, "bits_per_byte": 0.711240777561198, "num_chars": 33}, {"sum_logits": -12.426875114440918, "num_tokens": 8, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -33.470340728759766, "logits_per_token": -1.5533593893051147, "logits_per_char": -0.36549632689532113, "bits_per_byte": 0.5272997382753901, "num_chars": 34}, {"sum_logits": -8.547901153564453, "num_tokens": 8, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -32.045265197753906, "logits_per_token": -1.0684876441955566, "logits_per_char": -0.2374416987101237, "bits_per_byte": 0.3425559612295929, "num_chars": 36}, {"sum_logits": -13.390348434448242, "num_tokens": 8, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -36.163692474365234, "logits_per_token": -1.6737935543060303, "logits_per_char": -0.4057681343772195, "bits_per_byte": 0.5853996752172016, "num_chars": 33}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 315, "native_id": "NCEOGA_2013_5_19", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.391507148742676, "logits_per_token_corr": -2.195753574371338, "logits_per_char_corr": -0.3992279226129705, "bits_per_byte_corr": 0.5759641441385492}, "model_output": [{"sum_logits": -4.391507148742676, "num_tokens": 2, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -18.295711517333984, "logits_per_token": -2.195753574371338, "logits_per_char": -0.3992279226129705, "bits_per_byte": 0.5759641441385492, "num_chars": 11}, {"sum_logits": -11.411794662475586, "num_tokens": 6, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -29.006237030029297, "logits_per_token": -1.9019657770792644, "logits_per_char": -0.4075640950884138, "bits_per_byte": 0.5879906988288749, "num_chars": 28}, {"sum_logits": -2.945021629333496, "num_tokens": 2, "num_tokens_all": 208, "is_greedy": true, "sum_logits_uncond": -19.55733871459961, "logits_per_token": -1.472510814666748, "logits_per_char": -0.17323656643138213, "bits_per_byte": 0.24992753529136602, "num_chars": 17}, {"sum_logits": -6.62384033203125, "num_tokens": 6, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -29.122390747070312, "logits_per_token": -1.103973388671875, "logits_per_char": -0.220794677734375, "bits_per_byte": 0.3185393866222885, "num_chars": 30}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 316, "native_id": "Mercury_7037555", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.23662567138672, "logits_per_token_corr": -4.206104278564453, "logits_per_char_corr": -0.5486222972040591, "bits_per_byte_corr": 0.7914946674979757}, "model_output": [{"sum_logits": -22.47762107849121, "num_tokens": 5, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -33.30097579956055, "logits_per_token": -4.495524215698242, "logits_per_char": -0.7024256587028503, "bits_per_byte": 1.0133860144044937, "num_chars": 32}, {"sum_logits": -15.013893127441406, "num_tokens": 8, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -37.67219543457031, "logits_per_token": -1.8767366409301758, "logits_per_char": -0.349160305289335, "bits_per_byte": 0.503731840916562, "num_chars": 43}, {"sum_logits": -25.23662567138672, "num_tokens": 6, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -37.301979064941406, "logits_per_token": -4.206104278564453, "logits_per_char": -0.5486222972040591, "bits_per_byte": 0.7914946674979757, "num_chars": 46}, {"sum_logits": -17.30916404724121, "num_tokens": 8, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -43.658164978027344, "logits_per_token": -2.1636455059051514, "logits_per_char": -0.3036695446884423, "bits_per_byte": 0.4381025461913399, "num_chars": 57}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 317, "native_id": "Mercury_402132", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -29.28823471069336, "logits_per_token_corr": -3.254248301188151, "logits_per_char_corr": -0.5977190757284359, "bits_per_byte_corr": 0.8623263463987687}, "model_output": [{"sum_logits": -26.488147735595703, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -44.234619140625, "logits_per_token": -3.7840211050851003, "logits_per_char": -0.5758292985999066, "bits_per_byte": 0.8307460734892522, "num_chars": 46}, {"sum_logits": -29.28823471069336, "num_tokens": 9, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -42.45228576660156, "logits_per_token": -3.254248301188151, "logits_per_char": -0.5977190757284359, "bits_per_byte": 0.8623263463987687, "num_chars": 49}, {"sum_logits": -24.724224090576172, "num_tokens": 10, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -49.59291076660156, "logits_per_token": -2.472422409057617, "logits_per_char": -0.5260473210760888, "bits_per_byte": 0.7589258613899428, "num_chars": 47}, {"sum_logits": -41.5614013671875, "num_tokens": 12, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -58.682289123535156, "logits_per_token": -3.4634501139322915, "logits_per_char": -0.7421678815569196, "bits_per_byte": 1.0707219222300046, "num_chars": 56}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 318, "native_id": "MCAS_2006_8_24", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.014419555664062, "logits_per_token_corr": -1.734294637044271, "logits_per_char_corr": -0.34685892740885416, "bits_per_byte_corr": 0.5004116544611783}, "model_output": [{"sum_logits": -26.014419555664062, "num_tokens": 15, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -44.52720642089844, "logits_per_token": -1.734294637044271, "logits_per_char": -0.34685892740885416, "bits_per_byte": 0.5004116544611783, "num_chars": 75}, {"sum_logits": -18.892988204956055, "num_tokens": 17, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -38.44239807128906, "logits_per_token": -1.111352247350356, "logits_per_char": -0.27381142326023267, "bits_per_byte": 0.39502638247657046, "num_chars": 69}, {"sum_logits": -16.126901626586914, "num_tokens": 17, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -35.503692626953125, "logits_per_token": -0.9486412721521714, "logits_per_char": -0.2209164606381769, "bits_per_byte": 0.3187150822136687, "num_chars": 73}, {"sum_logits": -17.292837142944336, "num_tokens": 17, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -37.067169189453125, "logits_per_token": -1.0172257142908432, "logits_per_char": -0.22170304029415816, "bits_per_byte": 0.3198498767826178, "num_chars": 78}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 319, "native_id": "Mercury_7128923", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.7297749519348145, "logits_per_token_corr": -5.7297749519348145, "logits_per_char_corr": -1.1459549903869628, "bits_per_byte_corr": 1.6532635817144188}, "model_output": [{"sum_logits": -5.3603835105896, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -12.703452110290527, "logits_per_token": -5.3603835105896, "logits_per_char": -1.0720767021179198, "bits_per_byte": 1.5466797415992286, "num_chars": 5}, {"sum_logits": -5.7297749519348145, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -12.895293235778809, "logits_per_token": -5.7297749519348145, "logits_per_char": -1.1459549903869628, "bits_per_byte": 1.6532635817144188, "num_chars": 5}, {"sum_logits": -6.933618068695068, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -12.730541229248047, "logits_per_token": -6.933618068695068, "logits_per_char": -1.155603011449178, "bits_per_byte": 1.667182733855279, "num_chars": 6}, {"sum_logits": -1.9060150384902954, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": true, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -1.9060150384902954, "logits_per_char": -0.3176691730817159, "bits_per_byte": 0.45829974064861856, "num_chars": 6}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 320, "native_id": "Mercury_416379", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.188114166259766, "logits_per_token_corr": -2.354234907362196, "logits_per_char_corr": -0.378359181540353, "bits_per_byte_corr": 0.5458569148834664}, "model_output": [{"sum_logits": -21.188114166259766, "num_tokens": 9, "num_tokens_all": 251, "is_greedy": false, "sum_logits_uncond": -36.94356918334961, "logits_per_token": -2.354234907362196, "logits_per_char": -0.378359181540353, "bits_per_byte": 0.5458569148834664, "num_chars": 56}, {"sum_logits": -26.241561889648438, "num_tokens": 9, "num_tokens_all": 251, "is_greedy": false, "sum_logits_uncond": -43.90220260620117, "logits_per_token": -2.9157290988498263, "logits_per_char": -0.4859548498083044, "bits_per_byte": 0.7010846519148854, "num_chars": 54}, {"sum_logits": -15.508441925048828, "num_tokens": 7, "num_tokens_all": 249, "is_greedy": false, "sum_logits_uncond": -32.17140579223633, "logits_per_token": -2.215491703578404, "logits_per_char": -0.329966849469124, "bits_per_byte": 0.47604153738720234, "num_chars": 47}, {"sum_logits": -24.68279266357422, "num_tokens": 6, "num_tokens_all": 248, "is_greedy": false, "sum_logits_uncond": -40.1296272277832, "logits_per_token": -4.11379877726237, "logits_per_char": -0.5609725605357777, "bits_per_byte": 0.8093123311603317, "num_chars": 44}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 321, "native_id": "Mercury_7168053", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.242457389831543, "logits_per_token_corr": -3.1212286949157715, "logits_per_char_corr": -0.5674961263483221, "bits_per_byte_corr": 0.8187238472070092}, "model_output": [{"sum_logits": -3.2473297119140625, "num_tokens": 1, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -15.851914405822754, "logits_per_token": -3.2473297119140625, "logits_per_char": -0.4059162139892578, "bits_per_byte": 0.5856133089391462, "num_chars": 8}, {"sum_logits": -5.802148818969727, "num_tokens": 3, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -16.09860610961914, "logits_per_token": -1.9340496063232422, "logits_per_char": -0.48351240158081055, "bits_per_byte": 0.6975609439694496, "num_chars": 12}, {"sum_logits": -6.242457389831543, "num_tokens": 2, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -16.235103607177734, "logits_per_token": -3.1212286949157715, "logits_per_char": -0.5674961263483221, "bits_per_byte": 0.8187238472070092, "num_chars": 11}, {"sum_logits": -8.812728881835938, "num_tokens": 2, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -17.292001724243164, "logits_per_token": -4.406364440917969, "logits_per_char": -0.6294806344168526, "bits_per_byte": 0.9081485896094843, "num_chars": 14}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 322, "native_id": "AKDE&ED_2008_8_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.641782760620117, "logits_per_token_corr": -2.7283565521240236, "logits_per_char_corr": -0.36869683136811127, "bits_per_byte_corr": 0.5319170902066307}, "model_output": [{"sum_logits": -13.641782760620117, "num_tokens": 5, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -27.061296463012695, "logits_per_token": -2.7283565521240236, "logits_per_char": -0.36869683136811127, "bits_per_byte": 0.5319170902066307, "num_chars": 37}, {"sum_logits": -15.899395942687988, "num_tokens": 4, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -28.77241325378418, "logits_per_token": -3.974848985671997, "logits_per_char": -0.40767681904328174, "bits_per_byte": 0.5881533251195524, "num_chars": 39}, {"sum_logits": -25.56580924987793, "num_tokens": 7, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -38.21142578125, "logits_per_token": -3.6522584642682756, "logits_per_char": -0.5439533882952751, "bits_per_byte": 0.7847588557689058, "num_chars": 47}, {"sum_logits": -27.029489517211914, "num_tokens": 9, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -42.1316032409668, "logits_per_token": -3.003276613023546, "logits_per_char": -0.5197978753309983, "bits_per_byte": 0.7499098170051897, "num_chars": 52}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 323, "native_id": "Mercury_SC_415476", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.521066665649414, "logits_per_token_corr": -3.503009523664202, "logits_per_char_corr": -0.6811407407124838, "bits_per_byte_corr": 0.9826783687740416}, "model_output": [{"sum_logits": -22.374378204345703, "num_tokens": 9, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -38.58281707763672, "logits_per_token": -2.486042022705078, "logits_per_char": -0.6047129244417757, "bits_per_byte": 0.8724163372542391, "num_chars": 37}, {"sum_logits": -24.521066665649414, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -39.844696044921875, "logits_per_token": -3.503009523664202, "logits_per_char": -0.6811407407124838, "bits_per_byte": 0.9826783687740416, "num_chars": 36}, {"sum_logits": -25.774036407470703, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -37.61394500732422, "logits_per_token": -5.154807281494141, "logits_per_char": -0.8314205292732485, "bits_per_byte": 1.1994862744766546, "num_chars": 31}, {"sum_logits": -18.893497467041016, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -32.54026794433594, "logits_per_token": -3.148916244506836, "logits_per_char": -0.6094676602271295, "bits_per_byte": 0.8792759709925112, "num_chars": 31}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 324, "native_id": "Mercury_7106960", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.5178210735321045, "logits_per_token_corr": -2.5178210735321045, "logits_per_char_corr": -0.5035642147064209, "bits_per_byte_corr": 0.7264895953266205}, "model_output": [{"sum_logits": -2.5178210735321045, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -12.599719047546387, "logits_per_token": -2.5178210735321045, "logits_per_char": -0.5035642147064209, "bits_per_byte": 0.7264895953266205, "num_chars": 5}, {"sum_logits": -4.853896141052246, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -13.36142349243164, "logits_per_token": -4.853896141052246, "logits_per_char": -0.8089826901753744, "bits_per_byte": 1.1671153152818639, "num_chars": 6}, {"sum_logits": -4.5214738845825195, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -4.5214738845825195, "logits_per_char": -0.7535789807637533, "bits_per_byte": 1.0871846584668075, "num_chars": 6}, {"sum_logits": -5.937864303588867, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -13.27519702911377, "logits_per_token": -5.937864303588867, "logits_per_char": -1.1875728607177733, "bits_per_byte": 1.7133054768530822, "num_chars": 5}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 325, "native_id": "Mercury_7160563", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -37.19145202636719, "logits_per_token_corr": -3.099287668863932, "logits_per_char_corr": -0.7292441573797488, "bits_per_byte_corr": 1.0520769294497703}, "model_output": [{"sum_logits": -21.37741470336914, "num_tokens": 5, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -36.417503356933594, "logits_per_token": -4.275482940673828, "logits_per_char": -0.5938170750935873, "bits_per_byte": 0.8566969494333231, "num_chars": 36}, {"sum_logits": -28.90008544921875, "num_tokens": 8, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -38.44304656982422, "logits_per_token": -3.6125106811523438, "logits_per_char": -0.64222412109375, "bits_per_byte": 0.9265335546418919, "num_chars": 45}, {"sum_logits": -37.19145202636719, "num_tokens": 12, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -50.05571746826172, "logits_per_token": -3.099287668863932, "logits_per_char": -0.7292441573797488, "bits_per_byte": 1.0520769294497703, "num_chars": 51}, {"sum_logits": -32.25330352783203, "num_tokens": 12, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -46.58053970336914, "logits_per_token": -2.6877752939860025, "logits_per_char": -0.5375550587972006, "bits_per_byte": 0.7755280175320536, "num_chars": 60}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 326, "native_id": "Mercury_7068583", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.445919990539551, "logits_per_token_corr": -2.7229599952697754, "logits_per_char_corr": -0.32034823473762064, "bits_per_byte_corr": 0.4621648096138309}, "model_output": [{"sum_logits": -14.4233980178833, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -17.374576568603516, "logits_per_token": -7.21169900894165, "logits_per_char": -1.0302427155630929, "bits_per_byte": 1.4863260566559209, "num_chars": 14}, {"sum_logits": -5.445919990539551, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -19.251304626464844, "logits_per_token": -2.7229599952697754, "logits_per_char": -0.32034823473762064, "bits_per_byte": 0.4621648096138309, "num_chars": 17}, {"sum_logits": -13.038609504699707, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -21.589054107666016, "logits_per_token": -6.5193047523498535, "logits_per_char": -0.9313292503356934, "bits_per_byte": 1.343624090895106, "num_chars": 14}, {"sum_logits": -8.458210945129395, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -18.369548797607422, "logits_per_token": -4.229105472564697, "logits_per_char": -0.4975418203017291, "bits_per_byte": 0.717801116784688, "num_chars": 17}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 327, "native_id": "Mercury_404638", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.865639686584473, "logits_per_token_corr": -3.4328198432922363, "logits_per_char_corr": -0.6865639686584473, "bits_per_byte_corr": 0.9905024328372992}, "model_output": [{"sum_logits": -7.5880560874938965, "num_tokens": 5, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -36.92790985107422, "logits_per_token": -1.5176112174987793, "logits_per_char": -0.39937137302599457, "bits_per_byte": 0.5761710993380327, "num_chars": 19}, {"sum_logits": -12.966856002807617, "num_tokens": 5, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -26.729265213012695, "logits_per_token": -2.5933712005615233, "logits_per_char": -0.6174693334670294, "bits_per_byte": 0.8908199452945369, "num_chars": 21}, {"sum_logits": -5.485529899597168, "num_tokens": 3, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -20.751636505126953, "logits_per_token": -1.8285099665323894, "logits_per_char": -0.4219638384305514, "bits_per_byte": 0.6087651371386656, "num_chars": 13}, {"sum_logits": -6.865639686584473, "num_tokens": 2, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -20.321861267089844, "logits_per_token": -3.4328198432922363, "logits_per_char": -0.6865639686584473, "bits_per_byte": 0.9905024328372992, "num_chars": 10}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 328, "native_id": "Mercury_SC_407138", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -22.28666877746582, "logits_per_token_corr": -2.7858335971832275, "logits_per_char_corr": -0.8254321769431785, "bits_per_byte_corr": 1.1908469082669606}, "model_output": [{"sum_logits": -22.28666877746582, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -37.30780792236328, "logits_per_token": -2.7858335971832275, "logits_per_char": -0.8254321769431785, "bits_per_byte": 1.1908469082669606, "num_chars": 27}, {"sum_logits": -20.339588165283203, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -29.9661865234375, "logits_per_token": -3.3899313608805337, "logits_per_char": -0.7533180801956741, "bits_per_byte": 1.0868082585110743, "num_chars": 27}, {"sum_logits": -29.005029678344727, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -33.78422546386719, "logits_per_token": -4.143575668334961, "logits_per_char": -1.0358939170837402, "bits_per_byte": 1.4944790170648288, "num_chars": 28}, {"sum_logits": -18.178930282592773, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -25.995746612548828, "logits_per_token": -2.5969900403703963, "logits_per_char": -0.6732937141701028, "bits_per_byte": 0.9713575024956163, "num_chars": 27}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 329, "native_id": "MCAS_2000_4_10", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.337529182434082, "logits_per_token_corr": -6.337529182434082, "logits_per_char_corr": -1.5843822956085205, "bits_per_byte_corr": 2.2857804807483264}, "model_output": [{"sum_logits": -3.5495166778564453, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -14.243241310119629, "logits_per_token": -3.5495166778564453, "logits_per_char": -0.709903335571289, "bits_per_byte": 1.0241740217399682, "num_chars": 5}, {"sum_logits": -3.8570213317871094, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -15.535361289978027, "logits_per_token": -3.8570213317871094, "logits_per_char": -0.5510030473981585, "bits_per_byte": 0.7949293639966009, "num_chars": 7}, {"sum_logits": -2.7747669219970703, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -14.91931438446045, "logits_per_token": -2.7747669219970703, "logits_per_char": -0.4624611536661784, "bits_per_byte": 0.6671904129984638, "num_chars": 6}, {"sum_logits": -6.337529182434082, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -12.889616966247559, "logits_per_token": -6.337529182434082, "logits_per_char": -1.5843822956085205, "bits_per_byte": 2.2857804807483264, "num_chars": 4}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 330, "native_id": "Mercury_177748", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.466192245483398, "logits_per_token_corr": -7.466192245483398, "logits_per_char_corr": -1.066598892211914, "bits_per_byte_corr": 1.538776932412896}, "model_output": [{"sum_logits": -2.902881622314453, "num_tokens": 1, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -11.525116920471191, "logits_per_token": -2.902881622314453, "logits_per_char": -0.4838136037190755, "bits_per_byte": 0.6979954868006298, "num_chars": 6}, {"sum_logits": -3.006816864013672, "num_tokens": 1, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -13.666500091552734, "logits_per_token": -3.006816864013672, "logits_per_char": -0.375852108001709, "bits_per_byte": 0.5422399723221182, "num_chars": 8}, {"sum_logits": -4.197227478027344, "num_tokens": 1, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -14.553839683532715, "logits_per_token": -4.197227478027344, "logits_per_char": -0.46635860866970485, "bits_per_byte": 0.6728132520041433, "num_chars": 9}, {"sum_logits": -7.466192245483398, "num_tokens": 1, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -15.591940879821777, "logits_per_token": -7.466192245483398, "logits_per_char": -1.066598892211914, "bits_per_byte": 1.538776932412896, "num_chars": 7}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 331, "native_id": "MCAS_2004_9_21-v1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.373679161071777, "logits_per_token_corr": -1.2811399300893147, "logits_per_char_corr": -0.23293453274351178, "bits_per_byte_corr": 0.33605349524109374}, "model_output": [{"sum_logits": -26.580902099609375, "num_tokens": 11, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -46.538604736328125, "logits_per_token": -2.416445645419034, "logits_per_char": -0.5111711942232572, "bits_per_byte": 0.7374641469517121, "num_chars": 52}, {"sum_logits": -22.88543701171875, "num_tokens": 9, "num_tokens_all": 236, "is_greedy": false, "sum_logits_uncond": -39.397499084472656, "logits_per_token": -2.5428263346354165, "logits_per_char": -0.457708740234375, "bits_per_byte": 0.660334129708142, "num_chars": 50}, {"sum_logits": -15.373679161071777, "num_tokens": 12, "num_tokens_all": 239, "is_greedy": false, "sum_logits_uncond": -35.449684143066406, "logits_per_token": -1.2811399300893147, "logits_per_char": -0.23293453274351178, "bits_per_byte": 0.33605349524109374, "num_chars": 66}, {"sum_logits": -32.95796203613281, "num_tokens": 14, "num_tokens_all": 241, "is_greedy": false, "sum_logits_uncond": -39.338775634765625, "logits_per_token": -2.354140145438058, "logits_per_char": -0.47765162371206976, "bits_per_byte": 0.6891056288024593, "num_chars": 69}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 332, "native_id": "MDSA_2007_5_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.085046768188477, "logits_per_token_corr": -4.042523384094238, "logits_per_char_corr": -0.32340187072753906, "bits_per_byte_corr": 0.46657027511316945}, "model_output": [{"sum_logits": -8.085046768188477, "num_tokens": 2, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -22.076650619506836, "logits_per_token": -4.042523384094238, "logits_per_char": -0.32340187072753906, "bits_per_byte": 0.46657027511316945, "num_chars": 25}, {"sum_logits": -10.22360897064209, "num_tokens": 2, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -20.139446258544922, "logits_per_token": -5.111804485321045, "logits_per_char": -0.7302577836172921, "bits_per_byte": 1.05353928299599, "num_chars": 14}, {"sum_logits": -7.945991039276123, "num_tokens": 2, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -15.688706398010254, "logits_per_token": -3.9729955196380615, "logits_per_char": -0.6621659199396769, "bits_per_byte": 0.9553034889433365, "num_chars": 12}, {"sum_logits": -7.60127067565918, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -14.321510314941406, "logits_per_token": -7.60127067565918, "logits_per_char": -0.760127067565918, "bits_per_byte": 1.0966315508236077, "num_chars": 10}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 333, "native_id": "Mercury_401763", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.292649269104004, "logits_per_token_corr": -1.5365811586380005, "logits_per_char_corr": -1.0243874390920003, "bits_per_byte_corr": 1.4778786783280355}, "model_output": [{"sum_logits": -12.085105895996094, "num_tokens": 5, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -25.27395248413086, "logits_per_token": -2.4170211791992187, "logits_per_char": -1.5106382369995117, "bits_per_byte": 2.179390293098008, "num_chars": 8}, {"sum_logits": -12.292649269104004, "num_tokens": 8, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -26.20246696472168, "logits_per_token": -1.5365811586380005, "logits_per_char": -1.0243874390920003, "bits_per_byte": 1.4778786783280355, "num_chars": 12}, {"sum_logits": -15.307478904724121, "num_tokens": 6, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -33.25303649902344, "logits_per_token": -2.551246484120687, "logits_per_char": -1.7008309894137912, "bits_per_byte": 2.4537804338193085, "num_chars": 9}, {"sum_logits": -13.591352462768555, "num_tokens": 8, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -30.421907424926758, "logits_per_token": -1.6989190578460693, "logits_per_char": -1.132612705230713, "bits_per_byte": 1.6340147330853567, "num_chars": 12}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 334, "native_id": "Mercury_7268118", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -30.528709411621094, "logits_per_token_corr": -2.7753372192382812, "logits_per_char_corr": -0.5986021453259038, "bits_per_byte_corr": 0.8636003465277965}, "model_output": [{"sum_logits": -21.690256118774414, "num_tokens": 8, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -43.54492950439453, "logits_per_token": -2.7112820148468018, "logits_per_char": -0.44265828813825336, "bits_per_byte": 0.6386209171059148, "num_chars": 49}, {"sum_logits": -19.991451263427734, "num_tokens": 8, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -42.98828887939453, "logits_per_token": -2.498931407928467, "logits_per_char": -0.42535002688144113, "bits_per_byte": 0.6136503744242833, "num_chars": 47}, {"sum_logits": -30.528709411621094, "num_tokens": 11, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -41.562442779541016, "logits_per_token": -2.7753372192382812, "logits_per_char": -0.5986021453259038, "bits_per_byte": 0.8636003465277965, "num_chars": 51}, {"sum_logits": -27.981060028076172, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -38.6829719543457, "logits_per_token": -3.9972942897251675, "logits_per_char": -0.5380973082322341, "bits_per_byte": 0.7763103181029019, "num_chars": 52}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 335, "native_id": "Mercury_403232", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.610404014587402, "logits_per_token_corr": -2.8701346715291343, "logits_per_char_corr": -0.7175336678822836, "bits_per_byte_corr": 1.0351822643253827}, "model_output": [{"sum_logits": -7.207932472229004, "num_tokens": 3, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -19.564373016357422, "logits_per_token": -2.402644157409668, "logits_per_char": -0.600661039352417, "bits_per_byte": 0.866570702729565, "num_chars": 12}, {"sum_logits": -5.152805805206299, "num_tokens": 3, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -17.558359146118164, "logits_per_token": -1.7176019350687664, "logits_per_char": -0.4684368913823908, "bits_per_byte": 0.6758115801673027, "num_chars": 11}, {"sum_logits": -6.906764984130859, "num_tokens": 3, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -18.29916000366211, "logits_per_token": -2.3022549947102866, "logits_per_char": -0.5755637486775717, "bits_per_byte": 0.8303629659331908, "num_chars": 12}, {"sum_logits": -8.610404014587402, "num_tokens": 3, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -19.116397857666016, "logits_per_token": -2.8701346715291343, "logits_per_char": -0.7175336678822836, "bits_per_byte": 1.0351822643253827, "num_chars": 12}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 336, "native_id": "Mercury_415081", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.099509239196777, "logits_per_token_corr": -1.016584873199463, "logits_per_char_corr": -0.7624386548995972, "bits_per_byte_corr": 1.0999664664064908}, "model_output": [{"sum_logits": -6.099509239196777, "num_tokens": 6, "num_tokens_all": 197, "is_greedy": true, "sum_logits_uncond": -19.650962829589844, "logits_per_token": -1.016584873199463, "logits_per_char": -0.7624386548995972, "bits_per_byte": 1.0999664664064908, "num_chars": 8}, {"sum_logits": -7.77857780456543, "num_tokens": 6, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -19.85146713256836, "logits_per_token": -1.2964296340942383, "logits_per_char": -0.9723222255706787, "bits_per_byte": 1.402764452977946, "num_chars": 8}, {"sum_logits": -7.437078475952148, "num_tokens": 6, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -20.165538787841797, "logits_per_token": -1.2395130793253581, "logits_per_char": -0.9296348094940186, "bits_per_byte": 1.3411795294957403, "num_chars": 8}, {"sum_logits": -6.2013678550720215, "num_tokens": 4, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -19.205352783203125, "logits_per_token": -1.5503419637680054, "logits_per_char": -0.8859096935817173, "bits_per_byte": 1.278097521606723, "num_chars": 7}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 337, "native_id": "Mercury_7206378", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -23.555587768554688, "logits_per_token_corr": -3.365083966936384, "logits_per_char_corr": -0.5745265309403582, "bits_per_byte_corr": 0.8288665770473899}, "model_output": [{"sum_logits": -26.407649993896484, "num_tokens": 9, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -43.155677795410156, "logits_per_token": -2.9341833326551647, "logits_per_char": -0.586836666531033, "bits_per_byte": 0.8466263486167398, "num_chars": 45}, {"sum_logits": -23.555587768554688, "num_tokens": 7, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -48.74530029296875, "logits_per_token": -3.365083966936384, "logits_per_char": -0.5745265309403582, "bits_per_byte": 0.8288665770473899, "num_chars": 41}, {"sum_logits": -24.78253936767578, "num_tokens": 9, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -49.342498779296875, "logits_per_token": -2.753615485297309, "logits_per_char": -0.485932144464231, "bits_per_byte": 0.701051895027589, "num_chars": 51}, {"sum_logits": -36.610687255859375, "num_tokens": 10, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -52.29158020019531, "logits_per_token": -3.6610687255859373, "logits_per_char": -0.5811220199342758, "bits_per_byte": 0.8383818563111594, "num_chars": 63}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 338, "native_id": "CSZ30169", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.890392303466797, "logits_per_token_corr": -2.1112990379333496, "logits_per_char_corr": -0.625570085313585, "bits_per_byte_corr": 0.9025068598110433}, "model_output": [{"sum_logits": -16.890392303466797, "num_tokens": 8, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -26.50343894958496, "logits_per_token": -2.1112990379333496, "logits_per_char": -0.625570085313585, "bits_per_byte": 0.9025068598110433, "num_chars": 27}, {"sum_logits": -19.183547973632812, "num_tokens": 8, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -31.422922134399414, "logits_per_token": -2.3979434967041016, "logits_per_char": -0.7673419189453125, "bits_per_byte": 1.1070403811294185, "num_chars": 25}, {"sum_logits": -13.31986141204834, "num_tokens": 7, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -23.68646812438965, "logits_per_token": -1.9028373445783342, "logits_per_char": -0.4933282004462348, "bits_per_byte": 0.7117221483149708, "num_chars": 27}, {"sum_logits": -12.481671333312988, "num_tokens": 7, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -24.020259857177734, "logits_per_token": -1.7830959047589983, "logits_per_char": -0.48006428205049956, "bits_per_byte": 0.6925863590226738, "num_chars": 26}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 339, "native_id": "Mercury_7013948", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.274970054626465, "logits_per_token_corr": -3.068742513656616, "logits_per_char_corr": -0.3719687895341353, "bits_per_byte_corr": 0.5366375280267531}, "model_output": [{"sum_logits": -16.247079849243164, "num_tokens": 4, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -28.285324096679688, "logits_per_token": -4.061769962310791, "logits_per_char": -0.6017436981201172, "bits_per_byte": 0.8681326491647022, "num_chars": 27}, {"sum_logits": -15.409056663513184, "num_tokens": 5, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -31.347999572753906, "logits_per_token": -3.0818113327026366, "logits_per_char": -0.4970663439842962, "bits_per_byte": 0.717115149459467, "num_chars": 31}, {"sum_logits": -11.359437942504883, "num_tokens": 4, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -25.035524368286133, "logits_per_token": -2.8398594856262207, "logits_per_char": -0.42071992379647716, "bits_per_byte": 0.6069705476647963, "num_chars": 27}, {"sum_logits": -12.274970054626465, "num_tokens": 4, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -26.960390090942383, "logits_per_token": -3.068742513656616, "logits_per_char": -0.3719687895341353, "bits_per_byte": 0.5366375280267531, "num_chars": 33}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 340, "native_id": "Mercury_SC_402164", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.342008590698242, "logits_per_token_corr": -5.342008590698242, "logits_per_char_corr": -0.44516738255818683, "bits_per_byte_corr": 0.6422407751826775}, "model_output": [{"sum_logits": -10.484112739562988, "num_tokens": 1, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -11.462020874023438, "logits_per_token": -10.484112739562988, "logits_per_char": -1.1649014155069988, "bits_per_byte": 1.6805974952776883, "num_chars": 9}, {"sum_logits": -12.548543930053711, "num_tokens": 1, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -14.03209114074707, "logits_per_token": -12.548543930053711, "logits_per_char": -1.1407767209139736, "bits_per_byte": 1.6457929180253452, "num_chars": 11}, {"sum_logits": -5.342008590698242, "num_tokens": 1, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -14.272747993469238, "logits_per_token": -5.342008590698242, "logits_per_char": -0.44516738255818683, "bits_per_byte": 0.6422407751826775, "num_chars": 12}, {"sum_logits": -6.328403472900391, "num_tokens": 1, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -14.482308387756348, "logits_per_token": -6.328403472900391, "logits_per_char": -0.5273669560750326, "bits_per_byte": 0.7608296922587039, "num_chars": 12}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 341, "native_id": "Mercury_400880", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.288447380065918, "logits_per_token_corr": -2.5721118450164795, "logits_per_char_corr": -0.5144223690032959, "bits_per_byte_corr": 0.7421546006839406}, "model_output": [{"sum_logits": -10.632312774658203, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -29.401220321655273, "logits_per_token": -2.658078193664551, "logits_per_char": -0.6254301632151884, "bits_per_byte": 0.9023049948935755, "num_chars": 17}, {"sum_logits": -11.206583023071289, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -29.575275421142578, "logits_per_token": -2.8016457557678223, "logits_per_char": -0.659210766063017, "bits_per_byte": 0.9510401031004124, "num_chars": 17}, {"sum_logits": -10.288447380065918, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -26.253393173217773, "logits_per_token": -2.5721118450164795, "logits_per_char": -0.5144223690032959, "bits_per_byte": 0.7421546006839406, "num_chars": 20}, {"sum_logits": -11.113380432128906, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -26.31021499633789, "logits_per_token": -2.7783451080322266, "logits_per_char": -0.5556690216064453, "bits_per_byte": 0.8016609418478168, "num_chars": 20}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 342, "native_id": "Mercury_7040793", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.1995062828063965, "logits_per_token_corr": -2.3998354276021323, "logits_per_char_corr": -0.3789213833055998, "bits_per_byte_corr": 0.5466680005821677}, "model_output": [{"sum_logits": -6.0656657218933105, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -21.811059951782227, "logits_per_token": -2.021888573964437, "logits_per_char": -0.35680386599372416, "bits_per_byte": 0.5147591680395259, "num_chars": 17}, {"sum_logits": -7.1995062828063965, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -21.488840103149414, "logits_per_token": -2.3998354276021323, "logits_per_char": -0.3789213833055998, "bits_per_byte": 0.5466680005821677, "num_chars": 19}, {"sum_logits": -12.684858322143555, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -27.399555206298828, "logits_per_token": -2.536971664428711, "logits_per_char": -0.5073943328857422, "bits_per_byte": 0.7320152878299501, "num_chars": 25}, {"sum_logits": -14.505945205688477, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -35.27143859863281, "logits_per_token": -2.0722778865269254, "logits_per_char": -0.41445557730538507, "bits_per_byte": 0.5979330060476811, "num_chars": 35}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 343, "native_id": "MDSA_2010_5_29", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -20.28240966796875, "logits_per_token_corr": -1.2676506042480469, "logits_per_char_corr": -0.23313114560883622, "bits_per_byte_corr": 0.3363371476468725}, "model_output": [{"sum_logits": -12.425024032592773, "num_tokens": 4, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -34.53874969482422, "logits_per_token": -3.1062560081481934, "logits_per_char": -0.46018607528121386, "bits_per_byte": 0.6639081686948394, "num_chars": 27}, {"sum_logits": -13.785880088806152, "num_tokens": 6, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -33.94607925415039, "logits_per_token": -2.297646681467692, "logits_per_char": -0.3938822882516044, "bits_per_byte": 0.5682520239549951, "num_chars": 35}, {"sum_logits": -25.991241455078125, "num_tokens": 16, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -57.71766662597656, "logits_per_token": -1.6244525909423828, "logits_per_char": -0.3057793112362132, "bits_per_byte": 0.44114629592724464, "num_chars": 85}, {"sum_logits": -20.28240966796875, "num_tokens": 16, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -54.13610076904297, "logits_per_token": -1.2676506042480469, "logits_per_char": -0.23313114560883622, "bits_per_byte": 0.3363371476468725, "num_chars": 87}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 344, "native_id": "LEAP__8_10365", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -29.229942321777344, "logits_per_token_corr": -1.948662821451823, "logits_per_char_corr": -0.6089571317036947, "bits_per_byte_corr": 0.8785394340235189}, "model_output": [{"sum_logits": -29.229942321777344, "num_tokens": 15, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -59.048763275146484, "logits_per_token": -1.948662821451823, "logits_per_char": -0.6089571317036947, "bits_per_byte": 0.8785394340235189, "num_chars": 48}, {"sum_logits": -22.696245193481445, "num_tokens": 11, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -52.65888214111328, "logits_per_token": -2.063295017589222, "logits_per_char": -0.5674061298370361, "bits_per_byte": 0.8185940096864793, "num_chars": 40}, {"sum_logits": -29.07337188720703, "num_tokens": 13, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -62.22792053222656, "logits_per_token": -2.2364132220928488, "logits_per_char": -0.49276901503740733, "bits_per_byte": 0.7109154142987173, "num_chars": 59}, {"sum_logits": -33.910560607910156, "num_tokens": 15, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -60.828346252441406, "logits_per_token": -2.2607040405273438, "logits_per_char": -0.6782112121582031, "bits_per_byte": 0.9784519524566353, "num_chars": 50}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 345, "native_id": "Mercury_SC_401295", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.267929077148438, "logits_per_token_corr": -2.3879935524680396, "logits_per_char_corr": -0.5051524822528546, "bits_per_byte_corr": 0.728780981039467}, "model_output": [{"sum_logits": -23.004653930664062, "num_tokens": 10, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -42.173065185546875, "logits_per_token": -2.300465393066406, "logits_per_char": -0.5112145317925347, "bits_per_byte": 0.7375266698479931, "num_chars": 45}, {"sum_logits": -26.267929077148438, "num_tokens": 11, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -45.57341766357422, "logits_per_token": -2.3879935524680396, "logits_per_char": -0.5051524822528546, "bits_per_byte": 0.728780981039467, "num_chars": 52}, {"sum_logits": -29.804248809814453, "num_tokens": 11, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -52.138328552246094, "logits_per_token": -2.7094771645285864, "logits_per_char": -0.5731586309579703, "bits_per_byte": 0.8268931145263653, "num_chars": 52}, {"sum_logits": -26.873271942138672, "num_tokens": 11, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -42.21795654296875, "logits_per_token": -2.4430247220126065, "logits_per_char": -0.4334398700344947, "bits_per_byte": 0.6253215510227715, "num_chars": 62}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 346, "native_id": "MCAS_2012_5_23625", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.30773162841797, "logits_per_token_corr": -4.576932907104492, "logits_per_char_corr": -1.0769253899069393, "bits_per_byte_corr": 1.553674919427271}, "model_output": [{"sum_logits": -12.463446617126465, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -32.3189697265625, "logits_per_token": -3.115861654281616, "logits_per_char": -0.6924137009514703, "bits_per_byte": 0.9989418126069776, "num_chars": 18}, {"sum_logits": -10.363062858581543, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -24.742258071899414, "logits_per_token": -2.5907657146453857, "logits_per_char": -0.545424360977976, "bits_per_byte": 0.7868810207635231, "num_chars": 19}, {"sum_logits": -18.30773162841797, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -34.40748596191406, "logits_per_token": -4.576932907104492, "logits_per_char": -1.0769253899069393, "bits_per_byte": 1.553674919427271, "num_chars": 17}, {"sum_logits": -10.612178802490234, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -25.60696792602539, "logits_per_token": -2.6530447006225586, "logits_per_char": -0.6242458119111902, "bits_per_byte": 0.9005963371406257, "num_chars": 17}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 347, "native_id": "Mercury_7268048", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.993364334106445, "logits_per_token_corr": -2.398672866821289, "logits_per_char_corr": -0.3997788111368815, "bits_per_byte_corr": 0.5767589082800788}, "model_output": [{"sum_logits": -22.13823127746582, "num_tokens": 6, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -30.89556121826172, "logits_per_token": -3.68970521291097, "logits_per_char": -0.7379410425821941, "bits_per_byte": 1.0646238826025276, "num_chars": 30}, {"sum_logits": -20.22488021850586, "num_tokens": 6, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -26.530235290527344, "logits_per_token": -3.3708133697509766, "logits_per_char": -0.6128751581365411, "bits_per_byte": 0.8841919513282623, "num_chars": 33}, {"sum_logits": -11.993364334106445, "num_tokens": 5, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -21.343286514282227, "logits_per_token": -2.398672866821289, "logits_per_char": -0.3997788111368815, "bits_per_byte": 0.5767589082800788, "num_chars": 30}, {"sum_logits": -15.234379768371582, "num_tokens": 8, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -24.84772300720215, "logits_per_token": -1.9042974710464478, "logits_per_char": -0.4616478717688358, "bits_per_byte": 0.666017095238322, "num_chars": 33}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 348, "native_id": "Mercury_SC_402629", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.361995220184326, "logits_per_token_corr": -4.361995220184326, "logits_per_char_corr": -0.5452494025230408, "bits_per_byte_corr": 0.7866286090682263}, "model_output": [{"sum_logits": -4.361995220184326, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -13.302153587341309, "logits_per_token": -4.361995220184326, "logits_per_char": -0.5452494025230408, "bits_per_byte": 0.7866286090682263, "num_chars": 8}, {"sum_logits": -7.821470260620117, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -19.318639755249023, "logits_per_token": -3.9107351303100586, "logits_per_char": -0.7821470260620117, "bits_per_byte": 1.1283996357465258, "num_chars": 10}, {"sum_logits": -8.204327583312988, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -16.698421478271484, "logits_per_token": -4.102163791656494, "logits_per_char": -0.6836939652760824, "bits_per_byte": 0.9863618931902238, "num_chars": 12}, {"sum_logits": -8.046475410461426, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -21.841793060302734, "logits_per_token": -4.023237705230713, "logits_per_char": -0.42349870581375926, "bits_per_byte": 0.6109794827008435, "num_chars": 19}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 349, "native_id": "NCEOGA_2013_8_42", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.10234832763672, "logits_per_token_corr": -3.620469665527344, "logits_per_char_corr": -0.5656983852386475, "bits_per_byte_corr": 0.8161302550232774}, "model_output": [{"sum_logits": -14.526383399963379, "num_tokens": 5, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -30.75925064086914, "logits_per_token": -2.9052766799926757, "logits_per_char": -0.4842127799987793, "bits_per_byte": 0.6985713764397995, "num_chars": 30}, {"sum_logits": -22.84538459777832, "num_tokens": 7, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -41.98744201660156, "logits_per_token": -3.263626371111189, "logits_per_char": -0.7369478902509136, "bits_per_byte": 1.063191066659341, "num_chars": 31}, {"sum_logits": -27.199541091918945, "num_tokens": 7, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -37.03485870361328, "logits_per_token": -3.885648727416992, "logits_per_char": -0.7157773971557617, "bits_per_byte": 1.0326485012577693, "num_chars": 38}, {"sum_logits": -18.10234832763672, "num_tokens": 5, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -30.21209716796875, "logits_per_token": -3.620469665527344, "logits_per_char": -0.5656983852386475, "bits_per_byte": 0.8161302550232774, "num_chars": 32}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 350, "native_id": "Mercury_412463", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.072803497314453, "logits_per_token_corr": -4.072803497314453, "logits_per_char_corr": -2.0364017486572266, "bits_per_byte_corr": 2.9379067040475046}, "model_output": [{"sum_logits": -5.147940635681152, "num_tokens": 1, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -6.753307342529297, "logits_per_token": -5.147940635681152, "logits_per_char": -2.573970317840576, "bits_per_byte": 3.713454212946656, "num_chars": 2}, {"sum_logits": -3.255479097366333, "num_tokens": 1, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -5.19705867767334, "logits_per_token": -3.255479097366333, "logits_per_char": -1.6277395486831665, "bits_per_byte": 2.348331774745731, "num_chars": 2}, {"sum_logits": -2.93022084236145, "num_tokens": 1, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -6.875624656677246, "logits_per_token": -2.93022084236145, "logits_per_char": -1.465110421180725, "bits_per_byte": 2.1137075389936912, "num_chars": 2}, {"sum_logits": -4.072803497314453, "num_tokens": 1, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -7.217063903808594, "logits_per_token": -4.072803497314453, "logits_per_char": -2.0364017486572266, "bits_per_byte": 2.9379067040475046, "num_chars": 2}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 351, "native_id": "Mercury_409295", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -29.327144622802734, "logits_per_token_corr": -3.2585716247558594, "logits_per_char_corr": -0.6665260141546075, "bits_per_byte_corr": 0.9615937752450302}, "model_output": [{"sum_logits": -29.6142578125, "num_tokens": 11, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -42.805877685546875, "logits_per_token": -2.6922052556818183, "logits_per_char": -0.6169637044270834, "bits_per_byte": 0.8900904767860769, "num_chars": 48}, {"sum_logits": -29.327144622802734, "num_tokens": 9, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -37.814247131347656, "logits_per_token": -3.2585716247558594, "logits_per_char": -0.6665260141546075, "bits_per_byte": 0.9615937752450302, "num_chars": 44}, {"sum_logits": -22.53150177001953, "num_tokens": 7, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -27.780750274658203, "logits_per_token": -3.218785967145647, "logits_per_char": -0.5632875442504883, "bits_per_byte": 0.8126521466852858, "num_chars": 40}, {"sum_logits": -39.576805114746094, "num_tokens": 9, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -40.43367004394531, "logits_per_token": -4.397422790527344, "logits_per_char": -0.9652879296279535, "bits_per_byte": 1.3926161091052238, "num_chars": 41}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 352, "native_id": "Mercury_404609", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.37908172607422, "logits_per_token_corr": -4.344770431518555, "logits_per_char_corr": -0.7899582602761008, "bits_per_byte_corr": 1.1396688646104225}, "model_output": [{"sum_logits": -11.110757827758789, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -22.642215728759766, "logits_per_token": -5.5553789138793945, "logits_per_char": -1.1110757827758788, "bits_per_byte": 1.6029435218637351, "num_chars": 10}, {"sum_logits": -17.785980224609375, "num_tokens": 3, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -28.479164123535156, "logits_per_token": -5.928660074869792, "logits_per_char": -0.9361042223478618, "bits_per_byte": 1.35051291933745, "num_chars": 19}, {"sum_logits": -17.427940368652344, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -35.1961669921875, "logits_per_token": -4.356985092163086, "logits_per_char": -0.8713970184326172, "bits_per_byte": 1.2571601571390687, "num_chars": 20}, {"sum_logits": -17.37908172607422, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -31.290382385253906, "logits_per_token": -4.344770431518555, "logits_per_char": -0.7899582602761008, "bits_per_byte": 1.1396688646104225, "num_chars": 22}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 353, "native_id": "Mercury_7230090", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.079375267028809, "logits_per_token_corr": -2.693125089009603, "logits_per_char_corr": -0.5770982333592006, "bits_per_byte_corr": 0.8325767593736987}, "model_output": [{"sum_logits": -9.135750770568848, "num_tokens": 4, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -16.96127700805664, "logits_per_token": -2.283937692642212, "logits_per_char": -0.507541709476047, "bits_per_byte": 0.7322279073059261, "num_chars": 18}, {"sum_logits": -8.966513633728027, "num_tokens": 4, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -20.164501190185547, "logits_per_token": -2.241628408432007, "logits_per_char": -0.640465259552002, "bits_per_byte": 0.9239960538179999, "num_chars": 14}, {"sum_logits": -8.079375267028809, "num_tokens": 3, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -16.869129180908203, "logits_per_token": -2.693125089009603, "logits_per_char": -0.5770982333592006, "bits_per_byte": 0.8325767593736987, "num_chars": 14}, {"sum_logits": -8.595710754394531, "num_tokens": 2, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -19.29450225830078, "logits_per_token": -4.297855377197266, "logits_per_char": -0.573047383626302, "bits_per_byte": 0.8267326185526552, "num_chars": 15}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 354, "native_id": "Mercury_7057488", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -24.700729370117188, "logits_per_token_corr": -3.0875911712646484, "logits_per_char_corr": -0.5744355667469113, "bits_per_byte_corr": 0.8287353434566055}, "model_output": [{"sum_logits": -11.061117172241211, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -32.17509460449219, "logits_per_token": -1.8435195287068684, "logits_per_char": -0.44244468688964844, "bits_per_byte": 0.6383127556438245, "num_chars": 25}, {"sum_logits": -15.982603073120117, "num_tokens": 10, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -35.190574645996094, "logits_per_token": -1.5982603073120116, "logits_per_char": -0.3898195871492712, "bits_per_byte": 0.5623907852220407, "num_chars": 41}, {"sum_logits": -12.200551986694336, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -35.55897521972656, "logits_per_token": -2.0334253311157227, "logits_per_char": -0.3935661931191721, "bits_per_byte": 0.5677959950749856, "num_chars": 31}, {"sum_logits": -24.700729370117188, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -48.42240524291992, "logits_per_token": -3.0875911712646484, "logits_per_char": -0.5744355667469113, "bits_per_byte": 0.8287353434566055, "num_chars": 43}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 355, "native_id": "MDSA_2009_4_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.7442498207092285, "logits_per_token_corr": -1.248083273569743, "logits_per_char_corr": -0.22024998945348404, "bits_per_byte_corr": 0.3177535675406162}, "model_output": [{"sum_logits": -12.158358573913574, "num_tokens": 3, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -19.671218872070312, "logits_per_token": -4.052786191304524, "logits_per_char": -1.013196547826131, "bits_per_byte": 1.461733634995627, "num_chars": 12}, {"sum_logits": -10.61471176147461, "num_tokens": 3, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -19.33937644958496, "logits_per_token": -3.5382372538248696, "logits_per_char": -0.8845593134562174, "bits_per_byte": 1.2761493348963477, "num_chars": 12}, {"sum_logits": -4.792230606079102, "num_tokens": 3, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -19.80863380432129, "logits_per_token": -1.5974102020263672, "logits_per_char": -0.29951441287994385, "bits_per_byte": 0.4321079581369749, "num_chars": 16}, {"sum_logits": -3.7442498207092285, "num_tokens": 3, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -22.464696884155273, "logits_per_token": -1.248083273569743, "logits_per_char": -0.22024998945348404, "bits_per_byte": 0.3177535675406162, "num_chars": 17}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 356, "native_id": "Mercury_7150728", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -28.56298065185547, "logits_per_token_corr": -3.17366451687283, "logits_per_char_corr": -0.4200438331155216, "bits_per_byte_corr": 0.6059951549921898}, "model_output": [{"sum_logits": -28.56298065185547, "num_tokens": 9, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -39.246856689453125, "logits_per_token": -3.17366451687283, "logits_per_char": -0.4200438331155216, "bits_per_byte": 0.6059951549921898, "num_chars": 68}, {"sum_logits": -36.807579040527344, "num_tokens": 9, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -50.01317596435547, "logits_per_token": -4.089731004503038, "logits_per_char": -0.7361515808105469, "bits_per_byte": 1.06204223497871, "num_chars": 50}, {"sum_logits": -29.134836196899414, "num_tokens": 13, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -48.82828903198242, "logits_per_token": -2.2411412459153395, "logits_per_char": -0.4938107829982952, "bits_per_byte": 0.7124183677696483, "num_chars": 59}, {"sum_logits": -42.13251876831055, "num_tokens": 10, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -55.82200622558594, "logits_per_token": -4.2132518768310545, "logits_per_char": -0.6795567543275894, "bits_per_byte": 0.9803931594717172, "num_chars": 62}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 357, "native_id": "Mercury_402207", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.352941513061523, "logits_per_token_corr": -2.0352941513061524, "logits_per_char_corr": -0.4330413087885431, "bits_per_byte_corr": 0.6247465486897462}, "model_output": [{"sum_logits": -18.687597274780273, "num_tokens": 10, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -42.15176010131836, "logits_per_token": -1.8687597274780274, "logits_per_char": -0.3976084526548994, "bits_per_byte": 0.5736277428611697, "num_chars": 47}, {"sum_logits": -20.352941513061523, "num_tokens": 10, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -42.86607360839844, "logits_per_token": -2.0352941513061524, "logits_per_char": -0.4330413087885431, "bits_per_byte": 0.6247465486897462, "num_chars": 47}, {"sum_logits": -31.15363883972168, "num_tokens": 12, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -52.70458221435547, "logits_per_token": -2.5961365699768066, "logits_per_char": -0.587804506409843, "bits_per_byte": 0.8480226464102747, "num_chars": 53}, {"sum_logits": -28.999374389648438, "num_tokens": 12, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -48.620697021484375, "logits_per_token": -2.416614532470703, "logits_per_char": -0.5272613525390625, "bits_per_byte": 0.7606773385610593, "num_chars": 55}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 358, "native_id": "Mercury_411732", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -22.347654342651367, "logits_per_token_corr": -3.724609057108561, "logits_per_char_corr": -1.5962610244750977, "bits_per_byte_corr": 2.302917863976214}, "model_output": [{"sum_logits": -18.34292984008789, "num_tokens": 6, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -33.183189392089844, "logits_per_token": -3.057154973347982, "logits_per_char": -1.410994603083684, "bits_per_byte": 2.0356349165913845, "num_chars": 13}, {"sum_logits": -22.751293182373047, "num_tokens": 6, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -34.45304870605469, "logits_per_token": -3.7918821970621743, "logits_per_char": -1.7500994755671575, "bits_per_byte": 2.5248598344649276, "num_chars": 13}, {"sum_logits": -23.470020294189453, "num_tokens": 6, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -37.728721618652344, "logits_per_token": -3.9116700490315757, "logits_per_char": -1.6764300210135323, "bits_per_byte": 2.4185772777153414, "num_chars": 14}, {"sum_logits": -22.347654342651367, "num_tokens": 6, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -38.77568817138672, "logits_per_token": -3.724609057108561, "logits_per_char": -1.5962610244750977, "bits_per_byte": 2.302917863976214, "num_chars": 14}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 359, "native_id": "Mercury_7270113", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -25.5356502532959, "logits_per_token_corr": -3.1919562816619873, "logits_per_char_corr": -0.6228207378852658, "bits_per_byte_corr": 0.8985403899105234}, "model_output": [{"sum_logits": -29.902572631835938, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -44.38970947265625, "logits_per_token": -3.737821578979492, "logits_per_char": -0.7869098061009457, "bits_per_byte": 1.1352708748895457, "num_chars": 38}, {"sum_logits": -43.33850860595703, "num_tokens": 11, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -57.092994689941406, "logits_per_token": -3.9398644187233667, "logits_per_char": -0.9849661046808417, "bits_per_byte": 1.4210057146677908, "num_chars": 44}, {"sum_logits": -25.5356502532959, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -43.35490417480469, "logits_per_token": -3.1919562816619873, "logits_per_char": -0.6228207378852658, "bits_per_byte": 0.8985403899105234, "num_chars": 41}, {"sum_logits": -19.92767906188965, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -35.112945556640625, "logits_per_token": -2.846811294555664, "logits_per_char": -0.5693622589111328, "bits_per_byte": 0.8214161074010194, "num_chars": 35}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 360, "native_id": "AKDE&ED_2008_8_3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -41.645198822021484, "logits_per_token_corr": -2.9746570587158203, "logits_per_char_corr": -0.6940866470336914, "bits_per_byte_corr": 1.0013553636234742}, "model_output": [{"sum_logits": -41.670650482177734, "num_tokens": 14, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -51.08928298950195, "logits_per_token": -2.976475034441267, "logits_per_char": -0.6945108413696289, "bits_per_byte": 1.001967346688305, "num_chars": 60}, {"sum_logits": -41.645198822021484, "num_tokens": 14, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -51.34856033325195, "logits_per_token": -2.9746570587158203, "logits_per_char": -0.6940866470336914, "bits_per_byte": 1.0013553636234742, "num_chars": 60}, {"sum_logits": -40.46282196044922, "num_tokens": 14, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -51.433006286621094, "logits_per_token": -2.8902015686035156, "logits_per_char": -0.674380366007487, "bits_per_byte": 0.9729252097125844, "num_chars": 60}, {"sum_logits": -40.890296936035156, "num_tokens": 14, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -52.09779357910156, "logits_per_token": -2.9207354954310825, "logits_per_char": -0.6815049489339192, "bits_per_byte": 0.9832038101689579, "num_chars": 60}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 361, "native_id": "MCAS_1999_8_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -18.909992218017578, "logits_per_token_corr": -2.701427459716797, "logits_per_char_corr": -0.4397672608841297, "bits_per_byte_corr": 0.6344500464233127}, "model_output": [{"sum_logits": -18.909992218017578, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -37.43067169189453, "logits_per_token": -2.701427459716797, "logits_per_char": -0.4397672608841297, "bits_per_byte": 0.6344500464233127, "num_chars": 43}, {"sum_logits": -12.363666534423828, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -26.25274658203125, "logits_per_token": -1.5454583168029785, "logits_per_char": -0.35324761526925225, "bits_per_byte": 0.5096285827551689, "num_chars": 35}, {"sum_logits": -19.178455352783203, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -35.75409698486328, "logits_per_token": -2.3973069190979004, "logits_per_char": -0.5046961934942948, "bits_per_byte": 0.728122695510279, "num_chars": 38}, {"sum_logits": -22.623844146728516, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -34.66190719604492, "logits_per_token": -3.231977735246931, "logits_per_char": -0.685571034749349, "bits_per_byte": 0.9890699320107116, "num_chars": 33}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 362, "native_id": "NYSEDREGENTS_2015_4_24", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.799602508544922, "logits_per_token_corr": -3.828514644077846, "logits_per_char_corr": -0.8645033067272555, "bits_per_byte_corr": 1.247214633448418}, "model_output": [{"sum_logits": -16.065608978271484, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -22.806934356689453, "logits_per_token": -2.6776014963785806, "logits_per_char": -0.4725179111256319, "bits_per_byte": 0.6816992471126508, "num_chars": 34}, {"sum_logits": -7.508414268493652, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -16.92841339111328, "logits_per_token": -3.754207134246826, "logits_per_char": -0.3951796983417712, "bits_per_byte": 0.5701237910580794, "num_chars": 19}, {"sum_logits": -26.799602508544922, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -32.645877838134766, "logits_per_token": -3.828514644077846, "logits_per_char": -0.8645033067272555, "bits_per_byte": 1.247214633448418, "num_chars": 31}, {"sum_logits": -22.051998138427734, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -24.946186065673828, "logits_per_token": -5.512999534606934, "logits_per_char": -0.9188332557678223, "bits_per_byte": 1.32559618150105, "num_chars": 24}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 363, "native_id": "Mercury_7122640", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.026737213134766, "logits_per_token_corr": -2.513368606567383, "logits_per_char_corr": -0.41889476776123047, "bits_per_byte_corr": 0.6043374041038954}, "model_output": [{"sum_logits": -5.026737213134766, "num_tokens": 2, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -20.763078689575195, "logits_per_token": -2.513368606567383, "logits_per_char": -0.41889476776123047, "bits_per_byte": 0.6043374041038954, "num_chars": 12}, {"sum_logits": -5.31810188293457, "num_tokens": 2, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -18.53467559814453, "logits_per_token": -2.659050941467285, "logits_per_char": -0.37986442020961214, "bits_per_byte": 0.5480285152469625, "num_chars": 14}, {"sum_logits": -4.779272079467773, "num_tokens": 2, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -17.70150375366211, "logits_per_token": -2.3896360397338867, "logits_per_char": -0.341376577104841, "bits_per_byte": 0.4925022948651568, "num_chars": 14}, {"sum_logits": -7.453046798706055, "num_tokens": 2, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -21.153072357177734, "logits_per_token": -3.7265233993530273, "logits_per_char": -0.4658154249191284, "bits_per_byte": 0.6720296035008947, "num_chars": 16}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 364, "native_id": "Mercury_402547", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.545061111450195, "logits_per_token_corr": -3.636265277862549, "logits_per_char_corr": -2.4241768519083657, "bits_per_byte_corr": 3.497347922488531}, "model_output": [{"sum_logits": -5.483089923858643, "num_tokens": 1, "num_tokens_all": 245, "is_greedy": false, "sum_logits_uncond": -5.818438529968262, "logits_per_token": -5.483089923858643, "logits_per_char": -2.7415449619293213, "bits_per_byte": 3.9552133209523954, "num_chars": 2}, {"sum_logits": -16.17609214782715, "num_tokens": 4, "num_tokens_all": 248, "is_greedy": false, "sum_logits_uncond": -25.068017959594727, "logits_per_token": -4.044023036956787, "logits_per_char": -2.3108703068324496, "bits_per_byte": 3.3338811318071278, "num_chars": 7}, {"sum_logits": -14.545061111450195, "num_tokens": 4, "num_tokens_all": 248, "is_greedy": false, "sum_logits_uncond": -21.997798919677734, "logits_per_token": -3.636265277862549, "logits_per_char": -2.4241768519083657, "bits_per_byte": 3.497347922488531, "num_chars": 6}, {"sum_logits": -1.9689888954162598, "num_tokens": 1, "num_tokens_all": 245, "is_greedy": true, "sum_logits_uncond": -10.635485649108887, "logits_per_token": -1.9689888954162598, "logits_per_char": -0.6563296318054199, "bits_per_byte": 0.9468835049948389, "num_chars": 3}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 365, "native_id": "Mercury_7133945", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.925536632537842, "logits_per_token_corr": -1.2313841581344604, "logits_per_char_corr": -0.2238880287517201, "bits_per_byte_corr": 0.3230021487947443}, "model_output": [{"sum_logits": -5.161733150482178, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -21.162479400634766, "logits_per_token": -1.0323466300964355, "logits_per_char": -0.16130416095256805, "bits_per_byte": 0.2327127130811923, "num_chars": 32}, {"sum_logits": -4.925536632537842, "num_tokens": 4, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -18.767602920532227, "logits_per_token": -1.2313841581344604, "logits_per_char": -0.2238880287517201, "bits_per_byte": 0.3230021487947443, "num_chars": 22}, {"sum_logits": -10.944544792175293, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -27.244140625, "logits_per_token": -2.1889089584350585, "logits_per_char": -0.4758497735728388, "bits_per_byte": 0.6865061085421639, "num_chars": 23}, {"sum_logits": -9.00600814819336, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -22.221668243408203, "logits_per_token": -1.801201629638672, "logits_per_char": -0.4288575308663504, "bits_per_byte": 0.6187106330292138, "num_chars": 21}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 366, "native_id": "Mercury_7199028", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.725311279296875, "logits_per_token_corr": -3.3626556396484375, "logits_per_char_corr": -0.6113919344815341, "bits_per_byte_corr": 0.8820521119166529}, "model_output": [{"sum_logits": -4.469013214111328, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -20.624235153198242, "logits_per_token": -2.234506607055664, "logits_per_char": -0.49655702379014754, "bits_per_byte": 0.7163803557411436, "num_chars": 9}, {"sum_logits": -6.725311279296875, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -18.9022216796875, "logits_per_token": -3.3626556396484375, "logits_per_char": -0.6113919344815341, "bits_per_byte": 0.8820521119166529, "num_chars": 11}, {"sum_logits": -5.974857330322266, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -20.211246490478516, "logits_per_token": -2.987428665161133, "logits_per_char": -0.49790477752685547, "bits_per_byte": 0.718324753373433, "num_chars": 12}, {"sum_logits": -5.481595993041992, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -19.680030822753906, "logits_per_token": -2.740797996520996, "logits_per_char": -0.4983269084583629, "bits_per_byte": 0.7189337595749251, "num_chars": 11}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 367, "native_id": "Mercury_7217298", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.782642364501953, "logits_per_token_corr": -3.463773727416992, "logits_per_char_corr": -0.546911641171104, "bits_per_byte_corr": 0.7890267125225628}, "model_output": [{"sum_logits": -20.782642364501953, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -35.26179504394531, "logits_per_token": -3.463773727416992, "logits_per_char": -0.546911641171104, "bits_per_byte": 0.7890267125225628, "num_chars": 38}, {"sum_logits": -16.611988067626953, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -28.750205993652344, "logits_per_token": -3.3223976135253905, "logits_per_char": -0.3955235254196894, "bits_per_byte": 0.5706198286783157, "num_chars": 42}, {"sum_logits": -16.817363739013672, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -30.460681915283203, "logits_per_token": -4.204340934753418, "logits_per_char": -0.5255426168441772, "bits_per_byte": 0.7581977270974478, "num_chars": 32}, {"sum_logits": -13.395153045654297, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -29.597026824951172, "logits_per_token": -3.348788261413574, "logits_per_char": -0.4185985326766968, "bits_per_byte": 0.603910027216501, "num_chars": 32}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 368, "native_id": "Mercury_7057680", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.309343338012695, "logits_per_token_corr": -2.384890556335449, "logits_per_char_corr": -0.6504246971823953, "bits_per_byte_corr": 0.9383644850974215}, "model_output": [{"sum_logits": -18.207529067993164, "num_tokens": 5, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -29.653602600097656, "logits_per_token": -3.641505813598633, "logits_per_char": -0.9103764533996582, "bits_per_byte": 1.313395594662713, "num_chars": 20}, {"sum_logits": -14.774786949157715, "num_tokens": 5, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -22.933141708374023, "logits_per_token": -2.954957389831543, "logits_per_char": -0.6423820412677267, "bits_per_byte": 0.9267613852937445, "num_chars": 23}, {"sum_logits": -14.309343338012695, "num_tokens": 6, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -26.507465362548828, "logits_per_token": -2.384890556335449, "logits_per_char": -0.6504246971823953, "bits_per_byte": 0.9383644850974215, "num_chars": 22}, {"sum_logits": -19.25305938720703, "num_tokens": 5, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -26.587947845458984, "logits_per_token": -3.8506118774414064, "logits_per_char": -1.2033162117004395, "bits_per_byte": 1.7360183312427653, "num_chars": 16}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 369, "native_id": "Mercury_SC_400404", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -31.89714813232422, "logits_per_token_corr": -4.556735447474888, "logits_per_char_corr": -0.9967858791351318, "bits_per_byte_corr": 1.4380580446574336}, "model_output": [{"sum_logits": -19.405622482299805, "num_tokens": 4, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -26.491527557373047, "logits_per_token": -4.851405620574951, "logits_per_char": -0.8820737491954457, "bits_per_byte": 1.272563423663519, "num_chars": 22}, {"sum_logits": -27.353633880615234, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -32.70228958129883, "logits_per_token": -4.558938980102539, "logits_per_char": -0.8548010587692261, "bits_per_byte": 1.233217248433884, "num_chars": 32}, {"sum_logits": -21.67678451538086, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -27.929607391357422, "logits_per_token": -3.612797419230143, "logits_per_char": -0.7741708755493164, "bits_per_byte": 1.1168924829564681, "num_chars": 28}, {"sum_logits": -31.89714813232422, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -38.75870132446289, "logits_per_token": -4.556735447474888, "logits_per_char": -0.9967858791351318, "bits_per_byte": 1.4380580446574336, "num_chars": 32}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 370, "native_id": "Mercury_SC_408030", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.83676528930664, "logits_per_token_corr": -2.10459566116333, "logits_per_char_corr": -0.5261489152908325, "bits_per_byte_corr": 0.7590724308597367}, "model_output": [{"sum_logits": -16.83676528930664, "num_tokens": 8, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -32.47028350830078, "logits_per_token": -2.10459566116333, "logits_per_char": -0.5261489152908325, "bits_per_byte": 0.7590724308597367, "num_chars": 32}, {"sum_logits": -15.41428279876709, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -28.341140747070312, "logits_per_token": -2.20204039982387, "logits_per_char": -0.45336125878726735, "bits_per_byte": 0.6540620397840385, "num_chars": 34}, {"sum_logits": -12.599961280822754, "num_tokens": 9, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -29.875507354736328, "logits_per_token": -1.399995697869195, "logits_per_char": -0.2519992256164551, "bits_per_byte": 0.36355803310498, "num_chars": 50}, {"sum_logits": -20.349748611450195, "num_tokens": 9, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -36.06446838378906, "logits_per_token": -2.2610831790500217, "logits_per_char": -0.41530099207041216, "bits_per_byte": 0.5991526817366808, "num_chars": 49}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 371, "native_id": "Mercury_415083", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.30327844619751, "logits_per_token_corr": -1.8258196115493774, "logits_per_char_corr": -1.2172130743662517, "bits_per_byte_corr": 1.756067266094662}, "model_output": [{"sum_logits": -8.459869384765625, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -18.159381866455078, "logits_per_token": -2.1149673461914062, "logits_per_char": -1.4099782307942708, "bits_per_byte": 2.03416860132975, "num_chars": 6}, {"sum_logits": -7.30327844619751, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -18.9197940826416, "logits_per_token": -1.8258196115493774, "logits_per_char": -1.2172130743662517, "bits_per_byte": 1.756067266094662, "num_chars": 6}, {"sum_logits": -5.478504657745361, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -18.339717864990234, "logits_per_token": -1.3696261644363403, "logits_per_char": -0.7826435225350517, "bits_per_byte": 1.129115928746, "num_chars": 7}, {"sum_logits": -7.084232330322266, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -19.033226013183594, "logits_per_token": -1.7710580825805664, "logits_per_char": -1.012033190046038, "bits_per_byte": 1.4600552644955058, "num_chars": 7}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 372, "native_id": "Mercury_409114", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.860999584197998, "logits_per_token_corr": -1.2152498960494995, "logits_per_char_corr": -0.2113478080086086, "bits_per_byte_corr": 0.30491043451699146}, "model_output": [{"sum_logits": -5.616112232208252, "num_tokens": 4, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -20.039146423339844, "logits_per_token": -1.404028058052063, "logits_per_char": -0.2441787927047066, "bits_per_byte": 0.3522755333255875, "num_chars": 23}, {"sum_logits": -4.860999584197998, "num_tokens": 4, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -20.862777709960938, "logits_per_token": -1.2152498960494995, "logits_per_char": -0.2113478080086086, "bits_per_byte": 0.30491043451699146, "num_chars": 23}, {"sum_logits": -3.7405900955200195, "num_tokens": 4, "num_tokens_all": 209, "is_greedy": true, "sum_logits_uncond": -19.83544158935547, "logits_per_token": -0.9351475238800049, "logits_per_char": -0.15585792064666748, "bits_per_byte": 0.22485544920037429, "num_chars": 24}, {"sum_logits": -5.412535190582275, "num_tokens": 4, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -21.27185821533203, "logits_per_token": -1.3531337976455688, "logits_per_char": -0.22552229960759482, "bits_per_byte": 0.32535990325398584, "num_chars": 24}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 373, "native_id": "Mercury_SC_415006", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.676448822021484, "logits_per_token_corr": -2.24331352927468, "logits_per_char_corr": -0.7050413949148996, "bits_per_byte_corr": 1.0171597240658936}, "model_output": [{"sum_logits": -24.676448822021484, "num_tokens": 11, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -30.12211036682129, "logits_per_token": -2.24331352927468, "logits_per_char": -0.7050413949148996, "bits_per_byte": 1.0171597240658936, "num_chars": 35}, {"sum_logits": -33.649024963378906, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -37.87051773071289, "logits_per_token": -4.206128120422363, "logits_per_char": -0.989677204805262, "bits_per_byte": 1.4278023954544283, "num_chars": 34}, {"sum_logits": -31.95784568786621, "num_tokens": 9, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -38.51308822631836, "logits_per_token": -3.5508717430962458, "logits_per_char": -0.913081305367606, "bits_per_byte": 1.3172978711832128, "num_chars": 35}, {"sum_logits": -32.043251037597656, "num_tokens": 9, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -30.309036254882812, "logits_per_token": -3.5603612263997397, "logits_per_char": -0.971007607199929, "bits_per_byte": 1.4008678595738024, "num_chars": 33}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 374, "native_id": "MSA_2012_5_15", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.46325397491455, "logits_per_token_corr": -1.4463253974914552, "logits_per_char_corr": -0.4132358278547015, "bits_per_byte_corr": 0.5796129106872723}, "model_output": [{"sum_logits": -19.67188262939453, "num_tokens": 10, "num_tokens_all": 278, "is_greedy": false, "sum_logits_uncond": -42.35392379760742, "logits_per_token": -1.967188262939453, "logits_per_char": -0.5785847832174862, "bits_per_byte": 0.8108722146970749, "num_chars": 34}, {"sum_logits": -15.378040313720703, "num_tokens": 10, "num_tokens_all": 278, "is_greedy": false, "sum_logits_uncond": -37.468318939208984, "logits_per_token": -1.5378040313720702, "logits_per_char": -0.45229530334472656, "bits_per_byte": 0.6338806428346102, "num_chars": 34}, {"sum_logits": -14.46325397491455, "num_tokens": 10, "num_tokens_all": 278, "is_greedy": false, "sum_logits_uncond": -42.14158630371094, "logits_per_token": -1.4463253974914552, "logits_per_char": -0.4132358278547015, "bits_per_byte": 0.5796129106872723, "num_chars": 35}, {"sum_logits": -17.694541931152344, "num_tokens": 10, "num_tokens_all": 278, "is_greedy": false, "sum_logits_uncond": -44.526893615722656, "logits_per_token": -1.7694541931152343, "logits_per_char": -0.505558340890067, "bits_per_byte": 0.7091063304137124, "num_chars": 35}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 375, "native_id": "Mercury_SC_402612", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -27.413522720336914, "logits_per_token_corr": -2.7413522720336916, "logits_per_char_corr": -0.5594596473538146, "bits_per_byte_corr": 0.8071296588154164}, "model_output": [{"sum_logits": -23.960939407348633, "num_tokens": 7, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -34.06426239013672, "logits_per_token": -3.4229913439069475, "logits_per_char": -0.7487793564796448, "bits_per_byte": 1.080260264313989, "num_chars": 32}, {"sum_logits": -18.264873504638672, "num_tokens": 7, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -33.81122589111328, "logits_per_token": -2.6092676435198103, "logits_per_char": -0.5534810152920809, "bits_per_byte": 0.7985043159886474, "num_chars": 33}, {"sum_logits": -20.63610076904297, "num_tokens": 7, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -33.035743713378906, "logits_per_token": -2.948014395577567, "logits_per_char": -0.6069441402659697, "bits_per_byte": 0.8756353012589589, "num_chars": 34}, {"sum_logits": -27.413522720336914, "num_tokens": 10, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -48.74095153808594, "logits_per_token": -2.7413522720336916, "logits_per_char": -0.5594596473538146, "bits_per_byte": 0.8071296588154164, "num_chars": 49}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 376, "native_id": "Mercury_SC_405937", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.300460815429688, "logits_per_token_corr": -2.325115203857422, "logits_per_char_corr": -0.3207055453596444, "bits_per_byte_corr": 0.4626802998762819}, "model_output": [{"sum_logits": -23.320646286010742, "num_tokens": 6, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -40.0672721862793, "logits_per_token": -3.8867743810017905, "logits_per_char": -0.6859013613532571, "bits_per_byte": 0.9895464925640438, "num_chars": 34}, {"sum_logits": -9.300460815429688, "num_tokens": 4, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -26.96239471435547, "logits_per_token": -2.325115203857422, "logits_per_char": -0.3207055453596444, "bits_per_byte": 0.4626802998762819, "num_chars": 29}, {"sum_logits": -10.966538429260254, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -26.886240005493164, "logits_per_token": -2.1933076858520506, "logits_per_char": -0.3046260674794515, "bits_per_byte": 0.4394825168784271, "num_chars": 36}, {"sum_logits": -24.96970558166504, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -37.21821594238281, "logits_per_token": -4.993941116333008, "logits_per_char": -0.6090172093089034, "bits_per_byte": 0.878626107686622, "num_chars": 41}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 377, "native_id": "Mercury_SC_416459", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.46385383605957, "logits_per_token_corr": -6.731926918029785, "logits_per_char_corr": -1.346385383605957, "bits_per_byte_corr": 1.9424235160550942}, "model_output": [{"sum_logits": -9.228655815124512, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -17.067554473876953, "logits_per_token": -3.076218605041504, "logits_per_char": -1.3183794021606445, "bits_per_byte": 1.9020194255086846, "num_chars": 7}, {"sum_logits": -15.911696434020996, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -19.329957962036133, "logits_per_token": -3.977924108505249, "logits_per_char": -1.9889620542526245, "bits_per_byte": 2.869465692188648, "num_chars": 8}, {"sum_logits": -13.46385383605957, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -18.810829162597656, "logits_per_token": -6.731926918029785, "logits_per_char": -1.346385383605957, "bits_per_byte": 1.9424235160550942, "num_chars": 10}, {"sum_logits": -10.593957901000977, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -18.115394592285156, "logits_per_token": -5.296978950500488, "logits_per_char": -0.7062638600667318, "bits_per_byte": 1.0189233684781027, "num_chars": 15}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 378, "native_id": "NAEP_2000_8_S21+4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.358856201171875, "logits_per_token_corr": -1.613238016764323, "logits_per_char_corr": -0.3652614377579599, "bits_per_byte_corr": 0.5269608648817601}, "model_output": [{"sum_logits": -6.154768466949463, "num_tokens": 2, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -14.988409042358398, "logits_per_token": -3.0773842334747314, "logits_per_char": -0.41031789779663086, "bits_per_byte": 0.5919635963396092, "num_chars": 15}, {"sum_logits": -7.604063510894775, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -16.098846435546875, "logits_per_token": -1.520812702178955, "logits_per_char": -0.34563925049521704, "bits_per_byte": 0.4986520326263861, "num_chars": 22}, {"sum_logits": -19.358856201171875, "num_tokens": 12, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -36.53599548339844, "logits_per_token": -1.613238016764323, "logits_per_char": -0.3652614377579599, "bits_per_byte": 0.5269608648817601, "num_chars": 53}, {"sum_logits": -16.817249298095703, "num_tokens": 10, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -31.562824249267578, "logits_per_token": -1.6817249298095702, "logits_per_char": -0.30030802318028044, "bits_per_byte": 0.4332528957816697, "num_chars": 56}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 379, "native_id": "Mercury_7072380", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.9051477909088135, "logits_per_token_corr": -1.9525738954544067, "logits_per_char_corr": -0.6508579651514689, "bits_per_byte_corr": 0.9389895586477806}, "model_output": [{"sum_logits": -6.914876937866211, "num_tokens": 2, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -15.564711570739746, "logits_per_token": -3.4574384689331055, "logits_per_char": -0.8643596172332764, "bits_per_byte": 1.2470073333280263, "num_chars": 8}, {"sum_logits": -3.9051477909088135, "num_tokens": 2, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -16.23455238342285, "logits_per_token": -1.9525738954544067, "logits_per_char": -0.6508579651514689, "bits_per_byte": 0.9389895586477806, "num_chars": 6}, {"sum_logits": -10.338186264038086, "num_tokens": 2, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -16.486549377441406, "logits_per_token": -5.169093132019043, "logits_per_char": -1.4768837520054408, "bits_per_byte": 2.130692864989266, "num_chars": 7}, {"sum_logits": -5.768912315368652, "num_tokens": 2, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -14.53095817565918, "logits_per_token": -2.884456157684326, "logits_per_char": -0.7211140394210815, "bits_per_byte": 1.0403476485889502, "num_chars": 8}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 380, "native_id": "Mercury_SC_401373", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.238183975219727, "logits_per_token_corr": -2.4709093305799694, "logits_per_char_corr": -0.5423947311029201, "bits_per_byte_corr": 0.7825101887670478}, "model_output": [{"sum_logits": -22.238183975219727, "num_tokens": 9, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -36.082801818847656, "logits_per_token": -2.4709093305799694, "logits_per_char": -0.5423947311029201, "bits_per_byte": 0.7825101887670478, "num_chars": 41}, {"sum_logits": -35.424888610839844, "num_tokens": 9, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -44.27742004394531, "logits_per_token": -3.9360987345377603, "logits_per_char": -0.9322339108115748, "bits_per_byte": 1.3449292400773494, "num_chars": 38}, {"sum_logits": -14.804475784301758, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -30.42316436767578, "logits_per_token": -1.8505594730377197, "logits_per_char": -0.40012096714329076, "bits_per_byte": 0.5772525350537362, "num_chars": 37}, {"sum_logits": -33.54096984863281, "num_tokens": 9, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -42.6078987121582, "logits_per_token": -3.726774427625868, "logits_per_char": -0.7136376563538896, "bits_per_byte": 1.0295615078141185, "num_chars": 47}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 381, "native_id": "Mercury_SC_400579", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.857447624206543, "logits_per_token_corr": -2.7143619060516357, "logits_per_char_corr": -0.4342979049682617, "bits_per_byte_corr": 0.6265594337666276}, "model_output": [{"sum_logits": -8.909128189086914, "num_tokens": 3, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -17.759235382080078, "logits_per_token": -2.9697093963623047, "logits_per_char": -0.6853175530066857, "bits_per_byte": 0.9887042351576152, "num_chars": 13}, {"sum_logits": -25.84619140625, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -29.36208724975586, "logits_per_token": -6.4615478515625, "logits_per_char": -1.2307710193452381, "bits_per_byte": 1.7756272460805052, "num_chars": 21}, {"sum_logits": -10.857447624206543, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -26.79152488708496, "logits_per_token": -2.7143619060516357, "logits_per_char": -0.4342979049682617, "bits_per_byte": 0.6265594337666276, "num_chars": 25}, {"sum_logits": -18.290210723876953, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -27.056819915771484, "logits_per_token": -4.572552680969238, "logits_per_char": -0.6096736907958984, "bits_per_byte": 0.8795732102723458, "num_chars": 30}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 382, "native_id": "MCAS_2003_5_14", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.004645347595215, "logits_per_token_corr": -1.4004645347595215, "logits_per_char_corr": -0.31121434105767143, "bits_per_byte_corr": 0.44898738649775166}, "model_output": [{"sum_logits": -11.073301315307617, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -28.348102569580078, "logits_per_token": -1.3841626644134521, "logits_per_char": -0.2914026661923057, "bits_per_byte": 0.42040518141776345, "num_chars": 38}, {"sum_logits": -16.584766387939453, "num_tokens": 9, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -34.71786117553711, "logits_per_token": -1.8427518208821614, "logits_per_char": -0.43644122073524877, "bits_per_byte": 0.6296515847947212, "num_chars": 38}, {"sum_logits": -14.004645347595215, "num_tokens": 10, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -32.436317443847656, "logits_per_token": -1.4004645347595215, "logits_per_char": -0.31121434105767143, "bits_per_byte": 0.44898738649775166, "num_chars": 45}, {"sum_logits": -12.088749885559082, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -28.420001983642578, "logits_per_token": -1.5110937356948853, "logits_per_char": -0.3181249969883969, "bits_per_byte": 0.45895735553830636, "num_chars": 38}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 383, "native_id": "MSA_2015_8_30", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.92430877685547, "logits_per_token_corr": -1.991589864095052, "logits_per_char_corr": -0.5121231079101562, "bits_per_byte_corr": 0.7388374681071567}, "model_output": [{"sum_logits": -17.92430877685547, "num_tokens": 9, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -37.19586944580078, "logits_per_token": -1.991589864095052, "logits_per_char": -0.5121231079101562, "bits_per_byte": 0.7388374681071567, "num_chars": 35}, {"sum_logits": -22.383281707763672, "num_tokens": 10, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -37.45439147949219, "logits_per_token": -2.2383281707763674, "logits_per_char": -0.6583318149342257, "bits_per_byte": 0.9497720446657205, "num_chars": 34}, {"sum_logits": -20.01882553100586, "num_tokens": 11, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -37.38124465942383, "logits_per_token": -1.8198932300914417, "logits_per_char": -0.5004706382751465, "bits_per_byte": 0.7220265079506069, "num_chars": 40}, {"sum_logits": -24.153221130371094, "num_tokens": 10, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -37.84939956665039, "logits_per_token": -2.415322113037109, "logits_per_char": -0.5367382473415798, "bits_per_byte": 0.7743496076956874, "num_chars": 45}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 384, "native_id": "Mercury_SC_415416", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -33.06089782714844, "logits_per_token_corr": -3.673433091905382, "logits_per_char_corr": -0.8063633616377668, "bits_per_byte_corr": 1.1356379367285243}, "model_output": [{"sum_logits": -29.252023696899414, "num_tokens": 8, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -33.8807373046875, "logits_per_token": -3.6565029621124268, "logits_per_char": -0.7905952350513356, "bits_per_byte": 1.1405878249598256, "num_chars": 37}, {"sum_logits": -34.634124755859375, "num_tokens": 12, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -39.219215393066406, "logits_per_token": -2.8861770629882812, "logits_per_char": -0.6660408606896033, "bits_per_byte": 0.942763774166866, "num_chars": 52}, {"sum_logits": -33.06089782714844, "num_tokens": 9, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -35.2050895690918, "logits_per_token": -3.673433091905382, "logits_per_char": -0.8063633616377668, "bits_per_byte": 1.1356379367285243, "num_chars": 41}, {"sum_logits": -29.677080154418945, "num_tokens": 11, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -35.32498550415039, "logits_per_token": -2.6979163776744497, "logits_per_char": -0.5935416030883789, "bits_per_byte": 0.8562995273375049, "num_chars": 50}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 385, "native_id": "NYSEDREGENTS_2012_8_42", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -1.1955223083496094, "logits_per_token_corr": -1.1955223083496094, "logits_per_char_corr": -0.13283581203884548, "bits_per_byte_corr": 0.1916415672810385}, "model_output": [{"sum_logits": -0.7819194197654724, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": true, "sum_logits_uncond": -13.385931015014648, "logits_per_token": -0.7819194197654724, "logits_per_char": -0.08687993552949694, "bits_per_byte": 0.12534125214124814, "num_chars": 9}, {"sum_logits": -1.1955223083496094, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -11.988180160522461, "logits_per_token": -1.1955223083496094, "logits_per_char": -0.13283581203884548, "bits_per_byte": 0.1916415672810385, "num_chars": 9}, {"sum_logits": -6.631292343139648, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -18.153175354003906, "logits_per_token": -2.2104307810465493, "logits_per_char": -0.414455771446228, "bits_per_byte": 0.5979332861337124, "num_chars": 16}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 386, "native_id": "NCEOGA_2013_5_9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.389535903930664, "logits_per_token_corr": -3.4631786346435547, "logits_per_char_corr": -0.5771964391072592, "bits_per_byte_corr": 0.8327184403194096}, "model_output": [{"sum_logits": -7.777006149291992, "num_tokens": 2, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -19.051870346069336, "logits_per_token": -3.888503074645996, "logits_per_char": -0.43205589718288845, "bits_per_byte": 0.6233249002530329, "num_chars": 18}, {"sum_logits": -10.389535903930664, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -20.467514038085938, "logits_per_token": -3.4631786346435547, "logits_per_char": -0.5771964391072592, "bits_per_byte": 0.8327184403194096, "num_chars": 18}, {"sum_logits": -13.39389419555664, "num_tokens": 2, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -20.938383102416992, "logits_per_token": -6.69694709777832, "logits_per_char": -0.7878761291503906, "bits_per_byte": 1.1366649843608776, "num_chars": 17}, {"sum_logits": -8.29804515838623, "num_tokens": 2, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -20.957012176513672, "logits_per_token": -4.149022579193115, "logits_per_char": -0.46100250879923504, "bits_per_byte": 0.6650860332825049, "num_chars": 18}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 387, "native_id": "MEAP_2005_8_45", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.76894760131836, "logits_per_token_corr": -1.6153588728471235, "logits_per_char_corr": -0.34171053079458386, "bits_per_byte_corr": 0.4929840881972357}, "model_output": [{"sum_logits": -12.756909370422363, "num_tokens": 7, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -31.677465438842773, "logits_per_token": -1.8224156243460519, "logits_per_char": -0.34478133433573954, "bits_per_byte": 0.4974143212376085, "num_chars": 37}, {"sum_logits": -13.813535690307617, "num_tokens": 9, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -41.206878662109375, "logits_per_token": -1.5348372989230685, "logits_per_char": -0.3069674597846137, "bits_per_byte": 0.44286043194586266, "num_chars": 45}, {"sum_logits": -15.620532989501953, "num_tokens": 9, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -38.53540802001953, "logits_per_token": -1.7356147766113281, "logits_per_char": -0.31241065979003907, "bits_per_byte": 0.45071330960026224, "num_chars": 50}, {"sum_logits": -17.76894760131836, "num_tokens": 11, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -42.32838821411133, "logits_per_token": -1.6153588728471235, "logits_per_char": -0.34171053079458386, "bits_per_byte": 0.4929840881972357, "num_chars": 52}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 388, "native_id": "Mercury_SC_400594", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.568866729736328, "logits_per_token_corr": -2.313773345947266, "logits_per_char_corr": -0.42847654554578996, "bits_per_byte_corr": 0.6181609873965893}, "model_output": [{"sum_logits": -12.860492706298828, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -22.255252838134766, "logits_per_token": -3.215123176574707, "logits_per_char": -0.9186066218784877, "bits_per_byte": 1.3252692179128096, "num_chars": 14}, {"sum_logits": -11.954463958740234, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -20.03868865966797, "logits_per_token": -2.9886159896850586, "logits_per_char": -0.7471539974212646, "bits_per_byte": 1.0779153668607984, "num_chars": 16}, {"sum_logits": -12.084161758422852, "num_tokens": 5, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -27.04434585571289, "logits_per_token": -2.4168323516845702, "logits_per_char": -0.44756154660825376, "bits_per_byte": 0.6456948237847863, "num_chars": 27}, {"sum_logits": -11.568866729736328, "num_tokens": 5, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -24.480220794677734, "logits_per_token": -2.313773345947266, "logits_per_char": -0.42847654554578996, "bits_per_byte": 0.6181609873965893, "num_chars": 27}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 389, "native_id": "NCEOGA_2013_8_43", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -24.02737045288086, "logits_per_token_corr": -1.716240746634347, "logits_per_char_corr": -0.3384136683504346, "bits_per_byte_corr": 0.48822772109856516}, "model_output": [{"sum_logits": -24.02737045288086, "num_tokens": 14, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -39.46078872680664, "logits_per_token": -1.716240746634347, "logits_per_char": -0.3384136683504346, "bits_per_byte": 0.48822772109856516, "num_chars": 71}, {"sum_logits": -22.136062622070312, "num_tokens": 9, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -31.923625946044922, "logits_per_token": -2.459562513563368, "logits_per_char": -0.45175638004225127, "bits_per_byte": 0.651746689177374, "num_chars": 49}, {"sum_logits": -25.64678955078125, "num_tokens": 13, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -33.29030227661133, "logits_per_token": -1.9728299654447115, "logits_per_char": -0.38278790374300375, "bits_per_byte": 0.5522462104427102, "num_chars": 67}, {"sum_logits": -22.5843505859375, "num_tokens": 12, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -33.50815963745117, "logits_per_token": -1.8820292154947917, "logits_per_char": -0.3474515474759615, "bits_per_byte": 0.5012666244931261, "num_chars": 65}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 390, "native_id": "MCAS_2006_8_13", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.184280395507812, "logits_per_token_corr": -1.5184280395507812, "logits_per_char_corr": -0.3796070098876953, "bits_per_byte_corr": 0.5476571506520591}, "model_output": [{"sum_logits": -21.736377716064453, "num_tokens": 9, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -40.818084716796875, "logits_per_token": -2.415153079562717, "logits_per_char": -0.5434094429016113, "bits_per_byte": 0.7839741084469521, "num_chars": 40}, {"sum_logits": -23.406505584716797, "num_tokens": 9, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -37.496063232421875, "logits_per_token": -2.6007228427463107, "logits_per_char": -0.5572977520170665, "bits_per_byte": 0.8040107031341669, "num_chars": 42}, {"sum_logits": -15.184280395507812, "num_tokens": 10, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -35.452232360839844, "logits_per_token": -1.5184280395507812, "logits_per_char": -0.3796070098876953, "bits_per_byte": 0.5476571506520591, "num_chars": 40}, {"sum_logits": -22.186325073242188, "num_tokens": 9, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -36.032249450683594, "logits_per_token": -2.465147230360243, "logits_per_char": -0.672312881007339, "bits_per_byte": 0.9699424593557565, "num_chars": 33}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 391, "native_id": "Mercury_7168823", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -31.321178436279297, "logits_per_token_corr": -3.4801309373643665, "logits_per_char_corr": -0.7830294609069824, "bits_per_byte_corr": 1.1296727201212735}, "model_output": [{"sum_logits": -20.224124908447266, "num_tokens": 8, "num_tokens_all": 230, "is_greedy": false, "sum_logits_uncond": -30.258766174316406, "logits_per_token": -2.528015613555908, "logits_per_char": -0.5185673053448017, "bits_per_byte": 0.7481344797886357, "num_chars": 39}, {"sum_logits": -12.683062553405762, "num_tokens": 7, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -23.7745418548584, "logits_per_token": -1.811866079057966, "logits_per_char": -0.31707656383514404, "bits_per_byte": 0.4574447862274038, "num_chars": 40}, {"sum_logits": -23.587493896484375, "num_tokens": 8, "num_tokens_all": 230, "is_greedy": false, "sum_logits_uncond": -44.78626251220703, "logits_per_token": -2.948436737060547, "logits_per_char": -0.5753047291825457, "bits_per_byte": 0.8299892797922231, "num_chars": 41}, {"sum_logits": -31.321178436279297, "num_tokens": 9, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -44.59305953979492, "logits_per_token": -3.4801309373643665, "logits_per_char": -0.7830294609069824, "bits_per_byte": 1.1296727201212735, "num_chars": 40}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 392, "native_id": "Mercury_7158935", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.66586685180664, "logits_per_token_corr": -2.9443111419677734, "logits_per_char_corr": -0.5195843191707835, "bits_per_byte_corr": 0.7496017205918962}, "model_output": [{"sum_logits": -11.952163696289062, "num_tokens": 6, "num_tokens_all": 237, "is_greedy": false, "sum_logits_uncond": -31.787761688232422, "logits_per_token": -1.9920272827148438, "logits_per_char": -0.36218677867542615, "bits_per_byte": 0.5225250694709612, "num_chars": 33}, {"sum_logits": -17.37125015258789, "num_tokens": 7, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -36.02149963378906, "logits_per_token": -2.4816071646554128, "logits_per_char": -0.4963214329310826, "bits_per_byte": 0.7160404699770916, "num_chars": 35}, {"sum_logits": -17.66586685180664, "num_tokens": 6, "num_tokens_all": 237, "is_greedy": false, "sum_logits_uncond": -35.3392448425293, "logits_per_token": -2.9443111419677734, "logits_per_char": -0.5195843191707835, "bits_per_byte": 0.7496017205918962, "num_chars": 34}, {"sum_logits": -21.399991989135742, "num_tokens": 7, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -37.78729248046875, "logits_per_token": -3.0571417127336775, "logits_per_char": -0.5487177433111728, "bits_per_byte": 0.7916323671233809, "num_chars": 39}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 393, "native_id": "Mercury_7172708", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.6491289138793945, "logits_per_token_corr": -1.6622822284698486, "logits_per_char_corr": -0.41557055711746216, "bits_per_byte_corr": 0.5995415818932571}, "model_output": [{"sum_logits": -8.787914276123047, "num_tokens": 4, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -19.026029586791992, "logits_per_token": -2.1969785690307617, "logits_per_char": -0.5492446422576904, "bits_per_byte": 0.792392521620572, "num_chars": 16}, {"sum_logits": -6.6491289138793945, "num_tokens": 4, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -19.75766372680664, "logits_per_token": -1.6622822284698486, "logits_per_char": -0.41557055711746216, "bits_per_byte": 0.5995415818932571, "num_chars": 16}, {"sum_logits": -14.042952537536621, "num_tokens": 4, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -23.284568786621094, "logits_per_token": -3.5107381343841553, "logits_per_char": -0.8776845335960388, "bits_per_byte": 1.2662311240848576, "num_chars": 16}, {"sum_logits": -12.861934661865234, "num_tokens": 4, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -25.511112213134766, "logits_per_token": -3.2154836654663086, "logits_per_char": -0.6430967330932618, "bits_per_byte": 0.9277924676462086, "num_chars": 20}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 394, "native_id": "ACTAAP_2010_5_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.341209411621094, "logits_per_token_corr": -1.8617674509684246, "logits_per_char_corr": -0.4380629296396293, "bits_per_byte_corr": 0.6319912161888381}, "model_output": [{"sum_logits": -22.341209411621094, "num_tokens": 12, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -49.3102912902832, "logits_per_token": -1.8617674509684246, "logits_per_char": -0.4380629296396293, "bits_per_byte": 0.6319912161888381, "num_chars": 51}, {"sum_logits": -35.29743576049805, "num_tokens": 11, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -50.83946228027344, "logits_per_token": -3.2088577964089136, "logits_per_char": -0.6659893539716613, "bits_per_byte": 0.9608195382604504, "num_chars": 53}, {"sum_logits": -24.19140625, "num_tokens": 13, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -51.950584411621094, "logits_per_token": -1.8608774038461537, "logits_per_char": -0.3839905753968254, "bits_per_byte": 0.5539812988734976, "num_chars": 63}, {"sum_logits": -32.00843811035156, "num_tokens": 14, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -46.74653625488281, "logits_per_token": -2.2863170078822543, "logits_per_char": -0.4572634015764509, "bits_per_byte": 0.6596916418348383, "num_chars": 70}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 395, "native_id": "Mercury_7093048", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.094205856323242, "logits_per_token_corr": -2.8188411712646486, "logits_per_char_corr": -0.5220076243082682, "bits_per_byte_corr": 0.7530978108963087}, "model_output": [{"sum_logits": -9.574271202087402, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -18.12948226928711, "logits_per_token": -4.787135601043701, "logits_per_char": -0.9574271202087402, "bits_per_byte": 1.3812753583387434, "num_chars": 10}, {"sum_logits": -9.662458419799805, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -25.71380043029785, "logits_per_token": -3.2208194732666016, "logits_per_char": -0.4831229209899902, "bits_per_byte": 0.6969990422525502, "num_chars": 20}, {"sum_logits": -14.094205856323242, "num_tokens": 5, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -30.105989456176758, "logits_per_token": -2.8188411712646486, "logits_per_char": -0.5220076243082682, "bits_per_byte": 0.7530978108963087, "num_chars": 27}, {"sum_logits": -16.340181350708008, "num_tokens": 6, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -27.70608139038086, "logits_per_token": -2.7233635584513345, "logits_per_char": -0.5106306672096252, "bits_per_byte": 0.7366843313096783, "num_chars": 32}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 396, "native_id": "Mercury_7081603", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -31.14590835571289, "logits_per_token_corr": -2.8314462141557173, "logits_per_char_corr": -0.5105886615690638, "bits_per_byte_corr": 0.7366237299803509}, "model_output": [{"sum_logits": -24.11743927001953, "num_tokens": 9, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -21.73569107055664, "logits_per_token": -2.6797154744466147, "logits_per_char": -0.7308314930308949, "bits_per_byte": 1.0543669707219065, "num_chars": 33}, {"sum_logits": -22.68682098388672, "num_tokens": 10, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -30.913982391357422, "logits_per_token": -2.268682098388672, "logits_per_char": -0.45373641967773437, "bits_per_byte": 0.6546032825402511, "num_chars": 50}, {"sum_logits": -31.14590835571289, "num_tokens": 11, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -33.651695251464844, "logits_per_token": -2.8314462141557173, "logits_per_char": -0.5105886615690638, "bits_per_byte": 0.7366237299803509, "num_chars": 61}, {"sum_logits": -32.52638244628906, "num_tokens": 13, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -35.84080505371094, "logits_per_token": -2.5020294189453125, "logits_per_char": -0.5246190717143397, "bits_per_byte": 0.7568653331185932, "num_chars": 62}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 397, "native_id": "Mercury_SC_LBS11003", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.465343475341797, "logits_per_token_corr": -2.8294826083713107, "logits_per_char_corr": -0.5418158186242935, "bits_per_byte_corr": 0.7816749946050239}, "model_output": [{"sum_logits": -34.21311950683594, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -48.41462707519531, "logits_per_token": -4.276639938354492, "logits_per_char": -0.7279387129114029, "bits_per_byte": 1.0501935711891304, "num_chars": 47}, {"sum_logits": -20.96676254272461, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -35.020809173583984, "logits_per_token": -2.620845317840576, "logits_per_char": -0.5113844522615758, "bits_per_byte": 0.7377718132660243, "num_chars": 41}, {"sum_logits": -21.92479705810547, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -30.926437377929688, "logits_per_token": -3.1321138654436385, "logits_per_char": -0.6448469722972197, "bits_per_byte": 0.93031752906613, "num_chars": 34}, {"sum_logits": -25.465343475341797, "num_tokens": 9, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -37.23554229736328, "logits_per_token": -2.8294826083713107, "logits_per_char": -0.5418158186242935, "bits_per_byte": 0.7816749946050239, "num_chars": 47}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 398, "native_id": "MCAS_2005_8_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.938806533813477, "logits_per_token_corr": -1.0782338778177898, "logits_per_char_corr": -0.2053778814891028, "bits_per_byte_corr": 0.2962976511328227}, "model_output": [{"sum_logits": -11.324339866638184, "num_tokens": 10, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -33.535465240478516, "logits_per_token": -1.1324339866638184, "logits_per_char": -0.23110897687016702, "bits_per_byte": 0.33341977483575164, "num_chars": 49}, {"sum_logits": -10.50459098815918, "num_tokens": 10, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -32.323184967041016, "logits_per_token": -1.050459098815918, "logits_per_char": -0.21009181976318358, "bits_per_byte": 0.3030984265039006, "num_chars": 50}, {"sum_logits": -12.938806533813477, "num_tokens": 12, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -36.700477600097656, "logits_per_token": -1.0782338778177898, "logits_per_char": -0.2053778814891028, "bits_per_byte": 0.2962976511328227, "num_chars": 63}, {"sum_logits": -14.6986083984375, "num_tokens": 13, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -36.51238250732422, "logits_per_token": -1.1306621844951923, "logits_per_char": -0.22613243689903847, "bits_per_byte": 0.3262401452986136, "num_chars": 65}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 399, "native_id": "ACTAAP_2010_7_14", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.161287307739258, "logits_per_token_corr": -1.935483639056866, "logits_per_char_corr": -0.4574779510498047, "bits_per_byte_corr": 0.6600011712960714}, "model_output": [{"sum_logits": -32.0863151550293, "num_tokens": 11, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -40.25574493408203, "logits_per_token": -2.9169377413663, "logits_per_char": -0.6291434344123391, "bits_per_byte": 0.9076621128351845, "num_chars": 51}, {"sum_logits": -32.23903274536133, "num_tokens": 11, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -38.67567443847656, "logits_per_token": -2.9308211586692114, "logits_per_char": -0.6199813989492563, "bits_per_byte": 0.8944440897081366, "num_chars": 52}, {"sum_logits": -25.161287307739258, "num_tokens": 13, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -35.104026794433594, "logits_per_token": -1.935483639056866, "logits_per_char": -0.4574779510498047, "bits_per_byte": 0.6600011712960714, "num_chars": 55}, {"sum_logits": -27.27825355529785, "num_tokens": 12, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -37.3423957824707, "logits_per_token": -2.273187796274821, "logits_per_char": -0.4871116706303188, "bits_per_byte": 0.7027535915780039, "num_chars": 56}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 400, "native_id": "NYSEDREGENTS_2008_4_15", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -1.9118447303771973, "logits_per_token_corr": -1.9118447303771973, "logits_per_char_corr": -0.3186407883961995, "bits_per_byte_corr": 0.4597014852444769}, "model_output": [{"sum_logits": -3.8034729957580566, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -11.783369064331055, "logits_per_token": -3.8034729957580566, "logits_per_char": -0.9508682489395142, "bits_per_byte": 1.3718129072847949, "num_chars": 4}, {"sum_logits": -2.035940647125244, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -13.446318626403809, "logits_per_token": -2.035940647125244, "logits_per_char": -0.2908486638750349, "bits_per_byte": 0.4196059250219953, "num_chars": 7}, {"sum_logits": -1.9118447303771973, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": true, "sum_logits_uncond": -13.013647079467773, "logits_per_token": -1.9118447303771973, "logits_per_char": -0.3186407883961995, "bits_per_byte": 0.4597014852444769, "num_chars": 6}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 401, "native_id": "Mercury_7107240", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.630268096923828, "logits_per_token_corr": -3.3260536193847656, "logits_per_char_corr": -0.5543422698974609, "bits_per_byte_corr": 0.7997468437367727}, "model_output": [{"sum_logits": -16.630268096923828, "num_tokens": 5, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -29.78818130493164, "logits_per_token": -3.3260536193847656, "logits_per_char": -0.5543422698974609, "bits_per_byte": 0.7997468437367727, "num_chars": 30}, {"sum_logits": -19.15499496459961, "num_tokens": 6, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -28.801387786865234, "logits_per_token": -3.1924991607666016, "logits_per_char": -0.47887487411499025, "bits_per_byte": 0.6908704060925194, "num_chars": 40}, {"sum_logits": -34.850982666015625, "num_tokens": 9, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -48.422821044921875, "logits_per_token": -3.8723314073350696, "logits_per_char": -0.6970196533203125, "bits_per_byte": 1.0055867972480816, "num_chars": 50}, {"sum_logits": -37.26767349243164, "num_tokens": 13, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -48.561187744140625, "logits_per_token": -2.866744114802434, "logits_per_char": -0.5733488229604867, "bits_per_byte": 0.8271675035852127, "num_chars": 65}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 402, "native_id": "Mercury_7218628", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.87507438659668, "logits_per_token_corr": -2.8750082651774087, "logits_per_char_corr": -0.6017459159673646, "bits_per_byte_corr": 0.8681358488419275}, "model_output": [{"sum_logits": -28.848905563354492, "num_tokens": 10, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -32.5244255065918, "logits_per_token": -2.884890556335449, "logits_per_char": -0.5769781112670899, "bits_per_byte": 0.8324034598271092, "num_chars": 50}, {"sum_logits": -25.50751304626465, "num_tokens": 9, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -34.16288757324219, "logits_per_token": -2.8341681162516275, "logits_per_char": -0.6540387960580679, "bits_per_byte": 0.9435785276226406, "num_chars": 39}, {"sum_logits": -25.87507438659668, "num_tokens": 9, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -31.27726936340332, "logits_per_token": -2.8750082651774087, "logits_per_char": -0.6017459159673646, "bits_per_byte": 0.8681358488419275, "num_chars": 43}, {"sum_logits": -36.7808952331543, "num_tokens": 11, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -41.51302719116211, "logits_per_token": -3.3437177484685723, "logits_per_char": -0.7662686506907145, "bits_per_byte": 1.1054919823409655, "num_chars": 48}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 403, "native_id": "MSA_2013_5_23", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -28.628210067749023, "logits_per_token_corr": -3.578526258468628, "logits_per_char_corr": -0.6091108525052984, "bits_per_byte_corr": 0.8787612062616742}, "model_output": [{"sum_logits": -14.12321662902832, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -26.458324432373047, "logits_per_token": -2.3538694381713867, "logits_per_char": -0.441350519657135, "bits_per_byte": 0.6367342060035731, "num_chars": 32}, {"sum_logits": -12.361115455627441, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -29.536540985107422, "logits_per_token": -2.472223091125488, "logits_per_char": -0.33408420150344437, "bits_per_byte": 0.48198162074871465, "num_chars": 37}, {"sum_logits": -15.112030029296875, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -28.70538902282715, "logits_per_token": -2.1588614327566966, "logits_per_char": -0.3976850007709704, "bits_per_byte": 0.5737381784486147, "num_chars": 38}, {"sum_logits": -28.628210067749023, "num_tokens": 8, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -40.67326354980469, "logits_per_token": -3.578526258468628, "logits_per_char": -0.6091108525052984, "bits_per_byte": 0.8787612062616742, "num_chars": 47}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 404, "native_id": "Mercury_7081725", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.622081756591797, "logits_per_token_corr": -6.311040878295898, "logits_per_char_corr": -0.5259200731913248, "bits_per_byte_corr": 0.7587422814976301}, "model_output": [{"sum_logits": -12.622081756591797, "num_tokens": 2, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -21.031484603881836, "logits_per_token": -6.311040878295898, "logits_per_char": -0.5259200731913248, "bits_per_byte": 0.7587422814976301, "num_chars": 24}, {"sum_logits": -20.395545959472656, "num_tokens": 4, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -36.251930236816406, "logits_per_token": -5.098886489868164, "logits_per_char": -0.6798515319824219, "bits_per_byte": 0.9808184337325092, "num_chars": 30}, {"sum_logits": -15.254622459411621, "num_tokens": 5, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -27.954303741455078, "logits_per_token": -3.0509244918823244, "logits_per_char": -0.448665366453283, "bits_per_byte": 0.6472872992012458, "num_chars": 34}, {"sum_logits": -30.461341857910156, "num_tokens": 4, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -32.03221130371094, "logits_per_token": -7.615335464477539, "logits_per_char": -0.8703240530831473, "bits_per_byte": 1.2556121953503416, "num_chars": 35}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 405, "native_id": "Mercury_SC_413542", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -33.70936584472656, "logits_per_token_corr": -3.745485093858507, "logits_per_char_corr": -0.7839387405750363, "bits_per_byte_corr": 1.130984533389157}, "model_output": [{"sum_logits": -33.70936584472656, "num_tokens": 9, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -43.914703369140625, "logits_per_token": -3.745485093858507, "logits_per_char": -0.7839387405750363, "bits_per_byte": 1.130984533389157, "num_chars": 43}, {"sum_logits": -30.91838836669922, "num_tokens": 8, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -34.92662811279297, "logits_per_token": -3.8647985458374023, "logits_per_char": -0.736152103969029, "bits_per_byte": 1.0620429897368577, "num_chars": 42}, {"sum_logits": -42.218849182128906, "num_tokens": 8, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -47.29331970214844, "logits_per_token": -5.277356147766113, "logits_per_char": -0.9178010691767153, "bits_per_byte": 1.324107051024787, "num_chars": 46}, {"sum_logits": -23.23411750793457, "num_tokens": 8, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -36.30667495727539, "logits_per_token": -2.9042646884918213, "logits_per_char": -0.5666857928764529, "bits_per_byte": 0.8175547831256763, "num_chars": 41}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 406, "native_id": "Mercury_SC_407302", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.082887649536133, "logits_per_token_corr": -2.7261268070765903, "logits_per_char_corr": -0.5157537202577334, "bits_per_byte_corr": 0.7440753345364002}, "model_output": [{"sum_logits": -12.881731033325195, "num_tokens": 5, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -34.36637496948242, "logits_per_token": -2.576346206665039, "logits_per_char": -0.49545119358943057, "bits_per_byte": 0.7147849799945027, "num_chars": 26}, {"sum_logits": -21.93480682373047, "num_tokens": 6, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -38.17946243286133, "logits_per_token": -3.6558011372884116, "logits_per_char": -0.6093001895480685, "bits_per_byte": 0.8790343618743354, "num_chars": 36}, {"sum_logits": -19.082887649536133, "num_tokens": 7, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -46.63331604003906, "logits_per_token": -2.7261268070765903, "logits_per_char": -0.5157537202577334, "bits_per_byte": 0.7440753345364002, "num_chars": 37}, {"sum_logits": -25.43724250793457, "num_tokens": 12, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -41.875946044921875, "logits_per_token": -2.1197702089945474, "logits_per_char": -0.47994797184782206, "bits_per_byte": 0.6924185588700662, "num_chars": 53}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 407, "native_id": "Mercury_175053", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.666072845458984, "logits_per_token_corr": -2.888690948486328, "logits_per_char_corr": -0.6190052032470703, "bits_per_byte_corr": 0.8930357370096548}, "model_output": [{"sum_logits": -6.88535213470459, "num_tokens": 3, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -18.939292907714844, "logits_per_token": -2.2951173782348633, "logits_per_char": -0.43033450841903687, "bits_per_byte": 0.6208414612199804, "num_chars": 16}, {"sum_logits": -5.907809257507324, "num_tokens": 3, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -18.16351890563965, "logits_per_token": -1.9692697525024414, "logits_per_char": -0.42198637553623747, "bits_per_byte": 0.608797651309275, "num_chars": 14}, {"sum_logits": -8.987364768981934, "num_tokens": 3, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -18.04732894897461, "logits_per_token": -2.995788256327311, "logits_per_char": -0.6419546263558524, "bits_per_byte": 0.9261447559199811, "num_chars": 14}, {"sum_logits": -8.666072845458984, "num_tokens": 3, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -20.098468780517578, "logits_per_token": -2.888690948486328, "logits_per_char": -0.6190052032470703, "bits_per_byte": 0.8930357370096548, "num_chars": 14}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 408, "native_id": "Mercury_7161315", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.410717010498047, "logits_per_token_corr": -2.926339626312256, "logits_per_char_corr": -0.6160715002762643, "bits_per_byte_corr": 0.8888032982822287}, "model_output": [{"sum_logits": -12.34007740020752, "num_tokens": 6, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -30.181041717529297, "logits_per_token": -2.0566795667012534, "logits_per_char": -0.41133591334025066, "bits_per_byte": 0.5934322823159384, "num_chars": 30}, {"sum_logits": -23.410717010498047, "num_tokens": 8, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -35.5305061340332, "logits_per_token": -2.926339626312256, "logits_per_char": -0.6160715002762643, "bits_per_byte": 0.8888032982822287, "num_chars": 38}, {"sum_logits": -25.551162719726562, "num_tokens": 7, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -35.11180114746094, "logits_per_token": -3.6501661028180803, "logits_per_char": -0.6723990189401727, "bits_per_byte": 0.9700667301242883, "num_chars": 38}, {"sum_logits": -21.6197509765625, "num_tokens": 8, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -33.18678283691406, "logits_per_token": -2.7024688720703125, "logits_per_char": -0.5404937744140625, "bits_per_byte": 0.7797676879790864, "num_chars": 40}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 409, "native_id": "Mercury_189070", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.597665786743164, "logits_per_token_corr": -1.1775184207492404, "logits_per_char_corr": -0.246457343877748, "bits_per_byte_corr": 0.35556278780334843}, "model_output": [{"sum_logits": -10.597665786743164, "num_tokens": 9, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -33.366886138916016, "logits_per_token": -1.1775184207492404, "logits_per_char": -0.246457343877748, "bits_per_byte": 0.35556278780334843, "num_chars": 43}, {"sum_logits": -8.875755310058594, "num_tokens": 9, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -28.307687759399414, "logits_per_token": -0.9861950344509549, "logits_per_char": -0.18884585766082115, "bits_per_byte": 0.27244698233988546, "num_chars": 47}, {"sum_logits": -12.753549575805664, "num_tokens": 9, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -28.90764808654785, "logits_per_token": -1.4170610639784071, "logits_per_char": -0.29659417618152706, "bits_per_byte": 0.427894947133944, "num_chars": 43}, {"sum_logits": -27.038978576660156, "num_tokens": 14, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -42.319580078125, "logits_per_token": -1.9313556126185827, "logits_per_char": -0.5101694071067954, "bits_per_byte": 0.7223888945051584, "num_chars": 53}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 410, "native_id": "Mercury_7189123", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.018043518066406, "logits_per_token_corr": -1.8198221380060369, "logits_per_char_corr": -0.3281646478371542, "bits_per_byte_corr": 0.4734415100300756}, "model_output": [{"sum_logits": -17.319717407226562, "num_tokens": 8, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -43.501827239990234, "logits_per_token": -2.1649646759033203, "logits_per_char": -0.4440953181340144, "bits_per_byte": 0.6406941131544095, "num_chars": 39}, {"sum_logits": -20.018043518066406, "num_tokens": 11, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -38.858192443847656, "logits_per_token": -1.8198221380060369, "logits_per_char": -0.3281646478371542, "bits_per_byte": 0.4734415100300756, "num_chars": 61}, {"sum_logits": -17.423622131347656, "num_tokens": 10, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -33.28993606567383, "logits_per_token": -1.7423622131347656, "logits_per_char": -0.3350696563720703, "bits_per_byte": 0.4834033316007022, "num_chars": 52}, {"sum_logits": -19.779491424560547, "num_tokens": 11, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -44.22532272338867, "logits_per_token": -1.7981355840509587, "logits_per_char": -0.35962711681019177, "bits_per_byte": 0.5188322579916324, "num_chars": 55}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 411, "native_id": "Mercury_SC_402171", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.808666229248047, "logits_per_token_corr": -2.200962914360894, "logits_per_char_corr": -0.5659618922642299, "bits_per_byte_corr": 0.8165104153023248}, "model_output": [{"sum_logits": -12.250141143798828, "num_tokens": 4, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -19.422439575195312, "logits_per_token": -3.062535285949707, "logits_per_char": -0.8166760762532552, "bits_per_byte": 1.1782145252240748, "num_chars": 15}, {"sum_logits": -19.585372924804688, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -32.95936584472656, "logits_per_token": -3.9170745849609374, "logits_per_char": -0.8515379532523777, "bits_per_byte": 1.228509582286826, "num_chars": 23}, {"sum_logits": -14.15870475769043, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -29.373313903808594, "logits_per_token": -2.831740951538086, "logits_per_char": -0.5243964725070529, "bits_per_byte": 0.7565441903461344, "num_chars": 27}, {"sum_logits": -19.808666229248047, "num_tokens": 9, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -39.051692962646484, "logits_per_token": -2.200962914360894, "logits_per_char": -0.5659618922642299, "bits_per_byte": 0.8165104153023248, "num_chars": 35}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 412, "native_id": "Mercury_7217368", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.1346845626831055, "logits_per_token_corr": -2.0673422813415527, "logits_per_char_corr": -0.4134684562683105, "bits_per_byte_corr": 0.5965088914227353}, "model_output": [{"sum_logits": -5.362216949462891, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -15.60422134399414, "logits_per_token": -5.362216949462891, "logits_per_char": -0.8937028249104818, "bits_per_byte": 1.289340633527736, "num_chars": 6}, {"sum_logits": -4.1346845626831055, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -17.756603240966797, "logits_per_token": -2.0673422813415527, "logits_per_char": -0.4134684562683105, "bits_per_byte": 0.5965088914227353, "num_chars": 10}, {"sum_logits": -6.199580192565918, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -17.03858184814453, "logits_per_token": -3.099790096282959, "logits_per_char": -0.5635981993241743, "bits_per_byte": 0.81310032721952, "num_chars": 11}, {"sum_logits": -7.523494243621826, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -17.07782554626465, "logits_per_token": -3.761747121810913, "logits_per_char": -0.6839540221474387, "bits_per_byte": 0.986737075948879, "num_chars": 11}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 413, "native_id": "Mercury_LBS10933", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.697873115539551, "logits_per_token_corr": -1.4244682788848877, "logits_per_char_corr": -0.5697873115539551, "bits_per_byte_corr": 0.8220293287409364}, "model_output": [{"sum_logits": -5.697873115539551, "num_tokens": 4, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -14.209173202514648, "logits_per_token": -1.4244682788848877, "logits_per_char": -0.5697873115539551, "bits_per_byte": 0.8220293287409364, "num_chars": 10}, {"sum_logits": -4.576475143432617, "num_tokens": 2, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -10.923490524291992, "logits_per_token": -2.2882375717163086, "logits_per_char": -0.5720593929290771, "bits_per_byte": 0.8253072492733234, "num_chars": 8}, {"sum_logits": -5.713067054748535, "num_tokens": 4, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -15.531494140625, "logits_per_token": -1.4282667636871338, "logits_per_char": -0.5713067054748535, "bits_per_byte": 0.824221350815775, "num_chars": 10}, {"sum_logits": -3.9766740798950195, "num_tokens": 2, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -10.822553634643555, "logits_per_token": -1.9883370399475098, "logits_per_char": -0.49708425998687744, "bits_per_byte": 0.7171409967875435, "num_chars": 8}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 414, "native_id": "Mercury_7223160", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.846893310546875, "logits_per_token_corr": -2.4066990443638394, "logits_per_char_corr": -0.45532144082559123, "bits_per_byte_corr": 0.65688998468997}, "model_output": [{"sum_logits": -3.784252643585205, "num_tokens": 5, "num_tokens_all": 201, "is_greedy": true, "sum_logits_uncond": -22.35070037841797, "logits_per_token": -0.7568505287170411, "logits_per_char": -0.16453272363413934, "bits_per_byte": 0.2373705444510977, "num_chars": 23}, {"sum_logits": -5.08834981918335, "num_tokens": 5, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -22.603626251220703, "logits_per_token": -1.01766996383667, "logits_per_char": -0.19570576227628267, "bits_per_byte": 0.28234373270959023, "num_chars": 26}, {"sum_logits": -16.846893310546875, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -36.84886932373047, "logits_per_token": -2.4066990443638394, "logits_per_char": -0.45532144082559123, "bits_per_byte": 0.65688998468997, "num_chars": 37}, {"sum_logits": -19.27699851989746, "num_tokens": 6, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -38.63063049316406, "logits_per_token": -3.2128330866495767, "logits_per_char": -0.48192496299743653, "bits_per_byte": 0.6952707541974984, "num_chars": 40}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 415, "native_id": "Mercury_SC_401324", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -29.33733367919922, "logits_per_token_corr": -2.6670303344726562, "logits_per_char_corr": -0.5867466735839844, "bits_per_byte_corr": 0.8464965162383178}, "model_output": [{"sum_logits": -20.190509796142578, "num_tokens": 7, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -29.319961547851562, "logits_per_token": -2.8843585423060825, "logits_per_char": -0.6309534311294556, "bits_per_byte": 0.9102733861229956, "num_chars": 32}, {"sum_logits": -27.118825912475586, "num_tokens": 8, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -41.157859802246094, "logits_per_token": -3.3898532390594482, "logits_per_char": -0.695354510576297, "bits_per_byte": 1.0031845040689167, "num_chars": 39}, {"sum_logits": -25.100177764892578, "num_tokens": 10, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -40.896846771240234, "logits_per_token": -2.5100177764892577, "logits_per_char": -0.6435943016639123, "bits_per_byte": 0.9285103073555889, "num_chars": 39}, {"sum_logits": -29.33733367919922, "num_tokens": 11, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -45.89837646484375, "logits_per_token": -2.6670303344726562, "logits_per_char": -0.5867466735839844, "bits_per_byte": 0.8464965162383178, "num_chars": 50}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 416, "native_id": "LEAP_2001_8_10379", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -35.94826126098633, "logits_per_token_corr": -3.2680237509987573, "logits_per_char_corr": -0.5063135388871314, "bits_per_byte_corr": 0.7304560316879305}, "model_output": [{"sum_logits": -35.94826126098633, "num_tokens": 11, "num_tokens_all": 261, "is_greedy": false, "sum_logits_uncond": -65.44284057617188, "logits_per_token": -3.2680237509987573, "logits_per_char": -0.5063135388871314, "bits_per_byte": 0.7304560316879305, "num_chars": 71}, {"sum_logits": -28.52004623413086, "num_tokens": 12, "num_tokens_all": 262, "is_greedy": false, "sum_logits_uncond": -46.06702423095703, "logits_per_token": -2.376670519510905, "logits_per_char": -0.46754174154312883, "bits_per_byte": 0.674520151933346, "num_chars": 61}, {"sum_logits": -36.716094970703125, "num_tokens": 18, "num_tokens_all": 268, "is_greedy": false, "sum_logits_uncond": -65.09283447265625, "logits_per_token": -2.0397830539279513, "logits_per_char": -0.38648521021792764, "bits_per_byte": 0.5575802961587333, "num_chars": 95}, {"sum_logits": -32.27582550048828, "num_tokens": 16, "num_tokens_all": 266, "is_greedy": false, "sum_logits_uncond": -49.00091552734375, "logits_per_token": -2.0172390937805176, "logits_per_char": -0.41916656494140625, "bits_per_byte": 0.6047295245478629, "num_chars": 77}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 417, "native_id": "VASoL_2009_5_30", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -29.33803939819336, "logits_per_token_corr": -4.889673233032227, "logits_per_char_corr": -1.0477871213640486, "bits_per_byte_corr": 1.5116372839003214}, "model_output": [{"sum_logits": -19.89134979248047, "num_tokens": 4, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -21.914989471435547, "logits_per_token": -4.972837448120117, "logits_per_char": -0.9472071329752604, "bits_per_byte": 1.3665310334390428, "num_chars": 21}, {"sum_logits": -23.676313400268555, "num_tokens": 5, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -25.795087814331055, "logits_per_token": -4.735262680053711, "logits_per_char": -1.1838156700134277, "bits_per_byte": 1.7078849964562446, "num_chars": 20}, {"sum_logits": -30.260923385620117, "num_tokens": 6, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -28.069658279418945, "logits_per_token": -5.0434872309366865, "logits_per_char": -1.1207749402081524, "bits_per_byte": 1.6169364481920876, "num_chars": 27}, {"sum_logits": -29.33803939819336, "num_tokens": 6, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -32.89118194580078, "logits_per_token": -4.889673233032227, "logits_per_char": -1.0477871213640486, "bits_per_byte": 1.5116372839003214, "num_chars": 28}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 418, "native_id": "Mercury_416404", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -29.116525650024414, "logits_per_token_corr": -3.2351695166693792, "logits_per_char_corr": -0.5493684084910266, "bits_per_byte_corr": 0.7925710785516359}, "model_output": [{"sum_logits": -18.755233764648438, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -27.230892181396484, "logits_per_token": -2.679319109235491, "logits_per_char": -0.5358638218470982, "bits_per_byte": 0.773088078371171, "num_chars": 35}, {"sum_logits": -19.489730834960938, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -35.80731964111328, "logits_per_token": -2.436216354370117, "logits_per_char": -0.4872432708740234, "bits_per_byte": 0.7029434505969765, "num_chars": 40}, {"sum_logits": -24.871992111206055, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -36.69994354248047, "logits_per_token": -2.763554679022895, "logits_per_char": -0.4974398422241211, "bits_per_byte": 0.7176539935178434, "num_chars": 50}, {"sum_logits": -29.116525650024414, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -39.11790466308594, "logits_per_token": -3.2351695166693792, "logits_per_char": -0.5493684084910266, "bits_per_byte": 0.7925710785516359, "num_chars": 53}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 419, "native_id": "Mercury_7103530", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -22.21137809753418, "logits_per_token_corr": -3.1730540139334544, "logits_per_char_corr": -0.5417409292081508, "bits_per_byte_corr": 0.7815669520157396}, "model_output": [{"sum_logits": -22.649921417236328, "num_tokens": 5, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -29.401309967041016, "logits_per_token": -4.5299842834472654, "logits_per_char": -0.8711508237398587, "bits_per_byte": 1.2568049732767326, "num_chars": 26}, {"sum_logits": -22.21137809753418, "num_tokens": 7, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -30.11241912841797, "logits_per_token": -3.1730540139334544, "logits_per_char": -0.5417409292081508, "bits_per_byte": 0.7815669520157396, "num_chars": 41}, {"sum_logits": -24.434587478637695, "num_tokens": 7, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -31.57524299621582, "logits_per_token": -3.4906553540910994, "logits_per_char": -0.7404420448072029, "bits_per_byte": 1.0682320661098026, "num_chars": 33}, {"sum_logits": -28.790525436401367, "num_tokens": 6, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -36.057273864746094, "logits_per_token": -4.7984209060668945, "logits_per_char": -0.7576454062210886, "bits_per_byte": 1.093051270308254, "num_chars": 38}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 420, "native_id": "Mercury_7030870", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.520287036895752, "logits_per_token_corr": -4.520287036895752, "logits_per_char_corr": -1.130071759223938, "bits_per_byte_corr": 1.6303489228822132}, "model_output": [{"sum_logits": -4.520287036895752, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -10.820269584655762, "logits_per_token": -4.520287036895752, "logits_per_char": -1.130071759223938, "bits_per_byte": 1.6303489228822132, "num_chars": 4}, {"sum_logits": -6.873100757598877, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -12.902543067932129, "logits_per_token": -6.873100757598877, "logits_per_char": -1.3746201515197753, "bits_per_byte": 1.9831576757050402, "num_chars": 5}, {"sum_logits": -2.454601764678955, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": true, "sum_logits_uncond": -13.13008975982666, "logits_per_token": -2.454601764678955, "logits_per_char": -0.4091002941131592, "bits_per_byte": 0.5902069655436951, "num_chars": 6}, {"sum_logits": -4.3513569831848145, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -13.445572853088379, "logits_per_token": -4.3513569831848145, "logits_per_char": -0.48348410924275714, "bits_per_byte": 0.6975201267536447, "num_chars": 9}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 421, "native_id": "LEAP__7_10348", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.063309669494629, "logits_per_token_corr": -1.1875736063176936, "logits_per_char_corr": -0.28398499281510065, "bits_per_byte_corr": 0.40970374082152794}, "model_output": [{"sum_logits": -6.796823501586914, "num_tokens": 9, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -35.799415588378906, "logits_per_token": -0.7552026112874349, "logits_per_char": -0.17427752568171576, "bits_per_byte": 0.2514293220395909, "num_chars": 39}, {"sum_logits": -9.911946296691895, "num_tokens": 9, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -34.138099670410156, "logits_per_token": -1.1013273662990994, "logits_per_char": -0.24175478772419254, "bits_per_byte": 0.3487784333611072, "num_chars": 41}, {"sum_logits": -13.063309669494629, "num_tokens": 11, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -44.60009765625, "logits_per_token": -1.1875736063176936, "logits_per_char": -0.28398499281510065, "bits_per_byte": 0.40970374082152794, "num_chars": 46}, {"sum_logits": -13.449281692504883, "num_tokens": 11, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -40.195987701416016, "logits_per_token": -1.2226619720458984, "logits_per_char": -0.28615492962776345, "bits_per_byte": 0.41283429790020126, "num_chars": 47}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 422, "native_id": "Mercury_SC_406835", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -32.17131042480469, "logits_per_token_corr": -2.6809425354003906, "logits_per_char_corr": -0.5644089548211348, "bits_per_byte_corr": 0.8142700001543592}, "model_output": [{"sum_logits": -23.839677810668945, "num_tokens": 12, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -41.38993835449219, "logits_per_token": -1.9866398175557454, "logits_per_char": -0.3505834972157198, "bits_per_byte": 0.5057850728509921, "num_chars": 68}, {"sum_logits": -32.17131042480469, "num_tokens": 12, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -48.88874435424805, "logits_per_token": -2.6809425354003906, "logits_per_char": -0.5644089548211348, "bits_per_byte": 0.8142700001543592, "num_chars": 57}, {"sum_logits": -24.70282745361328, "num_tokens": 10, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -40.017059326171875, "logits_per_token": -2.4702827453613283, "logits_per_char": -0.5146422386169434, "bits_per_byte": 0.7424718054851922, "num_chars": 48}, {"sum_logits": -17.299257278442383, "num_tokens": 7, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -25.647823333740234, "logits_per_token": -2.471322468348912, "logits_per_char": -0.5406017899513245, "bits_per_byte": 0.7799235214590332, "num_chars": 32}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 423, "native_id": "Mercury_178255", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.847631454467773, "logits_per_token_corr": -2.237302621205648, "logits_per_char_corr": -0.5264241461660347, "bits_per_byte_corr": 0.7594695050784908}, "model_output": [{"sum_logits": -19.864879608154297, "num_tokens": 8, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -36.08911895751953, "logits_per_token": -2.483109951019287, "logits_per_char": -0.6207774877548218, "bits_per_byte": 0.895592603080034, "num_chars": 32}, {"sum_logits": -26.847631454467773, "num_tokens": 12, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -40.617462158203125, "logits_per_token": -2.237302621205648, "logits_per_char": -0.5264241461660347, "bits_per_byte": 0.7594695050784908, "num_chars": 51}, {"sum_logits": -27.529727935791016, "num_tokens": 10, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -50.62989044189453, "logits_per_token": -2.7529727935791017, "logits_per_char": -0.5857388922508727, "bits_per_byte": 0.8450425951067361, "num_chars": 47}, {"sum_logits": -23.19890594482422, "num_tokens": 11, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -35.49485397338867, "logits_per_token": -2.1089914495294746, "logits_per_char": -0.43771520650611734, "bits_per_byte": 0.6314895577485177, "num_chars": 53}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 424, "native_id": "MDSA_2012_8_16", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.7342472076416, "logits_per_token_corr": -3.1048924582345143, "logits_per_char_corr": -0.47248363494873047, "bits_per_byte_corr": 0.6816497970422145}, "model_output": [{"sum_logits": -21.322481155395508, "num_tokens": 6, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -36.2559928894043, "logits_per_token": -3.5537468592325845, "logits_per_char": -0.4738329145643446, "bits_per_byte": 0.6835963960524349, "num_chars": 45}, {"sum_logits": -21.7342472076416, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -41.09777069091797, "logits_per_token": -3.1048924582345143, "logits_per_char": -0.47248363494873047, "bits_per_byte": 0.6816497970422145, "num_chars": 46}, {"sum_logits": -25.735965728759766, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -45.747188568115234, "logits_per_token": -3.6765665326799666, "logits_per_char": -0.5252237903828524, "bits_per_byte": 0.75773775774279, "num_chars": 49}, {"sum_logits": -26.329675674438477, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -44.6739387512207, "logits_per_token": -3.7613822392054965, "logits_per_char": -0.5063399168161246, "bits_per_byte": 0.730494086995278, "num_chars": 52}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 425, "native_id": "Mercury_409645", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.148600578308105, "logits_per_token_corr": -3.0495335261027017, "logits_per_char_corr": -0.3659440231323242, "bits_per_byte_corr": 0.5279456274163395}, "model_output": [{"sum_logits": -15.353225708007812, "num_tokens": 5, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -30.256582260131836, "logits_per_token": -3.0706451416015623, "logits_per_char": -0.5294215761382004, "bits_per_byte": 0.7637938824347492, "num_chars": 29}, {"sum_logits": -21.167545318603516, "num_tokens": 4, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -23.688703536987305, "logits_per_token": -5.291886329650879, "logits_per_char": -0.8819810549418131, "bits_per_byte": 1.2724296941234843, "num_chars": 24}, {"sum_logits": -15.761054992675781, "num_tokens": 5, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -30.829086303710938, "logits_per_token": -3.152210998535156, "logits_per_char": -0.5628948211669922, "bits_per_byte": 0.812085567040283, "num_chars": 28}, {"sum_logits": -9.148600578308105, "num_tokens": 3, "num_tokens_all": 230, "is_greedy": false, "sum_logits_uncond": -25.886520385742188, "logits_per_token": -3.0495335261027017, "logits_per_char": -0.3659440231323242, "bits_per_byte": 0.5279456274163395, "num_chars": 25}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 426, "native_id": "TIMSS_2003_8_pg47", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -63.34294891357422, "logits_per_token_corr": -4.524496350969587, "logits_per_char_corr": -0.9454171479637943, "bits_per_byte_corr": 1.3639486309397333}, "model_output": [{"sum_logits": -40.624568939208984, "num_tokens": 10, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -45.50511932373047, "logits_per_token": -4.062456893920898, "logits_per_char": -0.8463451862335205, "bits_per_byte": 1.2210180030602233, "num_chars": 48}, {"sum_logits": -32.58970642089844, "num_tokens": 8, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -38.93093490600586, "logits_per_token": -4.073713302612305, "logits_per_char": -0.9052696228027344, "bits_per_byte": 1.3060279954858656, "num_chars": 36}, {"sum_logits": -63.34294891357422, "num_tokens": 14, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -70.05836486816406, "logits_per_token": -4.524496350969587, "logits_per_char": -0.9454171479637943, "bits_per_byte": 1.3639486309397333, "num_chars": 67}, {"sum_logits": -36.15556335449219, "num_tokens": 10, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -44.20332336425781, "logits_per_token": -3.615556335449219, "logits_per_char": -0.682180440650796, "bits_per_byte": 0.9841783387190582, "num_chars": 53}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 427, "native_id": "NYSEDREGENTS_2010_8_16", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.003125190734863, "logits_per_token_corr": -2.5015625953674316, "logits_per_char_corr": -0.38485578390268177, "bits_per_byte_corr": 0.5552295308942324}, "model_output": [{"sum_logits": -10.711872100830078, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -21.3468017578125, "logits_per_token": -5.355936050415039, "logits_per_char": -0.5100891476585752, "bits_per_byte": 0.7359030837388333, "num_chars": 21}, {"sum_logits": -5.003125190734863, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -18.095794677734375, "logits_per_token": -2.5015625953674316, "logits_per_char": -0.38485578390268177, "bits_per_byte": 0.5552295308942324, "num_chars": 13}, {"sum_logits": -8.622302055358887, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -18.608949661254883, "logits_per_token": -4.311151027679443, "logits_per_char": -0.5071942385505227, "bits_per_byte": 0.7317266127248188, "num_chars": 17}, {"sum_logits": -11.402505874633789, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -19.804136276245117, "logits_per_token": -5.7012529373168945, "logits_per_char": -0.6707356396843406, "bits_per_byte": 0.96766698112078, "num_chars": 17}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 428, "native_id": "Mercury_7159810", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.153324127197266, "logits_per_token_corr": -1.6837026807996962, "logits_per_char_corr": -0.3443937301635742, "bits_per_byte_corr": 0.4968551266205973}, "model_output": [{"sum_logits": -12.98346996307373, "num_tokens": 8, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -24.45978546142578, "logits_per_token": -1.6229337453842163, "logits_per_char": -0.39343848372950696, "bits_per_byte": 0.5676117493718406, "num_chars": 33}, {"sum_logits": -15.153324127197266, "num_tokens": 9, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -30.652790069580078, "logits_per_token": -1.6837026807996962, "logits_per_char": -0.3443937301635742, "bits_per_byte": 0.4968551266205973, "num_chars": 44}, {"sum_logits": -17.88886260986328, "num_tokens": 10, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -40.10052490234375, "logits_per_token": -1.788886260986328, "logits_per_char": -0.41602006069449493, "bits_per_byte": 0.6001900784747046, "num_chars": 43}, {"sum_logits": -29.76551055908203, "num_tokens": 11, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -50.37485885620117, "logits_per_token": -2.7059555053710938, "logits_per_char": -0.5131984579152075, "bits_per_byte": 0.7403888702266651, "num_chars": 58}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 429, "native_id": "Mercury_7267523", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.108090877532959, "logits_per_token_corr": -1.5540454387664795, "logits_per_char_corr": -0.3108090877532959, "bits_per_byte_corr": 0.4484027295652248}, "model_output": [{"sum_logits": -4.315270900726318, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -15.6222562789917, "logits_per_token": -2.157635450363159, "logits_per_char": -0.5394088625907898, "bits_per_byte": 0.7782024910718478, "num_chars": 8}, {"sum_logits": -3.108090877532959, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -15.53705883026123, "logits_per_token": -1.5540454387664795, "logits_per_char": -0.3108090877532959, "bits_per_byte": 0.4484027295652248, "num_chars": 10}, {"sum_logits": -3.0661654472351074, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": true, "sum_logits_uncond": -15.960084915161133, "logits_per_token": -1.5330827236175537, "logits_per_char": -0.2555137872695923, "bits_per_byte": 0.3686284737728632, "num_chars": 12}, {"sum_logits": -3.4719269275665283, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -15.550552368164062, "logits_per_token": -1.7359634637832642, "logits_per_char": -0.2479947805404663, "bits_per_byte": 0.35778084005233457, "num_chars": 14}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 430, "native_id": "Mercury_SC_401006", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.243621826171875, "logits_per_token_corr": -2.6218109130859375, "logits_per_char_corr": -0.8739369710286459, "bits_per_byte_corr": 1.260824534153455}, "model_output": [{"sum_logits": -5.243621826171875, "num_tokens": 2, "num_tokens_all": 175, "is_greedy": false, "sum_logits_uncond": -16.23455238342285, "logits_per_token": -2.6218109130859375, "logits_per_char": -0.8739369710286459, "bits_per_byte": 1.260824534153455, "num_chars": 6}, {"sum_logits": -9.327512741088867, "num_tokens": 2, "num_tokens_all": 175, "is_greedy": false, "sum_logits_uncond": -15.53629207611084, "logits_per_token": -4.663756370544434, "logits_per_char": -1.1659390926361084, "bits_per_byte": 1.6820945469258999, "num_chars": 8}, {"sum_logits": -7.680360317230225, "num_tokens": 2, "num_tokens_all": 175, "is_greedy": false, "sum_logits_uncond": -15.82407283782959, "logits_per_token": -3.8401801586151123, "logits_per_char": -0.9600450396537781, "bits_per_byte": 1.385052217739549, "num_chars": 8}, {"sum_logits": -7.8315229415893555, "num_tokens": 2, "num_tokens_all": 175, "is_greedy": false, "sum_logits_uncond": -15.160818099975586, "logits_per_token": -3.9157614707946777, "logits_per_char": -0.6024248416607196, "bits_per_byte": 0.8691153315728636, "num_chars": 13}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 431, "native_id": "ACTAAP_2010_7_12", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -27.707988739013672, "logits_per_token_corr": -1.8471992492675782, "logits_per_char_corr": -0.3420739350495515, "bits_per_byte_corr": 0.4935083697137159}, "model_output": [{"sum_logits": -22.074996948242188, "num_tokens": 7, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -38.040061950683594, "logits_per_token": -3.153570992606027, "logits_per_char": -0.45051014180086096, "bits_per_byte": 0.6499487474467528, "num_chars": 49}, {"sum_logits": -25.811782836914062, "num_tokens": 13, "num_tokens_all": 234, "is_greedy": false, "sum_logits_uncond": -48.307098388671875, "logits_per_token": -1.985521756685697, "logits_per_char": -0.43019638061523435, "bits_per_byte": 0.6206421849224255, "num_chars": 60}, {"sum_logits": -29.330028533935547, "num_tokens": 15, "num_tokens_all": 236, "is_greedy": false, "sum_logits_uncond": -56.21886444091797, "logits_per_token": -1.9553352355957032, "logits_per_char": -0.3859214280780993, "bits_per_byte": 0.5567669304614606, "num_chars": 76}, {"sum_logits": -27.707988739013672, "num_tokens": 15, "num_tokens_all": 236, "is_greedy": false, "sum_logits_uncond": -51.880645751953125, "logits_per_token": -1.8471992492675782, "logits_per_char": -0.3420739350495515, "bits_per_byte": 0.4935083697137159, "num_chars": 81}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 432, "native_id": "MEAP_2005_8_13", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.27277374267578, "logits_per_token_corr": -3.610396248953683, "logits_per_char_corr": -0.42835209733348784, "bits_per_byte_corr": 0.6179814465778535}, "model_output": [{"sum_logits": -16.341766357421875, "num_tokens": 9, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -30.020442962646484, "logits_per_token": -1.8157518174913194, "logits_per_char": -0.3985796672541921, "bits_per_byte": 0.5750289093472092, "num_chars": 41}, {"sum_logits": -19.25921058654785, "num_tokens": 8, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -34.2804069519043, "logits_per_token": -2.4074013233184814, "logits_per_char": -0.4186784910119098, "bits_per_byte": 0.6040253827101907, "num_chars": 46}, {"sum_logits": -25.009130477905273, "num_tokens": 11, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -40.629005432128906, "logits_per_token": -2.2735573161732066, "logits_per_char": -0.4465916156768799, "bits_per_byte": 0.6442955092400874, "num_chars": 56}, {"sum_logits": -25.27277374267578, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -39.21099090576172, "logits_per_token": -3.610396248953683, "logits_per_char": -0.42835209733348784, "bits_per_byte": 0.6179814465778535, "num_chars": 59}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 433, "native_id": "Mercury_7164623", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.480638265609741, "logits_per_token_corr": -1.7403191328048706, "logits_per_char_corr": -0.21753989160060883, "bits_per_byte_corr": 0.3138437228079465}, "model_output": [{"sum_logits": -4.797964096069336, "num_tokens": 2, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -18.404743194580078, "logits_per_token": -2.398982048034668, "logits_per_char": -0.39983034133911133, "bits_per_byte": 0.5768332506472919, "num_chars": 12}, {"sum_logits": -2.38395357131958, "num_tokens": 2, "num_tokens_all": 205, "is_greedy": true, "sum_logits_uncond": -16.204452514648438, "logits_per_token": -1.19197678565979, "logits_per_char": -0.158930238087972, "bits_per_byte": 0.22928786633698417, "num_chars": 15}, {"sum_logits": -3.480638265609741, "num_tokens": 2, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -18.752607345581055, "logits_per_token": -1.7403191328048706, "logits_per_char": -0.21753989160060883, "bits_per_byte": 0.3138437228079465, "num_chars": 16}, {"sum_logits": -5.309347629547119, "num_tokens": 2, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -19.082592010498047, "logits_per_token": -2.6546738147735596, "logits_per_char": -0.2949637571970622, "bits_per_byte": 0.4255427497504836, "num_chars": 18}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 434, "native_id": "Mercury_417127", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.394927978515625, "logits_per_token_corr": -2.732487996419271, "logits_per_char_corr": -0.6072195547598379, "bits_per_byte_corr": 0.8760326403834519}, "model_output": [{"sum_logits": -16.394927978515625, "num_tokens": 6, "num_tokens_all": 258, "is_greedy": false, "sum_logits_uncond": -32.02614974975586, "logits_per_token": -2.732487996419271, "logits_per_char": -0.6072195547598379, "bits_per_byte": 0.8760326403834519, "num_chars": 27}, {"sum_logits": -18.620641708374023, "num_tokens": 6, "num_tokens_all": 258, "is_greedy": false, "sum_logits_uncond": -32.439857482910156, "logits_per_token": -3.103440284729004, "logits_per_char": -0.6420910933922077, "bits_per_byte": 0.9263416362365758, "num_chars": 29}, {"sum_logits": -21.856143951416016, "num_tokens": 7, "num_tokens_all": 259, "is_greedy": false, "sum_logits_uncond": -27.578330993652344, "logits_per_token": -3.1223062787737166, "logits_per_char": -0.6244612557547433, "bits_per_byte": 0.9009071569053101, "num_chars": 35}, {"sum_logits": -35.19117736816406, "num_tokens": 7, "num_tokens_all": 259, "is_greedy": false, "sum_logits_uncond": -44.622764587402344, "logits_per_token": -5.027311052594866, "logits_per_char": -0.7997994856400923, "bits_per_byte": 1.1538667516393337, "num_chars": 44}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 435, "native_id": "Mercury_411224", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.1630988121032715, "logits_per_token_corr": -1.5407747030258179, "logits_per_char_corr": -1.0271831353505452, "bits_per_byte_corr": 1.481912015456073}, "model_output": [{"sum_logits": -4.677820205688477, "num_tokens": 4, "num_tokens_all": 257, "is_greedy": false, "sum_logits_uncond": -16.939945220947266, "logits_per_token": -1.1694550514221191, "logits_per_char": -0.7796367009480795, "bits_per_byte": 1.1247780021536342, "num_chars": 6}, {"sum_logits": -6.1630988121032715, "num_tokens": 4, "num_tokens_all": 257, "is_greedy": false, "sum_logits_uncond": -15.636468887329102, "logits_per_token": -1.5407747030258179, "logits_per_char": -1.0271831353505452, "bits_per_byte": 1.481912015456073, "num_chars": 6}, {"sum_logits": -3.368373394012451, "num_tokens": 4, "num_tokens_all": 257, "is_greedy": false, "sum_logits_uncond": -23.34696388244629, "logits_per_token": -0.8420933485031128, "logits_per_char": -0.4210466742515564, "bits_per_byte": 0.6074419489259476, "num_chars": 8}, {"sum_logits": -10.991363525390625, "num_tokens": 4, "num_tokens_all": 257, "is_greedy": false, "sum_logits_uncond": -23.46993637084961, "logits_per_token": -2.7478408813476562, "logits_per_char": -1.3739204406738281, "bits_per_byte": 1.982148206337535, "num_chars": 8}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 436, "native_id": "TIMSS_2011_8_pg15", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.036901473999023, "logits_per_token_corr": -1.6182231903076172, "logits_per_char_corr": -0.3339190710158575, "bits_per_byte_corr": 0.48174338781317333}, "model_output": [{"sum_logits": -21.37994384765625, "num_tokens": 13, "num_tokens_all": 244, "is_greedy": false, "sum_logits_uncond": -36.287635803222656, "logits_per_token": -1.6446110652043269, "logits_per_char": -0.3289222130408654, "bits_per_byte": 0.47453444559262054, "num_chars": 65}, {"sum_logits": -21.69279670715332, "num_tokens": 13, "num_tokens_all": 244, "is_greedy": false, "sum_logits_uncond": -36.703941345214844, "logits_per_token": -1.6686766697810247, "logits_per_char": -0.3286787379871715, "bits_per_byte": 0.474183185340076, "num_chars": 66}, {"sum_logits": -20.833145141601562, "num_tokens": 13, "num_tokens_all": 244, "is_greedy": false, "sum_logits_uncond": -39.70377731323242, "logits_per_token": -1.6025496262770433, "logits_per_char": -0.33601847002583163, "bits_per_byte": 0.4847721803537124, "num_chars": 62}, {"sum_logits": -21.036901473999023, "num_tokens": 13, "num_tokens_all": 244, "is_greedy": false, "sum_logits_uncond": -39.68501663208008, "logits_per_token": -1.6182231903076172, "logits_per_char": -0.3339190710158575, "bits_per_byte": 0.48174338781317333, "num_chars": 63}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 437, "native_id": "NYSEDREGENTS_2012_8_19", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.653837203979492, "logits_per_token_corr": -4.826918601989746, "logits_per_char_corr": -0.6435891469319661, "bits_per_byte_corr": 0.928502870649373}, "model_output": [{"sum_logits": -13.45425033569336, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -22.346078872680664, "logits_per_token": -3.36356258392334, "logits_per_char": -0.6727125167846679, "bits_per_byte": 0.9705190119098712, "num_chars": 20}, {"sum_logits": -11.817771911621094, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -22.490371704101562, "logits_per_token": -5.908885955810547, "logits_per_char": -0.7878514607747396, "bits_per_byte": 1.136629395417659, "num_chars": 15}, {"sum_logits": -9.653837203979492, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -22.049114227294922, "logits_per_token": -4.826918601989746, "logits_per_char": -0.6435891469319661, "bits_per_byte": 0.928502870649373, "num_chars": 15}, {"sum_logits": -11.202608108520508, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -19.51401710510254, "logits_per_token": -5.601304054260254, "logits_per_char": -0.6589769475600299, "bits_per_byte": 0.9507027743056846, "num_chars": 17}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 438, "native_id": "Mercury_7222460", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -27.171083450317383, "logits_per_token_corr": -3.396385431289673, "logits_per_char_corr": -0.5660642385482788, "bits_per_byte_corr": 0.8166580697787758}, "model_output": [{"sum_logits": -27.171083450317383, "num_tokens": 8, "num_tokens_all": 243, "is_greedy": false, "sum_logits_uncond": -36.00238800048828, "logits_per_token": -3.396385431289673, "logits_per_char": -0.5660642385482788, "bits_per_byte": 0.8166580697787758, "num_chars": 48}, {"sum_logits": -44.42131805419922, "num_tokens": 8, "num_tokens_all": 243, "is_greedy": false, "sum_logits_uncond": -50.627593994140625, "logits_per_token": -5.552664756774902, "logits_per_char": -0.7529036958338851, "bits_per_byte": 1.0862104282472989, "num_chars": 59}, {"sum_logits": -21.478572845458984, "num_tokens": 8, "num_tokens_all": 243, "is_greedy": false, "sum_logits_uncond": -35.15126037597656, "logits_per_token": -2.684821605682373, "logits_per_char": -0.3640436075501523, "bits_per_byte": 0.5252039072803101, "num_chars": 59}, {"sum_logits": -31.03704261779785, "num_tokens": 7, "num_tokens_all": 242, "is_greedy": false, "sum_logits_uncond": -43.254302978515625, "logits_per_token": -4.433863231113979, "logits_per_char": -0.574760048477738, "bits_per_byte": 0.8292034716405285, "num_chars": 54}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 439, "native_id": "Mercury_7007420", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.317340850830078, "logits_per_token_corr": -1.616762978690011, "logits_per_char_corr": -0.35366690158843994, "bits_per_byte_corr": 0.5102334850485739}, "model_output": [{"sum_logits": -22.131595611572266, "num_tokens": 7, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -40.35071563720703, "logits_per_token": -3.161656515938895, "logits_per_char": -0.6706544124718868, "bits_per_byte": 0.9675497950241876, "num_chars": 33}, {"sum_logits": -8.931632995605469, "num_tokens": 7, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -29.343582153320312, "logits_per_token": -1.2759475708007812, "logits_per_char": -0.2791135311126709, "bits_per_byte": 0.402675707181547, "num_chars": 32}, {"sum_logits": -11.317340850830078, "num_tokens": 7, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -42.122039794921875, "logits_per_token": -1.616762978690011, "logits_per_char": -0.35366690158843994, "bits_per_byte": 0.5102334850485739, "num_chars": 32}, {"sum_logits": -19.272136688232422, "num_tokens": 7, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -44.82438659667969, "logits_per_token": -2.753162384033203, "logits_per_char": -0.50716149179559, "bits_per_byte": 0.7316793691438721, "num_chars": 38}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 440, "native_id": "Mercury_SC_405710", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.06934356689453, "logits_per_token_corr": -3.1527633666992188, "logits_per_char_corr": -0.6305526733398438, "bits_per_byte_corr": 0.9096952148473246}, "model_output": [{"sum_logits": -21.11897850036621, "num_tokens": 5, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -31.67006492614746, "logits_per_token": -4.223795700073242, "logits_per_char": -0.8799574375152588, "bits_per_byte": 1.2695102312975357, "num_chars": 24}, {"sum_logits": -30.96472930908203, "num_tokens": 4, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -35.30094909667969, "logits_per_token": -7.741182327270508, "logits_per_char": -1.1058831896100725, "bits_per_byte": 1.595452193454067, "num_chars": 28}, {"sum_logits": -24.740386962890625, "num_tokens": 6, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -33.19157409667969, "logits_per_token": -4.1233978271484375, "logits_per_char": -0.7068681989397322, "bits_per_byte": 1.0197952451731975, "num_chars": 35}, {"sum_logits": -22.06934356689453, "num_tokens": 7, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -32.463279724121094, "logits_per_token": -3.1527633666992188, "logits_per_char": -0.6305526733398438, "bits_per_byte": 0.9096952148473246, "num_chars": 35}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 441, "native_id": "Mercury_SC_401375", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.2753705978393555, "logits_per_token_corr": -6.2753705978393555, "logits_per_char_corr": -0.5704882361672141, "bits_per_byte_corr": 0.8230405492045229}, "model_output": [{"sum_logits": -5.656257629394531, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -14.601149559020996, "logits_per_token": -5.656257629394531, "logits_per_char": -0.4040184020996094, "bits_per_byte": 0.5828753451374084, "num_chars": 14}, {"sum_logits": -3.3350677490234375, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -16.06580352783203, "logits_per_token": -3.3350677490234375, "logits_per_char": -0.2779223124186198, "bits_per_byte": 0.40095714187902404, "num_chars": 12}, {"sum_logits": -6.2753705978393555, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -16.479450225830078, "logits_per_token": -6.2753705978393555, "logits_per_char": -0.5704882361672141, "bits_per_byte": 0.8230405492045229, "num_chars": 11}, {"sum_logits": -5.535904407501221, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -17.44771957397461, "logits_per_token": -2.7679522037506104, "logits_per_char": -0.27679522037506105, "bits_per_byte": 0.3993310917771552, "num_chars": 20}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 442, "native_id": "VASoL_2010_3_22", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -21.704185485839844, "logits_per_token_corr": -2.411576165093316, "logits_per_char_corr": -0.5865996077254012, "bits_per_byte_corr": 0.8462843450534556}, "model_output": [{"sum_logits": -21.704185485839844, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -47.44579315185547, "logits_per_token": -2.411576165093316, "logits_per_char": -0.5865996077254012, "bits_per_byte": 0.8462843450534556, "num_chars": 37}, {"sum_logits": -25.83454704284668, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -44.31389236450195, "logits_per_token": -3.229318380355835, "logits_per_char": -0.7176263067457411, "bits_per_byte": 1.0353159139542867, "num_chars": 36}, {"sum_logits": -25.063007354736328, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -41.09925842285156, "logits_per_token": -2.784778594970703, "logits_per_char": -0.6961946487426758, "bits_per_byte": 1.0043965672352138, "num_chars": 36}, {"sum_logits": -30.027873992919922, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -42.70168685913086, "logits_per_token": -3.336430443657769, "logits_per_char": -0.8115641619708087, "bits_per_byte": 1.1708395918393344, "num_chars": 37}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 443, "native_id": "Mercury_SC_408358", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -27.962993621826172, "logits_per_token_corr": -2.542090329256925, "logits_per_char_corr": -0.4301999018742488, "bits_per_byte_corr": 0.6206472650253433}, "model_output": [{"sum_logits": -27.962993621826172, "num_tokens": 11, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -44.4559440612793, "logits_per_token": -2.542090329256925, "logits_per_char": -0.4301999018742488, "bits_per_byte": 0.6206472650253433, "num_chars": 65}, {"sum_logits": -20.95318603515625, "num_tokens": 11, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -35.98044204711914, "logits_per_token": -1.9048350941051135, "logits_per_char": -0.3675997550027412, "bits_per_byte": 0.5303343435748337, "num_chars": 57}, {"sum_logits": -18.01458168029785, "num_tokens": 13, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -34.15358352661133, "logits_per_token": -1.385737052330604, "logits_per_char": -0.28147783875465393, "bits_per_byte": 0.40608668209177423, "num_chars": 64}, {"sum_logits": -18.749046325683594, "num_tokens": 10, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -39.55699157714844, "logits_per_token": -1.8749046325683594, "logits_per_char": -0.3024039729948967, "bits_per_byte": 0.4362767121851709, "num_chars": 62}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 444, "native_id": "NYSEDREGENTS_2013_8_42", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.80082893371582, "logits_per_token_corr": -2.40041446685791, "logits_per_char_corr": -0.4000690778096517, "bits_per_byte_corr": 0.5771776745694199}, "model_output": [{"sum_logits": -6.243849754333496, "num_tokens": 2, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -21.573352813720703, "logits_per_token": -3.121924877166748, "logits_per_char": -0.5203208128611246, "bits_per_byte": 0.7506642563865982, "num_chars": 12}, {"sum_logits": -4.80082893371582, "num_tokens": 2, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -19.818283081054688, "logits_per_token": -2.40041446685791, "logits_per_char": -0.4000690778096517, "bits_per_byte": 0.5771776745694199, "num_chars": 12}, {"sum_logits": -7.255677223205566, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -23.88519287109375, "logits_per_token": -1.4511354446411133, "logits_per_char": -0.31546422709589417, "bits_per_byte": 0.4551186760094432, "num_chars": 23}, {"sum_logits": -5.27846622467041, "num_tokens": 2, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -17.279781341552734, "logits_per_token": -2.639233112335205, "logits_per_char": -0.43987218538920086, "bits_per_byte": 0.6346014204864467, "num_chars": 12}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 445, "native_id": "Mercury_SC_400661", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -26.983745574951172, "logits_per_token_corr": -2.4530677795410156, "logits_per_char_corr": -0.5996387905544704, "bits_per_byte_corr": 0.8650959094582118}, "model_output": [{"sum_logits": -17.979995727539062, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -23.609188079833984, "logits_per_token": -4.494998931884766, "logits_per_char": -1.284285409109933, "bits_per_byte": 1.852832190810285, "num_chars": 14}, {"sum_logits": -17.246301651000977, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -27.559755325317383, "logits_per_token": -2.8743836085001626, "logits_per_char": -0.7839228023182262, "bits_per_byte": 1.1309615393450967, "num_chars": 22}, {"sum_logits": -23.685321807861328, "num_tokens": 8, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -35.08103561401367, "logits_per_token": -2.960665225982666, "logits_per_char": -0.6966271119959214, "bits_per_byte": 1.0050204798260385, "num_chars": 34}, {"sum_logits": -26.983745574951172, "num_tokens": 11, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -44.2401237487793, "logits_per_token": -2.4530677795410156, "logits_per_char": -0.5996387905544704, "bits_per_byte": 0.8650959094582118, "num_chars": 45}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 446, "native_id": "Mercury_SC_415422", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.373641967773438, "logits_per_token_corr": -2.2157856334339487, "logits_per_char_corr": -0.48747283935546876, "bits_per_byte_corr": 0.7032746479067024}, "model_output": [{"sum_logits": -9.6456937789917, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -21.58440589904785, "logits_per_token": -1.60761562983195, "logits_per_char": -0.4384406263178045, "bits_per_byte": 0.6325361173134021, "num_chars": 22}, {"sum_logits": -24.898466110229492, "num_tokens": 9, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -31.589210510253906, "logits_per_token": -2.7664962344699435, "logits_per_char": -0.5412710023962933, "bits_per_byte": 0.7808889909346917, "num_chars": 46}, {"sum_logits": -24.373641967773438, "num_tokens": 11, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -32.74468231201172, "logits_per_token": -2.2157856334339487, "logits_per_char": -0.48747283935546876, "bits_per_byte": 0.7032746479067024, "num_chars": 50}, {"sum_logits": -27.199546813964844, "num_tokens": 9, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -42.54188537597656, "logits_per_token": -3.022171868218316, "logits_per_char": -0.618171518499201, "bits_per_byte": 0.8918329841582382, "num_chars": 44}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 447, "native_id": "Mercury_SC_400162", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.518415451049805, "logits_per_token_corr": -1.3148019313812256, "logits_per_char_corr": -0.2768004066065738, "bits_per_byte_corr": 0.39933857392763955}, "model_output": [{"sum_logits": -18.644634246826172, "num_tokens": 4, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -26.187950134277344, "logits_per_token": -4.661158561706543, "logits_per_char": -1.096743190989775, "bits_per_byte": 1.5822659627708222, "num_chars": 17}, {"sum_logits": -16.229248046875, "num_tokens": 6, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -31.364715576171875, "logits_per_token": -2.7048746744791665, "logits_per_char": -0.6010832609953703, "bits_per_byte": 0.8671798398000103, "num_chars": 27}, {"sum_logits": -8.335002899169922, "num_tokens": 4, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -28.203603744506836, "logits_per_token": -2.0837507247924805, "logits_per_char": -0.3472917874654134, "bits_per_byte": 0.5010361395181757, "num_chars": 24}, {"sum_logits": -10.518415451049805, "num_tokens": 8, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -30.51124382019043, "logits_per_token": -1.3148019313812256, "logits_per_char": -0.2768004066065738, "bits_per_byte": 0.39933857392763955, "num_chars": 38}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 448, "native_id": "Mercury_7212328", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -24.610427856445312, "logits_per_token_corr": -3.076303482055664, "logits_per_char_corr": -0.5723355315452399, "bits_per_byte_corr": 0.8257056330854596}, "model_output": [{"sum_logits": -33.943050384521484, "num_tokens": 8, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -38.10145950317383, "logits_per_token": -4.2428812980651855, "logits_per_char": -0.8932381680137232, "bits_per_byte": 1.2886702753270671, "num_chars": 38}, {"sum_logits": -24.610427856445312, "num_tokens": 8, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -42.91731262207031, "logits_per_token": -3.076303482055664, "logits_per_char": -0.5723355315452399, "bits_per_byte": 0.8257056330854596, "num_chars": 43}, {"sum_logits": -28.691965103149414, "num_tokens": 8, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -40.50265884399414, "logits_per_token": -3.5864956378936768, "logits_per_char": -0.6104673426202003, "bits_per_byte": 0.8807182078234594, "num_chars": 47}, {"sum_logits": -19.716339111328125, "num_tokens": 4, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -25.408533096313477, "logits_per_token": -4.929084777832031, "logits_per_char": -0.8961972323330966, "bits_per_byte": 1.2929393027463014, "num_chars": 22}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 449, "native_id": "NCEOGA_2013_8_26", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -45.173484802246094, "logits_per_token_corr": -4.106680436567827, "logits_per_char_corr": -0.6742311164514342, "bits_per_byte_corr": 0.9727098881182121}, "model_output": [{"sum_logits": -29.617450714111328, "num_tokens": 9, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -44.2284049987793, "logits_per_token": -3.290827857123481, "logits_per_char": -0.6044377696757414, "bits_per_byte": 0.8720193728378041, "num_chars": 49}, {"sum_logits": -21.836143493652344, "num_tokens": 7, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -39.28396224975586, "logits_per_token": -3.1194490705217635, "logits_per_char": -0.4043730276602286, "bits_per_byte": 0.5833869616750865, "num_chars": 54}, {"sum_logits": -27.721851348876953, "num_tokens": 9, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -41.27787780761719, "logits_per_token": -3.0802057054307728, "logits_per_char": -0.5898266244441905, "bits_per_byte": 0.850939946070522, "num_chars": 47}, {"sum_logits": -45.173484802246094, "num_tokens": 11, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -62.668941497802734, "logits_per_token": -4.106680436567827, "logits_per_char": -0.6742311164514342, "bits_per_byte": 0.9727098881182121, "num_chars": 67}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 450, "native_id": "Mercury_SC_407696", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -28.331329345703125, "logits_per_token_corr": -3.1479254828559027, "logits_per_char_corr": -0.6588681243186774, "bits_per_byte_corr": 0.9505457755550518}, "model_output": [{"sum_logits": -20.969905853271484, "num_tokens": 7, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -41.55122375488281, "logits_per_token": -2.9957008361816406, "logits_per_char": -0.5376898936736279, "bits_per_byte": 0.7757225431396142, "num_chars": 39}, {"sum_logits": -28.331329345703125, "num_tokens": 9, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -48.26082229614258, "logits_per_token": -3.1479254828559027, "logits_per_char": -0.6588681243186774, "bits_per_byte": 0.9505457755550518, "num_chars": 43}, {"sum_logits": -19.674856185913086, "num_tokens": 9, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -30.400318145751953, "logits_per_token": -2.1860951317681208, "logits_per_char": -0.4471558224071156, "bits_per_byte": 0.6451094874918352, "num_chars": 44}, {"sum_logits": -31.15593719482422, "num_tokens": 10, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -45.28611373901367, "logits_per_token": -3.1155937194824217, "logits_per_char": -0.5878478716004569, "bits_per_byte": 0.8480852091557206, "num_chars": 53}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 451, "native_id": "Mercury_SC_400052", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.975281715393066, "logits_per_token_corr": -3.2438204288482666, "logits_per_char_corr": -0.8650187810262044, "bits_per_byte_corr": 1.247958305663218}, "model_output": [{"sum_logits": -10.560880661010742, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -19.691492080688477, "logits_per_token": -5.280440330505371, "logits_per_char": -0.9600800600918856, "bits_per_byte": 1.3851027415519366, "num_chars": 11}, {"sum_logits": -12.975281715393066, "num_tokens": 4, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -24.54510498046875, "logits_per_token": -3.2438204288482666, "logits_per_char": -0.8650187810262044, "bits_per_byte": 1.247958305663218, "num_chars": 15}, {"sum_logits": -17.00067710876465, "num_tokens": 4, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -26.30377197265625, "logits_per_token": -4.250169277191162, "logits_per_char": -0.8947724794086657, "bits_per_byte": 1.2908838187677314, "num_chars": 19}, {"sum_logits": -12.983612060546875, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -22.868553161621094, "logits_per_token": -4.327870686848958, "logits_per_char": -0.7637418859145221, "bits_per_byte": 1.101846631328857, "num_chars": 17}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 452, "native_id": "Mercury_7212870", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.85378646850586, "logits_per_token_corr": -5.42689323425293, "logits_per_char_corr": -0.9867078607732599, "bits_per_byte_corr": 1.4235185375447625}, "model_output": [{"sum_logits": -9.207433700561523, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -18.732772827148438, "logits_per_token": -4.603716850280762, "logits_per_char": -0.8370394273237749, "bits_per_byte": 1.2075926308294156, "num_chars": 11}, {"sum_logits": -9.817428588867188, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -18.58572769165039, "logits_per_token": -4.908714294433594, "logits_per_char": -0.9817428588867188, "bits_per_byte": 1.41635555394504, "num_chars": 10}, {"sum_logits": -8.895191192626953, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -17.995222091674805, "logits_per_token": -4.447595596313477, "logits_per_char": -0.8895191192626953, "bits_per_byte": 1.283304822137131, "num_chars": 10}, {"sum_logits": -10.85378646850586, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -16.840335845947266, "logits_per_token": -5.42689323425293, "logits_per_char": -0.9867078607732599, "bits_per_byte": 1.4235185375447625, "num_chars": 11}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 453, "native_id": "NYSEDREGENTS_2010_8_35", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.35728645324707, "logits_per_token_corr": -4.178643226623535, "logits_per_char_corr": -0.6428681887113131, "bits_per_byte_corr": 0.927462747799748}, "model_output": [{"sum_logits": -8.539417266845703, "num_tokens": 6, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -23.15180778503418, "logits_per_token": -1.4232362111409504, "logits_per_char": -0.22472150702225535, "bits_per_byte": 0.32420460376233506, "num_chars": 38}, {"sum_logits": -16.060930252075195, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -31.612321853637695, "logits_per_token": -3.2121860504150392, "logits_per_char": -0.5948492685953776, "bits_per_byte": 0.8581860898795948, "num_chars": 27}, {"sum_logits": -8.35728645324707, "num_tokens": 2, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -24.183937072753906, "logits_per_token": -4.178643226623535, "logits_per_char": -0.6428681887113131, "bits_per_byte": 0.927462747799748, "num_chars": 13}, {"sum_logits": -7.432377815246582, "num_tokens": 3, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -19.718324661254883, "logits_per_token": -2.477459271748861, "logits_per_char": -0.4954918543497721, "bits_per_byte": 0.7148436410718063, "num_chars": 15}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 454, "native_id": "MCAS_2010_8_12005", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -32.07720184326172, "logits_per_token_corr": -2.1384801228841144, "logits_per_char_corr": -0.5012062788009644, "bits_per_byte_corr": 0.723087812889082}, "model_output": [{"sum_logits": -32.07720184326172, "num_tokens": 15, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -50.98241424560547, "logits_per_token": -2.1384801228841144, "logits_per_char": -0.5012062788009644, "bits_per_byte": 0.723087812889082, "num_chars": 64}, {"sum_logits": -36.12792205810547, "num_tokens": 12, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -46.17616653442383, "logits_per_token": -3.010660171508789, "logits_per_char": -0.5235930733058763, "bits_per_byte": 0.755385130302742, "num_chars": 69}, {"sum_logits": -53.032344818115234, "num_tokens": 14, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -60.9222412109375, "logits_per_token": -3.7880246298653737, "logits_per_char": -0.7264704769604826, "bits_per_byte": 1.0480753544638812, "num_chars": 73}, {"sum_logits": -44.7435302734375, "num_tokens": 12, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -54.73609161376953, "logits_per_token": -3.7286275227864585, "logits_per_char": -0.6046423009923987, "bits_per_byte": 0.8723144491540522, "num_chars": 74}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 455, "native_id": "Mercury_7218505", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -23.96929168701172, "logits_per_token_corr": -2.1790265170010654, "logits_per_char_corr": -0.40625918113579185, "bits_per_byte_corr": 0.5861081059406391}, "model_output": [{"sum_logits": -43.31951904296875, "num_tokens": 11, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -57.75395965576172, "logits_per_token": -3.938138094815341, "logits_per_char": -0.8173494159050707, "bits_per_byte": 1.1791859490005836, "num_chars": 53}, {"sum_logits": -23.96929168701172, "num_tokens": 11, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -44.405635833740234, "logits_per_token": -2.1790265170010654, "logits_per_char": -0.40625918113579185, "bits_per_byte": 0.5861081059406391, "num_chars": 59}, {"sum_logits": -46.86975860595703, "num_tokens": 10, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -48.28221130371094, "logits_per_token": -4.6869758605957035, "logits_per_char": -0.7811626434326172, "bits_per_byte": 1.12697947180876, "num_chars": 60}, {"sum_logits": -35.84944152832031, "num_tokens": 10, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -53.37248992919922, "logits_per_token": -3.584944152832031, "logits_per_char": -0.5515298696664663, "bits_per_byte": 0.795689407870519, "num_chars": 65}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 456, "native_id": "Mercury_SC_400853", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -18.022117614746094, "logits_per_token_corr": -2.5745882306780135, "logits_per_char_corr": -0.42909803844633554, "bits_per_byte_corr": 0.6190576121221548}, "model_output": [{"sum_logits": -10.67125129699707, "num_tokens": 4, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -27.978864669799805, "logits_per_token": -2.6678128242492676, "logits_per_char": -0.561644805105109, "bits_per_byte": 0.8102821750667711, "num_chars": 19}, {"sum_logits": -18.53736114501953, "num_tokens": 6, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -28.776111602783203, "logits_per_token": -3.0895601908365884, "logits_per_char": -0.6865689312970197, "bits_per_byte": 0.9905095924113574, "num_chars": 27}, {"sum_logits": -25.94527816772461, "num_tokens": 8, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -44.066375732421875, "logits_per_token": -3.243159770965576, "logits_per_char": -0.8946647644042969, "bits_per_byte": 1.2907284188650991, "num_chars": 29}, {"sum_logits": -18.022117614746094, "num_tokens": 7, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -39.844093322753906, "logits_per_token": -2.5745882306780135, "logits_per_char": -0.42909803844633554, "bits_per_byte": 0.6190576121221548, "num_chars": 42}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 457, "native_id": "Mercury_7210455", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -26.985485076904297, "logits_per_token_corr": -2.453225916082209, "logits_per_char_corr": -0.5291271583706725, "bits_per_byte_corr": 0.7633691273815868}, "model_output": [{"sum_logits": -28.935894012451172, "num_tokens": 10, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -37.26142120361328, "logits_per_token": -2.893589401245117, "logits_per_char": -0.5673704708323759, "bits_per_byte": 0.818542564617293, "num_chars": 51}, {"sum_logits": -24.702220916748047, "num_tokens": 10, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -33.17760467529297, "logits_per_token": -2.4702220916748048, "logits_per_char": -0.5255791684414478, "bits_per_byte": 0.7582504599055667, "num_chars": 47}, {"sum_logits": -26.985485076904297, "num_tokens": 11, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -38.97937774658203, "logits_per_token": -2.453225916082209, "logits_per_char": -0.5291271583706725, "bits_per_byte": 0.7633691273815868, "num_chars": 51}, {"sum_logits": -27.995899200439453, "num_tokens": 9, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -34.191707611083984, "logits_per_token": -3.1106554667154946, "logits_per_char": -0.6221310933430989, "bits_per_byte": 0.8975454431495624, "num_chars": 45}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 458, "native_id": "Mercury_7174738", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.13108253479004, "logits_per_token_corr": -1.4087388356526693, "logits_per_char_corr": -0.32016791719378845, "bits_per_byte_corr": 0.4619046663875587}, "model_output": [{"sum_logits": -31.553485870361328, "num_tokens": 13, "num_tokens_all": 252, "is_greedy": false, "sum_logits_uncond": -61.704017639160156, "logits_per_token": -2.427191220797025, "logits_per_char": -0.5535699275501987, "bits_per_byte": 0.7986325892625082, "num_chars": 57}, {"sum_logits": -31.77719497680664, "num_tokens": 13, "num_tokens_all": 252, "is_greedy": false, "sum_logits_uncond": -60.67205810546875, "logits_per_token": -2.4443996136005106, "logits_per_char": -0.5209376225706006, "bits_per_byte": 0.751554124695632, "num_chars": 61}, {"sum_logits": -21.13108253479004, "num_tokens": 15, "num_tokens_all": 254, "is_greedy": false, "sum_logits_uncond": -47.305355072021484, "logits_per_token": -1.4087388356526693, "logits_per_char": -0.32016791719378845, "bits_per_byte": 0.4619046663875587, "num_chars": 66}, {"sum_logits": -32.29789352416992, "num_tokens": 15, "num_tokens_all": 254, "is_greedy": false, "sum_logits_uncond": -61.448726654052734, "logits_per_token": -2.153192901611328, "logits_per_char": -0.48936202309348364, "bits_per_byte": 0.7060001639168665, "num_chars": 66}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 459, "native_id": "MCAS_2001_5_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.35601806640625, "logits_per_token_corr": -2.336574009486607, "logits_per_char_corr": -0.39892726991234756, "bits_per_byte_corr": 0.5755303939783303}, "model_output": [{"sum_logits": -13.909791946411133, "num_tokens": 7, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -29.098119735717773, "logits_per_token": -1.9871131352015905, "logits_per_char": -0.479647998152108, "bits_per_byte": 0.6919857883068621, "num_chars": 29}, {"sum_logits": -8.75705623626709, "num_tokens": 7, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -28.930957794189453, "logits_per_token": -1.2510080337524414, "logits_per_char": -0.28248568504087385, "bits_per_byte": 0.40754069693088313, "num_chars": 31}, {"sum_logits": -14.313087463378906, "num_tokens": 6, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -31.106781005859375, "logits_per_token": -2.385514577229818, "logits_per_char": -0.47710291544596356, "bits_per_byte": 0.6883140101080526, "num_chars": 30}, {"sum_logits": -16.35601806640625, "num_tokens": 7, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -37.14151382446289, "logits_per_token": -2.336574009486607, "logits_per_char": -0.39892726991234756, "bits_per_byte": 0.5755303939783303, "num_chars": 41}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 460, "native_id": "NYSEDREGENTS_2012_4_9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.549279689788818, "logits_per_token_corr": -3.774639844894409, "logits_per_char_corr": -0.5392342635563442, "bits_per_byte_corr": 0.777950597910709}, "model_output": [{"sum_logits": -7.549279689788818, "num_tokens": 2, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -21.36368179321289, "logits_per_token": -3.774639844894409, "logits_per_char": -0.5392342635563442, "bits_per_byte": 0.777950597910709, "num_chars": 14}, {"sum_logits": -14.704568862915039, "num_tokens": 2, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -19.89478874206543, "logits_per_token": -7.3522844314575195, "logits_per_char": -0.9803045908610026, "bits_per_byte": 1.4142805717968687, "num_chars": 15}, {"sum_logits": -7.596608638763428, "num_tokens": 3, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -17.915904998779297, "logits_per_token": -2.532202879587809, "logits_per_char": -0.44685933169196634, "bits_per_byte": 0.6446817418074194, "num_chars": 17}, {"sum_logits": -8.908907890319824, "num_tokens": 3, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -21.324573516845703, "logits_per_token": -2.9696359634399414, "logits_per_char": -0.556806743144989, "bits_per_byte": 0.8033023270693876, "num_chars": 16}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 461, "native_id": "Mercury_416593", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.502779006958008, "logits_per_token_corr": -4.502779006958008, "logits_per_char_corr": -0.3752315839131673, "bits_per_byte_corr": 0.5413447452968263}, "model_output": [{"sum_logits": -5.508037090301514, "num_tokens": 2, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -16.579147338867188, "logits_per_token": -2.754018545150757, "logits_per_char": -0.6120041211446127, "bits_per_byte": 0.8829353105795754, "num_chars": 9}, {"sum_logits": -4.502779006958008, "num_tokens": 1, "num_tokens_all": 237, "is_greedy": false, "sum_logits_uncond": -16.723670959472656, "logits_per_token": -4.502779006958008, "logits_per_char": -0.3752315839131673, "bits_per_byte": 0.5413447452968263, "num_chars": 12}, {"sum_logits": -5.274410724639893, "num_tokens": 2, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -16.427722930908203, "logits_per_token": -2.6372053623199463, "logits_per_char": -0.4057239018953763, "bits_per_byte": 0.5853358612350003, "num_chars": 13}, {"sum_logits": -5.810868263244629, "num_tokens": 3, "num_tokens_all": 239, "is_greedy": false, "sum_logits_uncond": -16.995010375976562, "logits_per_token": -1.9369560877482097, "logits_per_char": -0.3631792664527893, "bits_per_byte": 0.5239569266655071, "num_chars": 16}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 462, "native_id": "Mercury_7205870", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.55121421813965, "logits_per_token_corr": -4.710242843627929, "logits_per_char_corr": -0.7597165876819242, "bits_per_byte_corr": 1.0960393535305848}, "model_output": [{"sum_logits": -23.00247573852539, "num_tokens": 5, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -37.338226318359375, "logits_per_token": -4.600495147705078, "logits_per_char": -0.6572135925292969, "bits_per_byte": 0.9481587907475177, "num_chars": 35}, {"sum_logits": -23.55121421813965, "num_tokens": 5, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -35.072898864746094, "logits_per_token": -4.710242843627929, "logits_per_char": -0.7597165876819242, "bits_per_byte": 1.0960393535305848, "num_chars": 31}, {"sum_logits": -23.378690719604492, "num_tokens": 8, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -36.99675369262695, "logits_per_token": -2.9223363399505615, "logits_per_char": -0.6152287031474867, "bits_per_byte": 0.8875873990440649, "num_chars": 38}, {"sum_logits": -33.565834045410156, "num_tokens": 8, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -36.85535430908203, "logits_per_token": -4.1957292556762695, "logits_per_char": -0.799186524890718, "bits_per_byte": 1.1529824362059513, "num_chars": 42}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 463, "native_id": "Mercury_SC_401798", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.693785667419434, "logits_per_token_corr": -1.4617232084274292, "logits_per_char_corr": -0.2657678560777144, "bits_per_byte_corr": 0.3834219679912858}, "model_output": [{"sum_logits": -23.607601165771484, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -35.247825622558594, "logits_per_token": -3.9346001942952475, "logits_per_char": -0.7869200388590495, "bits_per_byte": 1.1352856376389167, "num_chars": 30}, {"sum_logits": -19.30681800842285, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -28.142553329467773, "logits_per_token": -3.2178030014038086, "logits_per_char": -0.5218058921195365, "bits_per_byte": 0.7528067728680377, "num_chars": 37}, {"sum_logits": -11.693785667419434, "num_tokens": 8, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -29.575408935546875, "logits_per_token": -1.4617232084274292, "logits_per_char": -0.2657678560777144, "bits_per_byte": 0.3834219679912858, "num_chars": 44}, {"sum_logits": -16.540708541870117, "num_tokens": 9, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -39.545814514160156, "logits_per_token": -1.8378565046522353, "logits_per_char": -0.33081417083740233, "bits_per_byte": 0.4772639637232576, "num_chars": 50}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 464, "native_id": "Mercury_7084228", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.709802627563477, "logits_per_token_corr": -3.927450656890869, "logits_per_char_corr": -0.6830348968505859, "bits_per_byte_corr": 0.985411058441153}, "model_output": [{"sum_logits": -11.650006294250488, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -19.29806900024414, "logits_per_token": -3.8833354314168296, "logits_per_char": -0.7766670862833659, "bits_per_byte": 1.1204937538034976, "num_chars": 15}, {"sum_logits": -7.385406970977783, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -18.09771156311035, "logits_per_token": -1.8463517427444458, "logits_per_char": -0.3357003168626265, "bits_per_byte": 0.4843131823629129, "num_chars": 22}, {"sum_logits": -13.126519203186035, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -27.73326873779297, "logits_per_token": -3.281629800796509, "logits_per_char": -0.6908694317466334, "bits_per_byte": 0.9967139030833603, "num_chars": 19}, {"sum_logits": -15.709802627563477, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -30.976905822753906, "logits_per_token": -3.927450656890869, "logits_per_char": -0.6830348968505859, "bits_per_byte": 0.985411058441153, "num_chars": 23}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 465, "native_id": "Mercury_417460", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.12613868713379, "logits_per_token_corr": -4.021023114522298, "logits_per_char_corr": -1.3403410381740994, "bits_per_byte_corr": 1.9337033688751273}, "model_output": [{"sum_logits": -24.12613868713379, "num_tokens": 6, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -24.149417877197266, "logits_per_token": -4.021023114522298, "logits_per_char": -1.3403410381740994, "bits_per_byte": 1.9337033688751273, "num_chars": 18}, {"sum_logits": -10.35468578338623, "num_tokens": 4, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -24.424379348754883, "logits_per_token": -2.5886714458465576, "logits_per_char": -0.5177342891693115, "bits_per_byte": 0.7469326914832749, "num_chars": 20}, {"sum_logits": -15.349248886108398, "num_tokens": 6, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -26.43600845336914, "logits_per_token": -2.558208147684733, "logits_per_char": -0.6673586472221043, "bits_per_byte": 0.9627950108423887, "num_chars": 23}, {"sum_logits": -20.870361328125, "num_tokens": 9, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -34.922080993652344, "logits_per_token": -2.3189290364583335, "logits_per_char": -0.7729763454861112, "bits_per_byte": 1.115169140358088, "num_chars": 27}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 466, "native_id": "Mercury_402539", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.824569702148438, "logits_per_token_corr": -2.438597245649858, "logits_per_char_corr": -0.4326543500346522, "bits_per_byte_corr": 0.6241882852144789}, "model_output": [{"sum_logits": -29.353395462036133, "num_tokens": 11, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -49.33930969238281, "logits_per_token": -2.6684904965487393, "logits_per_char": -0.4812032042956743, "bits_per_byte": 0.6942294764977468, "num_chars": 61}, {"sum_logits": -26.824569702148438, "num_tokens": 11, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -44.781585693359375, "logits_per_token": -2.438597245649858, "logits_per_char": -0.4326543500346522, "bits_per_byte": 0.6241882852144789, "num_chars": 62}, {"sum_logits": -27.123371124267578, "num_tokens": 12, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -42.62843322753906, "logits_per_token": -2.2602809270222983, "logits_per_char": -0.4109601685495088, "bits_per_byte": 0.5928901971696948, "num_chars": 66}, {"sum_logits": -32.00370788574219, "num_tokens": 9, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -48.512298583984375, "logits_per_token": -3.555967542860243, "logits_per_char": -0.5714947836739677, "bits_per_byte": 0.8244926903009364, "num_chars": 56}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 467, "native_id": "Mercury_406800", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.85500717163086, "logits_per_token_corr": -2.8364295959472656, "logits_per_char_corr": -0.5366218154494827, "bits_per_byte_corr": 0.7741816319823575}, "model_output": [{"sum_logits": -22.498872756958008, "num_tokens": 8, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -40.10090255737305, "logits_per_token": -2.812359094619751, "logits_per_char": -0.4786994203608087, "bits_per_byte": 0.6906172798314562, "num_chars": 47}, {"sum_logits": -19.85500717163086, "num_tokens": 7, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -35.74235153198242, "logits_per_token": -2.8364295959472656, "logits_per_char": -0.5366218154494827, "bits_per_byte": 0.7741816319823575, "num_chars": 37}, {"sum_logits": -21.438079833984375, "num_tokens": 7, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -39.49786376953125, "logits_per_token": -3.0625828334263394, "logits_per_char": -0.5228799959508384, "bits_per_byte": 0.7543563771388578, "num_chars": 41}, {"sum_logits": -24.710155487060547, "num_tokens": 5, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -38.298500061035156, "logits_per_token": -4.9420310974121096, "logits_per_char": -0.8236718495686849, "bits_per_byte": 1.1883072926934357, "num_chars": 30}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 468, "native_id": "Mercury_SC_408321", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.568464279174805, "logits_per_token_corr": -1.938352039882115, "logits_per_char_corr": -0.3769017855326335, "bits_per_byte_corr": 0.5437543368905167}, "model_output": [{"sum_logits": -13.568464279174805, "num_tokens": 7, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -22.65264892578125, "logits_per_token": -1.938352039882115, "logits_per_char": -0.3769017855326335, "bits_per_byte": 0.5437543368905167, "num_chars": 36}, {"sum_logits": -15.451874732971191, "num_tokens": 6, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -25.32439422607422, "logits_per_token": -2.5753124554951987, "logits_per_char": -0.532823266654179, "bits_per_byte": 0.7687014844727941, "num_chars": 29}, {"sum_logits": -15.848844528198242, "num_tokens": 6, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -23.19062042236328, "logits_per_token": -2.6414740880330405, "logits_per_char": -0.5282948176066081, "bits_per_byte": 0.7621683134889405, "num_chars": 30}, {"sum_logits": -15.235396385192871, "num_tokens": 5, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -23.094554901123047, "logits_per_token": -3.0470792770385744, "logits_per_char": -0.5859767840458796, "bits_per_byte": 0.8453858004196609, "num_chars": 26}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 469, "native_id": "Mercury_SC_406836", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -31.680540084838867, "logits_per_token_corr": -4.5257914406912665, "logits_per_char_corr": -0.7040120018853082, "bits_per_byte_corr": 1.0156746238469754}, "model_output": [{"sum_logits": -32.06562042236328, "num_tokens": 11, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -46.83735656738281, "logits_per_token": -2.9150564020330254, "logits_per_char": -0.7287641005082564, "bits_per_byte": 1.051384353781923, "num_chars": 44}, {"sum_logits": -31.714693069458008, "num_tokens": 11, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -49.353633880615234, "logits_per_token": -2.8831539154052734, "logits_per_char": -0.7375510016153025, "bits_per_byte": 1.0640611724338493, "num_chars": 43}, {"sum_logits": -31.680540084838867, "num_tokens": 7, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -47.40422821044922, "logits_per_token": -4.5257914406912665, "logits_per_char": -0.7040120018853082, "bits_per_byte": 1.0156746238469754, "num_chars": 45}, {"sum_logits": -31.356464385986328, "num_tokens": 10, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -41.687068939208984, "logits_per_token": -3.135646438598633, "logits_per_char": -0.6271292877197265, "bits_per_byte": 0.9047563133901274, "num_chars": 50}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 470, "native_id": "Mercury_SC_410963", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.729511260986328, "logits_per_token_corr": -1.5729511260986329, "logits_per_char_corr": -0.3745121728806269, "bits_per_byte_corr": 0.5403068545678187}, "model_output": [{"sum_logits": -10.998703002929688, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -28.306604385375977, "logits_per_token": -1.5712432861328125, "logits_per_char": -0.4073593704788773, "bits_per_byte": 0.5876953436499486, "num_chars": 27}, {"sum_logits": -12.889002799987793, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -25.61538314819336, "logits_per_token": -1.8412861142839705, "logits_per_char": -0.42963342666625975, "bits_per_byte": 0.6198300140519903, "num_chars": 30}, {"sum_logits": -21.75007438659668, "num_tokens": 10, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -33.946617126464844, "logits_per_token": -2.175007438659668, "logits_per_char": -0.5304896191852849, "bits_per_byte": 0.7653347428422351, "num_chars": 41}, {"sum_logits": -15.729511260986328, "num_tokens": 10, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -35.1549072265625, "logits_per_token": -1.5729511260986329, "logits_per_char": -0.3745121728806269, "bits_per_byte": 0.5403068545678187, "num_chars": 42}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 471, "native_id": "Mercury_7132405", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.3275203704834, "logits_per_token_corr": -2.887920061747233, "logits_per_char_corr": -0.5250763748631333, "bits_per_byte_corr": 0.757525082103541}, "model_output": [{"sum_logits": -15.892633438110352, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -28.92206573486328, "logits_per_token": -2.6487722396850586, "logits_per_char": -0.567594051361084, "bits_per_byte": 0.8188651231372998, "num_chars": 28}, {"sum_logits": -24.384889602661133, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -34.69881057739258, "logits_per_token": -4.0641482671101885, "logits_per_char": -0.9378803693331205, "bits_per_byte": 1.3530753577849743, "num_chars": 26}, {"sum_logits": -16.22007179260254, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -28.198822021484375, "logits_per_token": -2.0275089740753174, "logits_per_char": -0.4268439945421721, "bits_per_byte": 0.6158057141596699, "num_chars": 38}, {"sum_logits": -17.3275203704834, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -31.753013610839844, "logits_per_token": -2.887920061747233, "logits_per_char": -0.5250763748631333, "bits_per_byte": 0.757525082103541, "num_chars": 33}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 472, "native_id": "Mercury_SC_408872", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.210742950439453, "logits_per_token_corr": -2.0301061357770647, "logits_per_char_corr": -0.5465670365553635, "bits_per_byte_corr": 0.7885295531523663}, "model_output": [{"sum_logits": -16.798416137695312, "num_tokens": 6, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -27.281686782836914, "logits_per_token": -2.7997360229492188, "logits_per_char": -0.730365919030231, "bits_per_byte": 1.0536952894199814, "num_chars": 23}, {"sum_logits": -18.647985458374023, "num_tokens": 5, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -27.666337966918945, "logits_per_token": -3.7295970916748047, "logits_per_char": -0.7459194183349609, "bits_per_byte": 1.0761342457354013, "num_chars": 25}, {"sum_logits": -14.210742950439453, "num_tokens": 7, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -25.592761993408203, "logits_per_token": -2.0301061357770647, "logits_per_char": -0.5465670365553635, "bits_per_byte": 0.7885295531523663, "num_chars": 26}, {"sum_logits": -16.697154998779297, "num_tokens": 5, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -24.452579498291016, "logits_per_token": -3.3394309997558596, "logits_per_char": -0.5963269642421177, "bits_per_byte": 0.8603179540610916, "num_chars": 28}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 473, "native_id": "VASoL_2008_3_25", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.053231716156006, "logits_per_token_corr": -1.684410572052002, "logits_per_char_corr": -0.31582698225975037, "bits_per_byte_corr": 0.4556420210853958}, "model_output": [{"sum_logits": -7.005350112915039, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -16.426725387573242, "logits_per_token": -1.7513375282287598, "logits_per_char": -0.43783438205718994, "bits_per_byte": 0.6316614917250455, "num_chars": 16}, {"sum_logits": -5.053231716156006, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -21.225025177001953, "logits_per_token": -1.684410572052002, "logits_per_char": -0.31582698225975037, "bits_per_byte": 0.4556420210853958, "num_chars": 16}, {"sum_logits": -7.664015769958496, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -17.989789962768555, "logits_per_token": -1.916003942489624, "logits_per_char": -0.4257786538865831, "bits_per_byte": 0.6142687524789932, "num_chars": 18}, {"sum_logits": -8.772533416748047, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -21.82721710205078, "logits_per_token": -2.1931333541870117, "logits_per_char": -0.4873629675971137, "bits_per_byte": 0.7031161364657896, "num_chars": 18}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 474, "native_id": "WASL_2005_8_12", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.85804557800293, "logits_per_token_corr": -1.1681203281178194, "logits_per_char_corr": -0.2392535611807582, "bits_per_byte_corr": 0.34516992623075204}, "model_output": [{"sum_logits": -19.85804557800293, "num_tokens": 17, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -62.27806854248047, "logits_per_token": -1.1681203281178194, "logits_per_char": -0.2392535611807582, "bits_per_byte": 0.34516992623075204, "num_chars": 83}, {"sum_logits": -24.000764846801758, "num_tokens": 15, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -55.219993591308594, "logits_per_token": -1.6000509897867838, "logits_per_char": -0.36364795222426904, "bits_per_byte": 0.5246330973037565, "num_chars": 66}, {"sum_logits": -40.7366943359375, "num_tokens": 15, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -63.866634368896484, "logits_per_token": -2.7157796223958335, "logits_per_char": -0.5580369087114726, "bits_per_byte": 0.8050770808316271, "num_chars": 73}, {"sum_logits": -28.861515045166016, "num_tokens": 12, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -49.60609436035156, "logits_per_token": -2.4051262537638345, "logits_per_char": -0.4810252507527669, "bits_per_byte": 0.6939727438038855, "num_chars": 60}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 475, "native_id": "AKDE&ED_2012_8_20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.801740646362305, "logits_per_token_corr": -2.950435161590576, "logits_per_char_corr": -0.5900870323181152, "bits_per_byte_corr": 0.8513156352188419}, "model_output": [{"sum_logits": -10.709412574768066, "num_tokens": 3, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -23.486297607421875, "logits_per_token": -3.5698041915893555, "logits_per_char": -0.7649580410548619, "bits_per_byte": 1.1036011723187782, "num_chars": 14}, {"sum_logits": -11.801740646362305, "num_tokens": 4, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -26.3447322845459, "logits_per_token": -2.950435161590576, "logits_per_char": -0.5900870323181152, "bits_per_byte": 0.8513156352188419, "num_chars": 20}, {"sum_logits": -15.998649597167969, "num_tokens": 6, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -29.147296905517578, "logits_per_token": -2.6664415995279946, "logits_per_char": -0.5925425776728878, "bits_per_byte": 0.8548582383248527, "num_chars": 27}, {"sum_logits": -20.736454010009766, "num_tokens": 7, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -46.72644805908203, "logits_per_token": -2.9623505728585378, "logits_per_char": -0.7150501382761988, "bits_per_byte": 1.0315992884787808, "num_chars": 29}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 476, "native_id": "Mercury_7056823", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.998458862304688, "logits_per_token_corr": -2.749614715576172, "logits_per_char_corr": -0.4230176485501803, "bits_per_byte_corr": 0.610285463772294}, "model_output": [{"sum_logits": -9.445140838623047, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -21.69381332397461, "logits_per_token": -3.1483802795410156, "logits_per_char": -0.6746529170445034, "bits_per_byte": 0.9733184177420775, "num_chars": 14}, {"sum_logits": -10.998458862304688, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -29.535146713256836, "logits_per_token": -2.749614715576172, "logits_per_char": -0.4230176485501803, "bits_per_byte": 0.610285463772294, "num_chars": 26}, {"sum_logits": -10.190780639648438, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -26.781545639038086, "logits_per_token": -2.0381561279296876, "logits_per_char": -0.4430774191151495, "bits_per_byte": 0.6392255952877662, "num_chars": 23}, {"sum_logits": -8.305426597595215, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -22.66057586669922, "logits_per_token": -2.7684755325317383, "logits_per_char": -0.6921188831329346, "bits_per_byte": 0.9985164804022101, "num_chars": 12}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 477, "native_id": "Mercury_7205800", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -43.643150329589844, "logits_per_token_corr": -4.364315032958984, "logits_per_char_corr": -0.6819242238998413, "bits_per_byte_corr": 0.983808696083063}, "model_output": [{"sum_logits": -40.557884216308594, "num_tokens": 11, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -61.824501037597656, "logits_per_token": -3.6870803833007812, "logits_per_char": -0.6239674494816707, "bits_per_byte": 0.9001947450439879, "num_chars": 65}, {"sum_logits": -43.643150329589844, "num_tokens": 10, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -51.33611297607422, "logits_per_token": -4.364315032958984, "logits_per_char": -0.6819242238998413, "bits_per_byte": 0.983808696083063, "num_chars": 64}, {"sum_logits": -44.47667694091797, "num_tokens": 9, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -51.97054672241211, "logits_per_token": -4.9418529934353295, "logits_per_char": -0.5930223592122396, "bits_per_byte": 0.8555504167723862, "num_chars": 75}, {"sum_logits": -39.056732177734375, "num_tokens": 12, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -63.6891975402832, "logits_per_token": -3.2547276814778647, "logits_per_char": -0.49438901490803006, "bits_per_byte": 0.7132525800783072, "num_chars": 79}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 478, "native_id": "Mercury_SC_402282", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.415250778198242, "logits_per_token_corr": -2.707625389099121, "logits_per_char_corr": -0.45127089818318683, "bits_per_byte_corr": 0.6510462869068597}, "model_output": [{"sum_logits": -5.415250778198242, "num_tokens": 2, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -17.198345184326172, "logits_per_token": -2.707625389099121, "logits_per_char": -0.45127089818318683, "bits_per_byte": 0.6510462869068597, "num_chars": 12}, {"sum_logits": -4.271763324737549, "num_tokens": 2, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -16.102745056152344, "logits_per_token": -2.1358816623687744, "logits_per_char": -0.3285971788259653, "bits_per_byte": 0.4740655203426646, "num_chars": 13}, {"sum_logits": -10.841282844543457, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -20.103809356689453, "logits_per_token": -3.6137609481811523, "logits_per_char": -0.5705938339233398, "bits_per_byte": 0.8231928945636146, "num_chars": 19}, {"sum_logits": -4.402844429016113, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -17.915307998657227, "logits_per_token": -1.4676148096720378, "logits_per_char": -0.24460246827867296, "bits_per_byte": 0.352886767975095, "num_chars": 18}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 479, "native_id": "MCAS_1998_8_26", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.53931999206543, "logits_per_token_corr": -3.6348299980163574, "logits_per_char_corr": -0.5592046150794396, "bits_per_byte_corr": 0.8067617250179088}, "model_output": [{"sum_logits": -12.80486011505127, "num_tokens": 4, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -26.505571365356445, "logits_per_token": -3.2012150287628174, "logits_per_char": -0.60975524357387, "bits_per_byte": 0.8796908660606962, "num_chars": 21}, {"sum_logits": -21.28465461730957, "num_tokens": 8, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -34.23823547363281, "logits_per_token": -2.6605818271636963, "logits_per_char": -0.5752609356029613, "bits_per_byte": 0.8299260990121339, "num_chars": 37}, {"sum_logits": -14.53931999206543, "num_tokens": 4, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -31.238162994384766, "logits_per_token": -3.6348299980163574, "logits_per_char": -0.5592046150794396, "bits_per_byte": 0.8067617250179088, "num_chars": 26}, {"sum_logits": -11.889251708984375, "num_tokens": 6, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -27.82648468017578, "logits_per_token": -1.9815419514973958, "logits_per_char": -0.3963083902994792, "bits_per_byte": 0.5717521493481571, "num_chars": 30}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 480, "native_id": "Mercury_7230318", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.880198955535889, "logits_per_token_corr": -3.4400994777679443, "logits_per_char_corr": -0.34400994777679444, "bits_per_byte_corr": 0.4963014456744092}, "model_output": [{"sum_logits": -3.1184494495391846, "num_tokens": 2, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -16.653596878051758, "logits_per_token": -1.5592247247695923, "logits_per_char": -0.20789662996927896, "bits_per_byte": 0.2999314370744221, "num_chars": 15}, {"sum_logits": -6.949915409088135, "num_tokens": 2, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -18.634368896484375, "logits_per_token": -3.4749577045440674, "logits_per_char": -0.3861064116160075, "bits_per_byte": 0.5570338052942471, "num_chars": 18}, {"sum_logits": -14.525236129760742, "num_tokens": 2, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -23.35538101196289, "logits_per_token": -7.262618064880371, "logits_per_char": -0.8544256546918083, "bits_per_byte": 1.2326756548330633, "num_chars": 17}, {"sum_logits": -6.880198955535889, "num_tokens": 2, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -16.969642639160156, "logits_per_token": -3.4400994777679443, "logits_per_char": -0.34400994777679444, "bits_per_byte": 0.4963014456744092, "num_chars": 20}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 481, "native_id": "Mercury_SC_416167", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -1.2286291122436523, "logits_per_token_corr": -1.2286291122436523, "logits_per_char_corr": -0.24572582244873048, "bits_per_byte_corr": 0.3545074254654001}, "model_output": [{"sum_logits": -6.5317182540893555, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -13.69958209991455, "logits_per_token": -6.5317182540893555, "logits_per_char": -0.8164647817611694, "bits_per_byte": 1.1779096917081753, "num_chars": 8}, {"sum_logits": -5.138859748840332, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -13.454791069030762, "logits_per_token": -5.138859748840332, "logits_per_char": -0.856476624806722, "bits_per_byte": 1.235634579246863, "num_chars": 6}, {"sum_logits": -1.2286291122436523, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": true, "sum_logits_uncond": -12.759955406188965, "logits_per_token": -1.2286291122436523, "logits_per_char": -0.24572582244873048, "bits_per_byte": 0.3545074254654001, "num_chars": 5}, {"sum_logits": -4.752955436706543, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -13.502662658691406, "logits_per_token": -4.752955436706543, "logits_per_char": -0.7921592394510905, "bits_per_byte": 1.1428442063512823, "num_chars": 6}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 482, "native_id": "Mercury_7027720", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.52374839782715, "logits_per_token_corr": -3.253958066304525, "logits_per_char_corr": -0.6972767284938267, "bits_per_byte_corr": 1.0059576783260467}, "model_output": [{"sum_logits": -18.393659591674805, "num_tokens": 6, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -31.37717056274414, "logits_per_token": -3.065609931945801, "logits_per_char": -0.7357463836669922, "bits_per_byte": 1.061457659069121, "num_chars": 25}, {"sum_logits": -19.52374839782715, "num_tokens": 6, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -33.13866424560547, "logits_per_token": -3.253958066304525, "logits_per_char": -0.6972767284938267, "bits_per_byte": 1.0059576783260467, "num_chars": 28}, {"sum_logits": -18.114002227783203, "num_tokens": 7, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -31.67173957824707, "logits_per_token": -2.587714603969029, "logits_per_char": -0.6246207664752829, "bits_per_byte": 0.9011372822308013, "num_chars": 29}, {"sum_logits": -20.874771118164062, "num_tokens": 9, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -35.8251953125, "logits_per_token": -2.31941901312934, "logits_per_char": -0.5218692779541015, "bits_per_byte": 0.7528982192972272, "num_chars": 40}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 483, "native_id": "LEAP__5_10312", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.97475814819336, "logits_per_token_corr": -1.8291263580322266, "logits_per_char_corr": -0.45728158950805664, "bits_per_byte_corr": 0.6597178814735699}, "model_output": [{"sum_logits": -10.400755882263184, "num_tokens": 6, "num_tokens_all": 236, "is_greedy": false, "sum_logits_uncond": -21.510299682617188, "logits_per_token": -1.7334593137105305, "logits_per_char": -0.43336482842763263, "bits_per_byte": 0.6252132888686912, "num_chars": 24}, {"sum_logits": -10.97475814819336, "num_tokens": 6, "num_tokens_all": 236, "is_greedy": false, "sum_logits_uncond": -23.934335708618164, "logits_per_token": -1.8291263580322266, "logits_per_char": -0.45728158950805664, "bits_per_byte": 0.6597178814735699, "num_chars": 24}, {"sum_logits": -17.16249656677246, "num_tokens": 5, "num_tokens_all": 235, "is_greedy": false, "sum_logits_uncond": -23.879785537719727, "logits_per_token": -3.432499313354492, "logits_per_char": -0.7461955029031505, "bits_per_byte": 1.0765325515727948, "num_chars": 23}, {"sum_logits": -13.922836303710938, "num_tokens": 5, "num_tokens_all": 235, "is_greedy": false, "sum_logits_uncond": -23.143831253051758, "logits_per_token": -2.7845672607421874, "logits_per_char": -0.7327808580900493, "bits_per_byte": 1.057179310025633, "num_chars": 19}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 484, "native_id": "Mercury_405161", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.513069152832031, "logits_per_token_corr": -3.378267288208008, "logits_per_char_corr": -0.6142304160378196, "bits_per_byte_corr": 0.8861471751815637}, "model_output": [{"sum_logits": -16.293596267700195, "num_tokens": 5, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -26.23822593688965, "logits_per_token": -3.258719253540039, "logits_per_char": -0.8575576983000103, "bits_per_byte": 1.2371942386144674, "num_chars": 19}, {"sum_logits": -16.130615234375, "num_tokens": 5, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -31.17938804626465, "logits_per_token": -3.226123046875, "logits_per_char": -0.8489797491776315, "bits_per_byte": 1.2248188739546049, "num_chars": 19}, {"sum_logits": -9.90593147277832, "num_tokens": 6, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -24.739253997802734, "logits_per_token": -1.6509885787963867, "logits_per_char": -0.495296573638916, "bits_per_byte": 0.7145619105586728, "num_chars": 20}, {"sum_logits": -13.513069152832031, "num_tokens": 4, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -28.37246322631836, "logits_per_token": -3.378267288208008, "logits_per_char": -0.6142304160378196, "bits_per_byte": 0.8861471751815637, "num_chars": 22}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 485, "native_id": "Mercury_SC_409245", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -24.11603355407715, "logits_per_token_corr": -2.192366686734286, "logits_per_char_corr": -0.4230883079662658, "bits_per_byte_corr": 0.6103874037614727}, "model_output": [{"sum_logits": -10.905767440795898, "num_tokens": 6, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -23.174278259277344, "logits_per_token": -1.8176279067993164, "logits_per_char": -0.3894916943141392, "bits_per_byte": 0.5619177358548524, "num_chars": 28}, {"sum_logits": -14.333784103393555, "num_tokens": 6, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -28.016359329223633, "logits_per_token": -2.3889640172322593, "logits_per_char": -0.49426841735839844, "bits_per_byte": 0.7130785945915101, "num_chars": 29}, {"sum_logits": -17.699237823486328, "num_tokens": 7, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -29.393367767333984, "logits_per_token": -2.5284625462123325, "logits_per_char": -0.4214104243687221, "bits_per_byte": 0.6079667294161057, "num_chars": 42}, {"sum_logits": -24.11603355407715, "num_tokens": 11, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -41.40571594238281, "logits_per_token": -2.192366686734286, "logits_per_char": -0.4230883079662658, "bits_per_byte": 0.6103874037614727, "num_chars": 57}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 486, "native_id": "ACTAAP_2011_5_8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.856413841247559, "logits_per_token_corr": -4.928206920623779, "logits_per_char_corr": -0.7581856800959661, "bits_per_byte_corr": 1.0938307207482623}, "model_output": [{"sum_logits": -4.29807186126709, "num_tokens": 1, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -13.853055000305176, "logits_per_token": -4.29807186126709, "logits_per_char": -0.4775635401407878, "bits_per_byte": 0.6889785510709869, "num_chars": 9}, {"sum_logits": -4.783845901489258, "num_tokens": 1, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -15.004528045654297, "logits_per_token": -4.783845901489258, "logits_per_char": -0.47838459014892576, "bits_per_byte": 0.6901630758460503, "num_chars": 10}, {"sum_logits": -9.856413841247559, "num_tokens": 2, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -19.035520553588867, "logits_per_token": -4.928206920623779, "logits_per_char": -0.7581856800959661, "bits_per_byte": 1.0938307207482623, "num_chars": 13}, {"sum_logits": -8.99697494506836, "num_tokens": 3, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -23.672550201416016, "logits_per_token": -2.9989916483561196, "logits_per_char": -0.5997983296712239, "bits_per_byte": 0.86532607575078, "num_chars": 15}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 487, "native_id": "Mercury_7223370", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.774648427963257, "logits_per_token_corr": -3.774648427963257, "logits_per_char_corr": -0.4718310534954071, "bits_per_byte_corr": 0.6807083210157281}, "model_output": [{"sum_logits": -9.170079231262207, "num_tokens": 2, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -17.503559112548828, "logits_per_token": -4.5850396156311035, "logits_per_char": -1.0188976923624675, "bits_per_byte": 1.4699586479455966, "num_chars": 9}, {"sum_logits": -9.23604965209961, "num_tokens": 1, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -12.257821083068848, "logits_per_token": -9.23604965209961, "logits_per_char": -1.1545062065124512, "bits_per_byte": 1.6656003788122393, "num_chars": 8}, {"sum_logits": -4.557818412780762, "num_tokens": 2, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -17.695938110351562, "logits_per_token": -2.278909206390381, "logits_per_char": -0.6511169161115374, "bits_per_byte": 0.9393631459137052, "num_chars": 7}, {"sum_logits": -3.774648427963257, "num_tokens": 1, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -15.0416259765625, "logits_per_token": -3.774648427963257, "logits_per_char": -0.4718310534954071, "bits_per_byte": 0.6807083210157281, "num_chars": 8}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 488, "native_id": "Mercury_SC_400697", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.922542572021484, "logits_per_token_corr": -2.3247269524468317, "logits_per_char_corr": -0.46494539048936634, "bits_per_byte_corr": 0.6707744091436734}, "model_output": [{"sum_logits": -22.262855529785156, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -32.636260986328125, "logits_per_token": -3.180407932826451, "logits_per_char": -0.5858646192048725, "bits_per_byte": 0.8452239807597778, "num_chars": 38}, {"sum_logits": -20.922542572021484, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -34.453147888183594, "logits_per_token": -2.3247269524468317, "logits_per_char": -0.46494539048936634, "bits_per_byte": 0.6707744091436734, "num_chars": 45}, {"sum_logits": -17.796672821044922, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -37.253684997558594, "logits_per_token": -2.2245841026306152, "logits_per_char": -0.42373030526297434, "bits_per_byte": 0.6113136100776989, "num_chars": 42}, {"sum_logits": -28.484907150268555, "num_tokens": 10, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -45.53207778930664, "logits_per_token": -2.8484907150268555, "logits_per_char": -0.5374510783069538, "bits_per_byte": 0.7753780053944253, "num_chars": 53}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 489, "native_id": "Mercury_SC_401262", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.233981132507324, "logits_per_token_corr": -5.116990566253662, "logits_per_char_corr": -0.4651809605685147, "bits_per_byte_corr": 0.6711142649286428}, "model_output": [{"sum_logits": -14.525100708007812, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -18.62957000732422, "logits_per_token": -7.262550354003906, "logits_per_char": -1.117315439077524, "bits_per_byte": 1.6119454430669766, "num_chars": 13}, {"sum_logits": -12.715449333190918, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -16.673784255981445, "logits_per_token": -6.357724666595459, "logits_per_char": -0.8476966222127279, "bits_per_byte": 1.2229677130455063, "num_chars": 15}, {"sum_logits": -11.849424362182617, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -18.67382049560547, "logits_per_token": -5.924712181091309, "logits_per_char": -0.5924712181091308, "bits_per_byte": 0.8547552882361006, "num_chars": 20}, {"sum_logits": -10.233981132507324, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -18.67389488220215, "logits_per_token": -5.116990566253662, "logits_per_char": -0.4651809605685147, "bits_per_byte": 0.6711142649286428, "num_chars": 22}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 490, "native_id": "Mercury_7136063", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.227102279663086, "logits_per_token_corr": -2.742367426554362, "logits_per_char_corr": -0.3917667752220517, "bits_per_byte_corr": 0.5651999837983213}, "model_output": [{"sum_logits": -10.2337007522583, "num_tokens": 5, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -27.065622329711914, "logits_per_token": -2.0467401504516602, "logits_per_char": -0.3790259537873445, "bits_per_byte": 0.5468188638976041, "num_chars": 27}, {"sum_logits": -8.227102279663086, "num_tokens": 3, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -23.149505615234375, "logits_per_token": -2.742367426554362, "logits_per_char": -0.3917667752220517, "bits_per_byte": 0.5651999837983213, "num_chars": 21}, {"sum_logits": -6.523214340209961, "num_tokens": 3, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -22.067955017089844, "logits_per_token": -2.174404780069987, "logits_per_char": -0.32616071701049804, "bits_per_byte": 0.47055044896417214, "num_chars": 20}, {"sum_logits": -9.203629493713379, "num_tokens": 4, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -27.97206687927246, "logits_per_token": -2.3009073734283447, "logits_per_char": -0.48440155230070414, "bits_per_byte": 0.6988437173036438, "num_chars": 19}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 491, "native_id": "Mercury_405876", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.592798233032227, "logits_per_token_corr": -3.4321330388387046, "logits_per_char_corr": -0.6240241888797644, "bits_per_byte_corr": 0.9002766026922407}, "model_output": [{"sum_logits": -10.976800918579102, "num_tokens": 6, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -24.63484001159668, "logits_per_token": -1.8294668197631836, "logits_per_char": -0.49894549629905005, "bits_per_byte": 0.7198261931850393, "num_chars": 22}, {"sum_logits": -20.838159561157227, "num_tokens": 7, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -34.4389762878418, "logits_per_token": -2.9768799373081754, "logits_per_char": -0.563193501652898, "bits_per_byte": 0.81251647189611, "num_chars": 37}, {"sum_logits": -6.537890434265137, "num_tokens": 7, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -25.56698226928711, "logits_per_token": -0.9339843477521624, "logits_per_char": -0.2334960869380406, "bits_per_byte": 0.33686364669273144, "num_chars": 28}, {"sum_logits": -20.592798233032227, "num_tokens": 6, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -35.48064422607422, "logits_per_token": -3.4321330388387046, "logits_per_char": -0.6240241888797644, "bits_per_byte": 0.9002766026922407, "num_chars": 33}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 492, "native_id": "Mercury_7057890", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.96950340270996, "logits_per_token_corr": -3.593900680541992, "logits_per_char_corr": -0.7487293084462484, "bits_per_byte_corr": 1.0801880602644016}, "model_output": [{"sum_logits": -17.96950340270996, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -29.664447784423828, "logits_per_token": -3.593900680541992, "logits_per_char": -0.7487293084462484, "bits_per_byte": 1.0801880602644016, "num_chars": 24}, {"sum_logits": -15.352380752563477, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -25.895191192626953, "logits_per_token": -2.193197250366211, "logits_per_char": -0.4652236591685902, "bits_per_byte": 0.6711758659872247, "num_chars": 33}, {"sum_logits": -19.224609375, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -28.15399932861328, "logits_per_token": -2.746372767857143, "logits_per_char": -0.45772879464285715, "bits_per_byte": 0.6603630621038071, "num_chars": 42}, {"sum_logits": -11.753791809082031, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -16.881092071533203, "logits_per_token": -1.9589653015136719, "logits_per_char": -0.3673059940338135, "bits_per_byte": 0.5299105360817545, "num_chars": 32}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 493, "native_id": "LEAP_2002_4_10247", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.177215576171875, "logits_per_token_corr": -1.6962025960286458, "logits_per_char_corr": -0.37693391022858797, "bits_per_byte_corr": 0.5438006830300602}, "model_output": [{"sum_logits": -9.602849960327148, "num_tokens": 7, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -22.036930084228516, "logits_per_token": -1.371835708618164, "logits_per_char": -0.30976935355894025, "bits_per_byte": 0.44690271019918415, "num_chars": 31}, {"sum_logits": -10.177215576171875, "num_tokens": 6, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -21.544422149658203, "logits_per_token": -1.6962025960286458, "logits_per_char": -0.37693391022858797, "bits_per_byte": 0.5438006830300602, "num_chars": 27}, {"sum_logits": -16.071176528930664, "num_tokens": 7, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -25.352523803710938, "logits_per_token": -2.295882361275809, "logits_per_char": -0.5541785009976091, "bits_per_byte": 0.7995105751571046, "num_chars": 29}, {"sum_logits": -15.04203987121582, "num_tokens": 7, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -25.6110782623291, "logits_per_token": -2.148862838745117, "logits_per_char": -0.4700637459754944, "bits_per_byte": 0.6781586352210224, "num_chars": 32}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 494, "native_id": "Mercury_SC_405481", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.670047760009766, "logits_per_token_corr": -2.334009552001953, "logits_per_char_corr": -0.4862519900004069, "bits_per_byte_corr": 0.7015133345964809}, "model_output": [{"sum_logits": -11.178936958312988, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -19.835750579833984, "logits_per_token": -2.794734239578247, "logits_per_char": -0.7452624638875326, "bits_per_byte": 1.0751864608120059, "num_chars": 15}, {"sum_logits": -12.522651672363281, "num_tokens": 5, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -23.560359954833984, "logits_per_token": -2.504530334472656, "logits_per_char": -0.695702870686849, "bits_per_byte": 1.003687081472854, "num_chars": 18}, {"sum_logits": -11.670047760009766, "num_tokens": 5, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -29.55146026611328, "logits_per_token": -2.334009552001953, "logits_per_char": -0.4862519900004069, "bits_per_byte": 0.7015133345964809, "num_chars": 24}, {"sum_logits": -14.978515625, "num_tokens": 6, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -32.84233856201172, "logits_per_token": -2.4964192708333335, "logits_per_char": -0.6808416193181818, "bits_per_byte": 0.982246827821858, "num_chars": 22}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 495, "native_id": "Mercury_SC_400401", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -22.06790542602539, "logits_per_token_corr": -3.6779842376708984, "logits_per_char_corr": -0.9594741489576257, "bits_per_byte_corr": 1.3842285965633196}, "model_output": [{"sum_logits": -18.930782318115234, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -27.713531494140625, "logits_per_token": -3.7861564636230467, "logits_per_char": -0.9963569641113281, "bits_per_byte": 1.4374392510796288, "num_chars": 19}, {"sum_logits": -22.06790542602539, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -37.064788818359375, "logits_per_token": -3.6779842376708984, "logits_per_char": -0.9594741489576257, "bits_per_byte": 1.3842285965633196, "num_chars": 23}, {"sum_logits": -18.903564453125, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -29.41362190246582, "logits_per_token": -3.780712890625, "logits_per_char": -0.756142578125, "bits_per_byte": 1.0908831476667167, "num_chars": 25}, {"sum_logits": -11.27348804473877, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -24.886905670166016, "logits_per_token": -3.7578293482462564, "logits_per_char": -0.46972866853078205, "bits_per_byte": 0.6776752206532218, "num_chars": 24}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 496, "native_id": "Mercury_7064260", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.31364631652832, "logits_per_token_corr": -1.3681829240587022, "logits_per_char_corr": -0.19545470343695748, "bits_per_byte_corr": 0.28198153136712417}, "model_output": [{"sum_logits": -17.359542846679688, "num_tokens": 9, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -37.15898132324219, "logits_per_token": -1.9288380940755208, "logits_per_char": -0.2845826696176998, "bits_per_byte": 0.4105660061806927, "num_chars": 61}, {"sum_logits": -12.31364631652832, "num_tokens": 9, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -34.11216735839844, "logits_per_token": -1.3681829240587022, "logits_per_char": -0.19545470343695748, "bits_per_byte": 0.28198153136712417, "num_chars": 63}, {"sum_logits": -14.04896068572998, "num_tokens": 9, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -34.75370788574219, "logits_per_token": -1.5609956317477756, "logits_per_char": -0.23031083091360624, "bits_per_byte": 0.332268293622315, "num_chars": 61}, {"sum_logits": -13.060997009277344, "num_tokens": 9, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -34.208740234375, "logits_per_token": -1.451221889919705, "logits_per_char": -0.20731741284567212, "bits_per_byte": 0.29909580340259595, "num_chars": 63}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 497, "native_id": "Mercury_7015995", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -1.796844720840454, "logits_per_token_corr": -1.796844720840454, "logits_per_char_corr": -0.17968447208404542, "bits_per_byte_corr": 0.25922989680058994}, "model_output": [{"sum_logits": -4.785787582397461, "num_tokens": 2, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -16.271526336669922, "logits_per_token": -2.3928937911987305, "logits_per_char": -0.5317541758219401, "bits_per_byte": 0.767159112430862, "num_chars": 9}, {"sum_logits": -1.796844720840454, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": true, "sum_logits_uncond": -14.539495468139648, "logits_per_token": -1.796844720840454, "logits_per_char": -0.17968447208404542, "bits_per_byte": 0.25922989680058994, "num_chars": 10}, {"sum_logits": -3.5248911380767822, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -15.718083381652832, "logits_per_token": -3.5248911380767822, "logits_per_char": -0.4406113922595978, "bits_per_byte": 0.6356678705725602, "num_chars": 8}, {"sum_logits": -6.31928825378418, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -16.651010513305664, "logits_per_token": -6.31928825378418, "logits_per_char": -0.7899110317230225, "bits_per_byte": 1.1396007282111078, "num_chars": 8}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 498, "native_id": "Mercury_400887", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.244871139526367, "logits_per_token_corr": -2.561217784881592, "logits_per_char_corr": -1.463553019932338, "bits_per_byte_corr": 2.1114606839359675}, "model_output": [{"sum_logits": -13.578932762145996, "num_tokens": 9, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -37.81000518798828, "logits_per_token": -1.5087703069051106, "logits_per_char": -0.969923768724714, "bits_per_byte": 1.3060172637684517, "num_chars": 14}, {"sum_logits": -10.244871139526367, "num_tokens": 4, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -23.44922637939453, "logits_per_token": -2.561217784881592, "logits_per_char": -1.463553019932338, "bits_per_byte": 2.1114606839359675, "num_chars": 7}, {"sum_logits": -13.443799018859863, "num_tokens": 9, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -37.87168884277344, "logits_per_token": -1.4937554465399847, "logits_per_char": -0.9602713584899902, "bits_per_byte": 1.293020145015398, "num_chars": 14}, {"sum_logits": -9.68351936340332, "num_tokens": 4, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -21.540035247802734, "logits_per_token": -2.42087984085083, "logits_per_char": -1.3833599090576172, "bits_per_byte": 1.9957664805634656, "num_chars": 7}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 499, "native_id": "Mercury_7247678", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -29.760448455810547, "logits_per_token_corr": -2.705495314164595, "logits_per_char_corr": -0.4650070071220398, "bits_per_byte_corr": 0.6708633031540677}, "model_output": [{"sum_logits": -29.760448455810547, "num_tokens": 11, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -39.670265197753906, "logits_per_token": -2.705495314164595, "logits_per_char": -0.4650070071220398, "bits_per_byte": 0.6708633031540677, "num_chars": 64}, {"sum_logits": -29.953350067138672, "num_tokens": 11, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -40.09717559814453, "logits_per_token": -2.723031824285334, "logits_per_char": -0.46802109479904175, "bits_per_byte": 0.6752117124984861, "num_chars": 64}, {"sum_logits": -32.72246551513672, "num_tokens": 11, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -39.17095947265625, "logits_per_token": -2.9747695922851562, "logits_per_char": -0.5112885236740112, "bits_per_byte": 0.7376334175684653, "num_chars": 64}, {"sum_logits": -30.145282745361328, "num_tokens": 11, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -39.67323303222656, "logits_per_token": -2.7404802495783027, "logits_per_char": -0.47102004289627075, "bits_per_byte": 0.6795382800462448, "num_chars": 64}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 500, "native_id": "MDSA_2007_8_24", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.37044620513916, "logits_per_token_corr": -1.537044620513916, "logits_per_char_corr": -0.30138129813998354, "bits_per_byte_corr": 0.4348013042435448}, "model_output": [{"sum_logits": -8.411481857299805, "num_tokens": 8, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -23.893632888793945, "logits_per_token": -1.0514352321624756, "logits_per_char": -0.17896769909148522, "bits_per_byte": 0.25819581195877944, "num_chars": 47}, {"sum_logits": -15.390130996704102, "num_tokens": 9, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -33.45800018310547, "logits_per_token": -1.7100145551893446, "logits_per_char": -0.3017672744451785, "bits_per_byte": 0.43535815034495057, "num_chars": 51}, {"sum_logits": -15.37044620513916, "num_tokens": 10, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -33.78857421875, "logits_per_token": -1.537044620513916, "logits_per_char": -0.30138129813998354, "bits_per_byte": 0.4348013042435448, "num_chars": 51}, {"sum_logits": -16.648164749145508, "num_tokens": 8, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -33.89305114746094, "logits_per_token": -2.0810205936431885, "logits_per_char": -0.30269390452991835, "bits_per_byte": 0.4366949949729443, "num_chars": 55}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 501, "native_id": "AKDE&ED_2008_8_48", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.779117584228516, "logits_per_token_corr": -2.539873940604074, "logits_per_char_corr": -0.433637014249476, "bits_per_byte_corr": 0.6256059700040653}, "model_output": [{"sum_logits": -17.779117584228516, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -31.164234161376953, "logits_per_token": -2.539873940604074, "logits_per_char": -0.433637014249476, "bits_per_byte": 0.6256059700040653, "num_chars": 41}, {"sum_logits": -15.103479385375977, "num_tokens": 5, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -31.709749221801758, "logits_per_token": -3.0206958770751955, "logits_per_char": -0.4576811934962417, "bits_per_byte": 0.6602943881656443, "num_chars": 33}, {"sum_logits": -17.994335174560547, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -32.7352180480957, "logits_per_token": -2.5706193106515065, "logits_per_char": -0.473535136172646, "bits_per_byte": 0.6831667926434472, "num_chars": 38}, {"sum_logits": -15.153352737426758, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -31.148067474365234, "logits_per_token": -3.7883381843566895, "logits_per_char": -0.409550073984507, "bits_per_byte": 0.5908558607335808, "num_chars": 37}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 502, "native_id": "Mercury_401014", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.466232299804688, "logits_per_token_corr": -1.2094617571149553, "logits_per_char_corr": -0.2565524939334754, "bits_per_byte_corr": 0.37012701072578674}, "model_output": [{"sum_logits": -9.631230354309082, "num_tokens": 5, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -26.317834854125977, "logits_per_token": -1.9262460708618163, "logits_per_char": -0.37043193670419544, "bits_per_byte": 0.5344203180704211, "num_chars": 26}, {"sum_logits": -10.491933822631836, "num_tokens": 5, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -27.70271873474121, "logits_per_token": -2.098386764526367, "logits_per_char": -0.4035359162550706, "bits_per_byte": 0.5821792652021927, "num_chars": 26}, {"sum_logits": -8.466232299804688, "num_tokens": 7, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -26.184207916259766, "logits_per_token": -1.2094617571149553, "logits_per_char": -0.2565524939334754, "bits_per_byte": 0.37012701072578674, "num_chars": 33}, {"sum_logits": -22.25770378112793, "num_tokens": 10, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -39.84015655517578, "logits_per_token": -2.225770378112793, "logits_per_char": -0.5058569041165438, "bits_per_byte": 0.7297972469689059, "num_chars": 44}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 503, "native_id": "Mercury_7106698", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.78602933883667, "logits_per_token_corr": -4.78602933883667, "logits_per_char_corr": -0.478602933883667, "bits_per_byte_corr": 0.6904780792693709}, "model_output": [{"sum_logits": -4.78602933883667, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -13.734278678894043, "logits_per_token": -4.78602933883667, "logits_per_char": -0.478602933883667, "bits_per_byte": 0.6904780792693709, "num_chars": 10}, {"sum_logits": -7.282506465911865, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -16.61407470703125, "logits_per_token": -7.282506465911865, "logits_per_char": -0.6620460423556241, "bits_per_byte": 0.9551305421473097, "num_chars": 11}, {"sum_logits": -5.169748306274414, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -18.96464729309082, "logits_per_token": -2.584874153137207, "logits_per_char": -0.4699771187522195, "bits_per_byte": 0.6780336585555976, "num_chars": 11}, {"sum_logits": -5.83207368850708, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -15.095113754272461, "logits_per_token": -5.83207368850708, "logits_per_char": -0.5301885171370073, "bits_per_byte": 0.7649003444103832, "num_chars": 11}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 504, "native_id": "Mercury_7143308", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.909936904907227, "logits_per_token_corr": -2.9819873809814452, "logits_per_char_corr": -0.4141649140252007, "bits_per_byte_corr": 0.5975136675747903}, "model_output": [{"sum_logits": -14.373920440673828, "num_tokens": 9, "num_tokens_all": 242, "is_greedy": false, "sum_logits_uncond": -36.813636779785156, "logits_per_token": -1.5971022711859808, "logits_per_char": -0.3505834253822885, "bits_per_byte": 0.5057849692172569, "num_chars": 41}, {"sum_logits": -14.909936904907227, "num_tokens": 5, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -28.562911987304688, "logits_per_token": -2.9819873809814452, "logits_per_char": -0.4141649140252007, "bits_per_byte": 0.5975136675747903, "num_chars": 36}, {"sum_logits": -20.633609771728516, "num_tokens": 8, "num_tokens_all": 241, "is_greedy": false, "sum_logits_uncond": -30.805095672607422, "logits_per_token": -2.5792012214660645, "logits_per_char": -0.44855673416801123, "bits_per_byte": 0.6471305759420037, "num_chars": 46}, {"sum_logits": -23.023643493652344, "num_tokens": 8, "num_tokens_all": 241, "is_greedy": false, "sum_logits_uncond": -42.3544921875, "logits_per_token": -2.877955436706543, "logits_per_char": -0.4898647551840924, "bits_per_byte": 0.706725453010884, "num_chars": 47}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 505, "native_id": "MCAS_2005_9_21", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.960616111755371, "logits_per_token_corr": -5.9803080558776855, "logits_per_char_corr": -1.9934360186258953, "bits_per_byte_corr": 2.8759202584030845}, "model_output": [{"sum_logits": -13.947324752807617, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -19.72819709777832, "logits_per_token": -6.973662376403809, "logits_per_char": -2.324554125467936, "bits_per_byte": 3.3536227090929818, "num_chars": 6}, {"sum_logits": -11.960616111755371, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -20.807647705078125, "logits_per_token": -5.9803080558776855, "logits_per_char": -1.9934360186258953, "bits_per_byte": 2.8759202584030845, "num_chars": 6}, {"sum_logits": -11.646505355834961, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -19.419193267822266, "logits_per_token": -5.8232526779174805, "logits_per_char": -2.3293010711669924, "bits_per_byte": 3.3604711041123845, "num_chars": 5}, {"sum_logits": -9.856468200683594, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -14.591767311096191, "logits_per_token": -4.928234100341797, "logits_per_char": -2.4641170501708984, "bits_per_byte": 3.55496944845405, "num_chars": 4}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 506, "native_id": "Mercury_400443", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.78724193572998, "logits_per_token_corr": -1.4208046595255535, "logits_per_char_corr": -0.33650636672973633, "bits_per_byte_corr": 0.48547606650890224}, "model_output": [{"sum_logits": -15.149903297424316, "num_tokens": 9, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -35.9551887512207, "logits_per_token": -1.683322588602702, "logits_per_char": -0.39868166572169256, "bits_per_byte": 0.5751760620304505, "num_chars": 38}, {"sum_logits": -12.78724193572998, "num_tokens": 9, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -34.07432174682617, "logits_per_token": -1.4208046595255535, "logits_per_char": -0.33650636672973633, "bits_per_byte": 0.48547606650890224, "num_chars": 38}, {"sum_logits": -16.6668643951416, "num_tokens": 9, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -38.76996994018555, "logits_per_token": -1.8518738216824002, "logits_per_char": -0.4386016946089895, "bits_per_byte": 0.6327684897383393, "num_chars": 38}, {"sum_logits": -17.847715377807617, "num_tokens": 9, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -36.05046844482422, "logits_per_token": -1.9830794864230685, "logits_per_char": -0.4696767204686215, "bits_per_byte": 0.677600275441559, "num_chars": 38}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 507, "native_id": "Mercury_7283430", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.045976638793945, "logits_per_token_corr": -1.5057470798492432, "logits_per_char_corr": -0.34417076110839845, "bits_per_byte_corr": 0.4965334502704233}, "model_output": [{"sum_logits": -14.861821174621582, "num_tokens": 8, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -23.18496322631836, "logits_per_token": -1.8577276468276978, "logits_per_char": -0.4503582174127752, "bits_per_byte": 0.6497295668854712, "num_chars": 33}, {"sum_logits": -12.045976638793945, "num_tokens": 8, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -16.964641571044922, "logits_per_token": -1.5057470798492432, "logits_per_char": -0.34417076110839845, "bits_per_byte": 0.4965334502704233, "num_chars": 35}, {"sum_logits": -17.264816284179688, "num_tokens": 9, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -27.257110595703125, "logits_per_token": -1.9183129204644098, "logits_per_char": -0.507788714240579, "bits_per_byte": 0.7325842598547926, "num_chars": 34}, {"sum_logits": -19.629592895507812, "num_tokens": 9, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -27.50216293334961, "logits_per_token": -2.181065877278646, "logits_per_char": -0.5452664693196615, "bits_per_byte": 0.7866532312510749, "num_chars": 36}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 508, "native_id": "Mercury_7159250", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.873358726501465, "logits_per_token_corr": -1.6244529088338215, "logits_per_char_corr": -0.2320647012619745, "bits_per_byte_corr": 0.3347985936762699}, "model_output": [{"sum_logits": -8.191206932067871, "num_tokens": 2, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -20.358640670776367, "logits_per_token": -4.0956034660339355, "logits_per_char": -0.48183570188634534, "bits_per_byte": 0.6951419776351828, "num_chars": 17}, {"sum_logits": -7.797887802124023, "num_tokens": 4, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -28.398771286010742, "logits_per_token": -1.9494719505310059, "logits_per_char": -0.41041514748021174, "bits_per_byte": 0.5921038979758394, "num_chars": 19}, {"sum_logits": -4.873358726501465, "num_tokens": 3, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -29.497310638427734, "logits_per_token": -1.6244529088338215, "logits_per_char": -0.2320647012619745, "bits_per_byte": 0.3347985936762699, "num_chars": 21}, {"sum_logits": -4.107757091522217, "num_tokens": 4, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -23.970273971557617, "logits_per_token": -1.0269392728805542, "logits_per_char": -0.17859813441400943, "bits_per_byte": 0.25766264283129703, "num_chars": 23}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 509, "native_id": "Mercury_401912", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.209390640258789, "logits_per_token_corr": -2.6046953201293945, "logits_per_char_corr": -1.7364635467529297, "bits_per_byte_corr": 2.505187347586712}, "model_output": [{"sum_logits": -5.209390640258789, "num_tokens": 2, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -10.419174194335938, "logits_per_token": -2.6046953201293945, "logits_per_char": -1.7364635467529297, "bits_per_byte": 2.505187347586712, "num_chars": 3}, {"sum_logits": -5.066200256347656, "num_tokens": 2, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -10.692663192749023, "logits_per_token": -2.533100128173828, "logits_per_char": -1.266550064086914, "bits_per_byte": 1.8272454964971026, "num_chars": 4}, {"sum_logits": -3.490501880645752, "num_tokens": 2, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -9.797324180603027, "logits_per_token": -1.745250940322876, "logits_per_char": -0.872625470161438, "bits_per_byte": 1.258932438356211, "num_chars": 4}, {"sum_logits": -3.378349542617798, "num_tokens": 2, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -8.640174865722656, "logits_per_token": -1.689174771308899, "logits_per_char": -0.6756699085235596, "bits_per_byte": 0.9747856263055392, "num_chars": 5}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 510, "native_id": "Mercury_7219328", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.918204307556152, "logits_per_token_corr": -1.729551076889038, "logits_per_char_corr": -0.4069531945621266, "bits_per_byte_corr": 0.5871093556691233}, "model_output": [{"sum_logits": -8.840559959411621, "num_tokens": 7, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -20.140602111816406, "logits_per_token": -1.262937137058803, "logits_per_char": -0.5200329387889189, "bits_per_byte": 0.7502489418902262, "num_chars": 17}, {"sum_logits": -6.918204307556152, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -28.655057907104492, "logits_per_token": -1.729551076889038, "logits_per_char": -0.4069531945621266, "bits_per_byte": 0.5871093556691233, "num_chars": 17}, {"sum_logits": -15.111125946044922, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -33.11799621582031, "logits_per_token": -3.7777814865112305, "logits_per_char": -0.6296302477518717, "bits_per_byte": 0.9083644360259673, "num_chars": 24}, {"sum_logits": -17.399860382080078, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -31.963729858398438, "logits_per_token": -3.479972076416016, "logits_per_char": -1.023521198945887, "bits_per_byte": 1.4766289579650183, "num_chars": 17}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 511, "native_id": "Mercury_7214498", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.399392127990723, "logits_per_token_corr": -2.1996960639953613, "logits_per_char_corr": -0.5499240159988403, "bits_per_byte_corr": 0.7933726507478399}, "model_output": [{"sum_logits": -4.703239917755127, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -11.782609939575195, "logits_per_token": -2.3516199588775635, "logits_per_char": -0.7838733196258545, "bits_per_byte": 1.130890150910202, "num_chars": 6}, {"sum_logits": -4.458329677581787, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -12.125205039978027, "logits_per_token": -4.458329677581787, "logits_per_char": -0.636904239654541, "bits_per_byte": 0.9188585880714223, "num_chars": 7}, {"sum_logits": -4.399392127990723, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -14.020513534545898, "logits_per_token": -2.1996960639953613, "logits_per_char": -0.5499240159988403, "bits_per_byte": 0.7933726507478399, "num_chars": 8}, {"sum_logits": -4.60117244720459, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -15.134454727172852, "logits_per_token": -2.300586223602295, "logits_per_char": -0.5112413830227323, "bits_per_byte": 0.7375654079846408, "num_chars": 9}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 512, "native_id": "TAKS_2009_5_14", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.130525588989258, "logits_per_token_corr": -2.1884209314982095, "logits_per_char_corr": -0.4689473424639021, "bits_per_byte_corr": 0.676548005411216}, "model_output": [{"sum_logits": -13.130525588989258, "num_tokens": 6, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -24.68219566345215, "logits_per_token": -2.1884209314982095, "logits_per_char": -0.4689473424639021, "bits_per_byte": 0.676548005411216, "num_chars": 28}, {"sum_logits": -19.696758270263672, "num_tokens": 6, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -29.17041778564453, "logits_per_token": -3.2827930450439453, "logits_per_char": -0.6791985610435749, "bits_per_byte": 0.9798763957971893, "num_chars": 29}, {"sum_logits": -10.719192504882812, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -25.7349796295166, "logits_per_token": -2.1438385009765626, "logits_per_char": -0.48723602294921875, "bits_per_byte": 0.7029329940518041, "num_chars": 22}, {"sum_logits": -15.298332214355469, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -25.727994918823242, "logits_per_token": -3.0596664428710936, "logits_per_char": -0.6119332885742188, "bits_per_byte": 0.8828331207815348, "num_chars": 25}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 513, "native_id": "NYSEDREGENTS_2013_4_17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.463484287261963, "logits_per_token_corr": -2.7317421436309814, "logits_per_char_corr": -0.7804977553231376, "bits_per_byte_corr": 1.1260202410304672}, "model_output": [{"sum_logits": -7.692168712615967, "num_tokens": 1, "num_tokens_all": 175, "is_greedy": false, "sum_logits_uncond": -10.820269584655762, "logits_per_token": -7.692168712615967, "logits_per_char": -1.9230421781539917, "bits_per_byte": 2.7743634138450677, "num_chars": 4}, {"sum_logits": -6.843062877655029, "num_tokens": 1, "num_tokens_all": 175, "is_greedy": false, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -6.843062877655029, "logits_per_char": -1.1405104796091716, "bits_per_byte": 1.6454088130152273, "num_chars": 6}, {"sum_logits": -6.675387859344482, "num_tokens": 2, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -14.209413528442383, "logits_per_token": -3.337693929672241, "logits_per_char": -0.9536268370492118, "bits_per_byte": 1.375792708670514, "num_chars": 7}, {"sum_logits": -5.463484287261963, "num_tokens": 2, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -15.340288162231445, "logits_per_token": -2.7317421436309814, "logits_per_char": -0.7804977553231376, "bits_per_byte": 1.1260202410304672, "num_chars": 7}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 514, "native_id": "Mercury_403907", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.377644538879395, "logits_per_token_corr": -1.6722055673599243, "logits_per_char_corr": -0.3615579605102539, "bits_per_byte_corr": 0.5216178766224457}, "model_output": [{"sum_logits": -34.728424072265625, "num_tokens": 8, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -53.90300750732422, "logits_per_token": -4.341053009033203, "logits_per_char": -0.890472412109375, "bits_per_byte": 1.2846801329995516, "num_chars": 39}, {"sum_logits": -26.237316131591797, "num_tokens": 9, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -43.29505157470703, "logits_per_token": -2.915257347954644, "logits_per_char": -0.6727516956818409, "bits_per_byte": 0.9705755351105302, "num_chars": 39}, {"sum_logits": -13.377644538879395, "num_tokens": 8, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -30.306900024414062, "logits_per_token": -1.6722055673599243, "logits_per_char": -0.3615579605102539, "bits_per_byte": 0.5216178766224457, "num_chars": 37}, {"sum_logits": -18.436342239379883, "num_tokens": 9, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -35.20493698120117, "logits_per_token": -2.048482471042209, "logits_per_char": -0.5422453599817613, "bits_per_byte": 0.7822946917912997, "num_chars": 34}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 515, "native_id": "Mercury_7081480", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.619022369384766, "logits_per_token_corr": -1.603170394897461, "logits_per_char_corr": -0.26719506581624347, "bits_per_byte_corr": 0.3854809964033716}, "model_output": [{"sum_logits": -9.619022369384766, "num_tokens": 6, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -36.93034744262695, "logits_per_token": -1.603170394897461, "logits_per_char": -0.26719506581624347, "bits_per_byte": 0.3854809964033716, "num_chars": 36}, {"sum_logits": -18.363203048706055, "num_tokens": 7, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -41.78034973144531, "logits_per_token": -2.6233147212437222, "logits_per_char": -0.4832421854922646, "bits_per_byte": 0.6971711045585356, "num_chars": 38}, {"sum_logits": -12.397072792053223, "num_tokens": 6, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -33.919830322265625, "logits_per_token": -2.066178798675537, "logits_per_char": -0.36461978800156536, "bits_per_byte": 0.5260351599602214, "num_chars": 34}, {"sum_logits": -11.410531997680664, "num_tokens": 5, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -29.043848037719727, "logits_per_token": -2.282106399536133, "logits_per_char": -0.422612296210395, "bits_per_byte": 0.6097006639618725, "num_chars": 27}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 516, "native_id": "Mercury_416505", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.81387710571289, "logits_per_token_corr": -2.968979517618815, "logits_per_char_corr": -0.7422448794047037, "bits_per_byte_corr": 1.0708330066431622}, "model_output": [{"sum_logits": -16.504484176635742, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -28.68425750732422, "logits_per_token": -3.3008968353271486, "logits_per_char": -0.7502038262107156, "bits_per_byte": 1.0823153397309027, "num_chars": 22}, {"sum_logits": -17.81387710571289, "num_tokens": 6, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -29.030895233154297, "logits_per_token": -2.968979517618815, "logits_per_char": -0.7422448794047037, "bits_per_byte": 1.0708330066431622, "num_chars": 24}, {"sum_logits": -16.480520248413086, "num_tokens": 7, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -29.10504913330078, "logits_per_token": -2.3543600354875838, "logits_per_char": -0.5150162577629089, "bits_per_byte": 0.7430114010522746, "num_chars": 32}, {"sum_logits": -19.25369644165039, "num_tokens": 8, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -32.250511169433594, "logits_per_token": -2.406712055206299, "logits_per_char": -0.5662851894603056, "bits_per_byte": 0.816976834563837, "num_chars": 34}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 517, "native_id": "Mercury_7041668", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.77699089050293, "logits_per_token_corr": -3.296165148417155, "logits_per_char_corr": -0.48236563147568123, "bits_per_byte_corr": 0.6959065044257385}, "model_output": [{"sum_logits": -14.23951244354248, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -26.039308547973633, "logits_per_token": -2.3732520739237466, "logits_per_char": -0.5476735555208646, "bits_per_byte": 0.7901259225765453, "num_chars": 26}, {"sum_logits": -23.815319061279297, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -31.382051467895508, "logits_per_token": -4.76306381225586, "logits_per_char": -0.744228720664978, "bits_per_byte": 1.0736950845912727, "num_chars": 32}, {"sum_logits": -17.63637924194336, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -27.384140014648438, "logits_per_token": -2.939396540323893, "logits_per_char": -0.5187170365277458, "bits_per_byte": 0.7483504962237358, "num_chars": 34}, {"sum_logits": -19.77699089050293, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -33.63266372680664, "logits_per_token": -3.296165148417155, "logits_per_char": -0.48236563147568123, "bits_per_byte": 0.6959065044257385, "num_chars": 41}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 518, "native_id": "Mercury_SC_401309", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.367407321929932, "logits_per_token_corr": -1.591851830482483, "logits_per_char_corr": -0.42449382146199544, "bits_per_byte_corr": 0.6124151311116658}, "model_output": [{"sum_logits": -9.43545913696289, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -16.663753509521484, "logits_per_token": -4.717729568481445, "logits_per_char": -0.7862882614135742, "bits_per_byte": 1.1343741754513834, "num_chars": 12}, {"sum_logits": -1.751754641532898, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": true, "sum_logits_uncond": -18.732818603515625, "logits_per_token": -0.875877320766449, "logits_per_char": -0.15925042195753616, "bits_per_byte": 0.22974979401777737, "num_chars": 11}, {"sum_logits": -6.367407321929932, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -20.395719528198242, "logits_per_token": -1.591851830482483, "logits_per_char": -0.42449382146199544, "bits_per_byte": 0.6124151311116658, "num_chars": 15}, {"sum_logits": -4.056763172149658, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -22.222496032714844, "logits_per_token": -1.0141907930374146, "logits_per_char": -0.2897687980106899, "bits_per_byte": 0.4180480078946784, "num_chars": 14}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 519, "native_id": "NYSEDREGENTS_2010_4_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.1134321689605713, "logits_per_token_corr": -2.1134321689605713, "logits_per_char_corr": -0.17611934741338095, "bits_per_byte_corr": 0.25408650911806774}, "model_output": [{"sum_logits": -6.3464860916137695, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -11.525116920471191, "logits_per_token": -6.3464860916137695, "logits_per_char": -1.0577476819356282, "bits_per_byte": 1.5260073352414236, "num_chars": 6}, {"sum_logits": -2.1134321689605713, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": true, "sum_logits_uncond": -14.96625804901123, "logits_per_token": -2.1134321689605713, "logits_per_char": -0.17611934741338095, "bits_per_byte": 0.25408650911806774, "num_chars": 12}, {"sum_logits": -5.311971664428711, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -12.437054634094238, "logits_per_token": -5.311971664428711, "logits_per_char": -1.0623943328857421, "bits_per_byte": 1.5327110355239, "num_chars": 5}, {"sum_logits": -6.0238800048828125, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -11.17255687713623, "logits_per_token": -6.0238800048828125, "logits_per_char": -1.2047760009765625, "bits_per_byte": 1.7381243619921722, "num_chars": 5}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 520, "native_id": "ACTAAP_2007_7_36", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.899491310119629, "logits_per_token_corr": -4.899491310119629, "logits_per_char_corr": -0.8165818850199381, "bits_per_byte_corr": 1.1780786359988729}, "model_output": [{"sum_logits": -2.3180992603302, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": true, "sum_logits_uncond": -12.446762084960938, "logits_per_token": -2.3180992603302, "logits_per_char": -0.57952481508255, "bits_per_byte": 0.8360775767922892, "num_chars": 4}, {"sum_logits": -2.354375123977661, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -12.599719047546387, "logits_per_token": -2.354375123977661, "logits_per_char": -0.4708750247955322, "bits_per_byte": 0.6793290631514701, "num_chars": 5}, {"sum_logits": -4.899491310119629, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -13.36142349243164, "logits_per_token": -4.899491310119629, "logits_per_char": -0.8165818850199381, "bits_per_byte": 1.1780786359988729, "num_chars": 6}, {"sum_logits": -5.738097190856934, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -13.967909812927246, "logits_per_token": -2.869048595428467, "logits_per_char": -0.47817476590474445, "bits_per_byte": 0.6898603634495114, "num_chars": 12}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 521, "native_id": "VASoL_2009_3_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.401466369628906, "logits_per_token_corr": -2.4859237670898438, "logits_per_char_corr": -0.49718475341796875, "bits_per_byte_corr": 0.717285978162221}, "model_output": [{"sum_logits": -18.40218734741211, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -31.537078857421875, "logits_per_token": -3.067031224568685, "logits_per_char": -0.6815624943485966, "bits_per_byte": 0.9832868306533389, "num_chars": 27}, {"sum_logits": -11.911581039428711, "num_tokens": 7, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -23.492389678955078, "logits_per_token": -1.7016544342041016, "logits_per_char": -0.3722369074821472, "bits_per_byte": 0.5370243404607234, "num_chars": 32}, {"sum_logits": -20.65302276611328, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -36.839664459228516, "logits_per_token": -2.58162784576416, "logits_per_char": -0.57369507683648, "bits_per_byte": 0.8276670423349972, "num_chars": 36}, {"sum_logits": -17.401466369628906, "num_tokens": 7, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -37.078773498535156, "logits_per_token": -2.4859237670898438, "logits_per_char": -0.49718475341796875, "bits_per_byte": 0.717285978162221, "num_chars": 35}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 522, "native_id": "Mercury_7085295", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -2.1067473888397217, "logits_per_token_corr": -1.0533736944198608, "logits_per_char_corr": -0.3511245648066203, "bits_per_byte_corr": 0.5065656683811705}, "model_output": [{"sum_logits": -5.225647449493408, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -20.83778953552246, "logits_per_token": -1.7418824831644695, "logits_per_char": -0.8709412415822347, "bits_per_byte": 1.2565026101372694, "num_chars": 6}, {"sum_logits": -3.426487922668457, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -16.111711502075195, "logits_per_token": -1.7132439613342285, "logits_per_char": -0.5710813204447428, "bits_per_byte": 0.8238961889505434, "num_chars": 6}, {"sum_logits": -2.1067473888397217, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -15.933523178100586, "logits_per_token": -1.0533736944198608, "logits_per_char": -0.3511245648066203, "bits_per_byte": 0.5065656683811705, "num_chars": 6}, {"sum_logits": -5.828806400299072, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -20.99820327758789, "logits_per_token": -1.9429354667663574, "logits_per_char": -0.8326866286141532, "bits_per_byte": 1.2013128697170519, "num_chars": 7}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 523, "native_id": "Mercury_7201968", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.544498443603516, "logits_per_token_corr": -4.090749740600586, "logits_per_char_corr": -0.5113437175750732, "bits_per_byte_corr": 0.7377130455358148}, "model_output": [{"sum_logits": -24.544498443603516, "num_tokens": 6, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -38.06367874145508, "logits_per_token": -4.090749740600586, "logits_per_char": -0.5113437175750732, "bits_per_byte": 0.7377130455358148, "num_chars": 48}, {"sum_logits": -16.134023666381836, "num_tokens": 8, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -28.96414566040039, "logits_per_token": -2.0167529582977295, "logits_per_char": -0.3226804733276367, "bits_per_byte": 0.46552951866181935, "num_chars": 50}, {"sum_logits": -28.492483139038086, "num_tokens": 7, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -34.263580322265625, "logits_per_token": -4.070354734148298, "logits_per_char": -0.5479323680584247, "bits_per_byte": 0.7904993101410035, "num_chars": 52}, {"sum_logits": -32.81230926513672, "num_tokens": 8, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -48.565086364746094, "logits_per_token": -4.10153865814209, "logits_per_char": -0.5756545485111705, "bits_per_byte": 0.8304939624028376, "num_chars": 57}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 524, "native_id": "Mercury_7214008", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -31.665498733520508, "logits_per_token_corr": -2.435807594886193, "logits_per_char_corr": -0.49477341771125793, "bits_per_byte_corr": 0.7138071560962282}, "model_output": [{"sum_logits": -30.990028381347656, "num_tokens": 10, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -45.696189880371094, "logits_per_token": -3.0990028381347656, "logits_per_char": -0.573889414469401, "bits_per_byte": 0.8279474122742706, "num_chars": 54}, {"sum_logits": -19.10111427307129, "num_tokens": 7, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -41.56319808959961, "logits_per_token": -2.7287306104387556, "logits_per_char": -0.5162463317046294, "bits_per_byte": 0.7447860226279228, "num_chars": 37}, {"sum_logits": -31.665498733520508, "num_tokens": 13, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -42.22686004638672, "logits_per_token": -2.435807594886193, "logits_per_char": -0.49477341771125793, "bits_per_byte": 0.7138071560962282, "num_chars": 64}, {"sum_logits": -25.09931182861328, "num_tokens": 11, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -37.077964782714844, "logits_per_token": -2.2817556207830254, "logits_per_char": -0.4921433691884957, "bits_per_byte": 0.7100127981351392, "num_chars": 51}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 525, "native_id": "Mercury_176855", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.4885056018829346, "logits_per_token_corr": -2.4885056018829346, "logits_per_char_corr": -0.3110632002353668, "bits_per_byte_corr": 0.44876933638293676}, "model_output": [{"sum_logits": -7.977536201477051, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -16.101266860961914, "logits_per_token": -3.9887681007385254, "logits_per_char": -0.725230563770641, "bits_per_byte": 1.0462865378537625, "num_chars": 11}, {"sum_logits": -2.4885056018829346, "num_tokens": 1, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -14.469803810119629, "logits_per_token": -2.4885056018829346, "logits_per_char": -0.3110632002353668, "bits_per_byte": 0.44876933638293676, "num_chars": 8}, {"sum_logits": -8.425520896911621, "num_tokens": 1, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -15.102890968322754, "logits_per_token": -8.425520896911621, "logits_per_char": -0.9361689885457357, "bits_per_byte": 1.35060635720994, "num_chars": 9}, {"sum_logits": -3.382988691329956, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -14.251230239868164, "logits_per_token": -1.691494345664978, "logits_per_char": -0.22553257942199706, "bits_per_byte": 0.3253747338912452, "num_chars": 15}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 526, "native_id": "Mercury_SC_401678", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -37.971580505371094, "logits_per_token_corr": -5.4245115007672995, "logits_per_char_corr": -1.2248896937216482, "bits_per_byte_corr": 1.7671422867694928}, "model_output": [{"sum_logits": -16.010421752929688, "num_tokens": 6, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -26.854032516479492, "logits_per_token": -2.6684036254882812, "logits_per_char": -0.5520835087217134, "bits_per_byte": 0.7964881401899669, "num_chars": 29}, {"sum_logits": -37.971580505371094, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -42.12493133544922, "logits_per_token": -5.4245115007672995, "logits_per_char": -1.2248896937216482, "bits_per_byte": 1.7671422867694928, "num_chars": 31}, {"sum_logits": -27.763385772705078, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -38.93424987792969, "logits_per_token": -3.966197967529297, "logits_per_char": -0.7932395935058594, "bits_per_byte": 1.1444028277885028, "num_chars": 35}, {"sum_logits": -27.497011184692383, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -42.64982604980469, "logits_per_token": -3.9281444549560547, "logits_per_char": -0.670658809382741, "bits_per_byte": 0.9675561384256722, "num_chars": 41}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 527, "native_id": "Mercury_417143", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.9167401790618896, "logits_per_token_corr": -3.9167401790618896, "logits_per_char_corr": -0.6527900298436483, "bits_per_byte_corr": 0.9417769387978665}, "model_output": [{"sum_logits": -4.506295204162598, "num_tokens": 1, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -12.721945762634277, "logits_per_token": -4.506295204162598, "logits_per_char": -0.643756457737514, "bits_per_byte": 0.9287442491188242, "num_chars": 7}, {"sum_logits": -1.420567274093628, "num_tokens": 1, "num_tokens_all": 212, "is_greedy": true, "sum_logits_uncond": -11.534162521362305, "logits_per_token": -1.420567274093628, "logits_per_char": -0.2841134548187256, "bits_per_byte": 0.4098890723171004, "num_chars": 5}, {"sum_logits": -4.851978302001953, "num_tokens": 1, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -13.63293170928955, "logits_per_token": -4.851978302001953, "logits_per_char": -0.6931397574288505, "bits_per_byte": 0.9999892906863, "num_chars": 7}, {"sum_logits": -3.9167401790618896, "num_tokens": 1, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -11.576622009277344, "logits_per_token": -3.9167401790618896, "logits_per_char": -0.6527900298436483, "bits_per_byte": 0.9417769387978665, "num_chars": 6}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 528, "native_id": "NYSEDREGENTS_2013_4_21", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.420501708984375, "logits_per_token_corr": -2.0600716727120534, "logits_per_char_corr": -0.4120143345424107, "bits_per_byte_corr": 0.5944110372199293}, "model_output": [{"sum_logits": -19.62725830078125, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -43.628883361816406, "logits_per_token": -2.80389404296875, "logits_per_char": -0.56077880859375, "bits_per_byte": 0.8090328061944055, "num_chars": 35}, {"sum_logits": -19.49077606201172, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -41.531044006347656, "logits_per_token": -2.7843965802873885, "logits_per_char": -0.5568793160574776, "bits_per_byte": 0.8034070276503379, "num_chars": 35}, {"sum_logits": -20.785438537597656, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -43.674678802490234, "logits_per_token": -2.969348362513951, "logits_per_char": -0.5938696725027902, "bits_per_byte": 0.8567728314547438, "num_chars": 35}, {"sum_logits": -14.420501708984375, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -39.43985366821289, "logits_per_token": -2.0600716727120534, "logits_per_char": -0.4120143345424107, "bits_per_byte": 0.5944110372199293, "num_chars": 35}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 529, "native_id": "Mercury_7032620", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.459209442138672, "logits_per_token_corr": -1.9098682403564453, "logits_per_char_corr": -0.47746706008911133, "bits_per_byte_corr": 0.6888393597788885}, "model_output": [{"sum_logits": -13.263071060180664, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -27.738821029663086, "logits_per_token": -2.210511843363444, "logits_per_char": -0.552627960840861, "bits_per_byte": 0.7972736185622632, "num_chars": 24}, {"sum_logits": -11.459209442138672, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -24.12262535095215, "logits_per_token": -1.9098682403564453, "logits_per_char": -0.47746706008911133, "bits_per_byte": 0.6888393597788885, "num_chars": 24}, {"sum_logits": -9.783382415771484, "num_tokens": 7, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -28.554813385009766, "logits_per_token": -1.3976260593959264, "logits_per_char": -0.2795252118791853, "bits_per_byte": 0.4032696369818271, "num_chars": 35}, {"sum_logits": -14.902545928955078, "num_tokens": 7, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -34.94302749633789, "logits_per_token": -2.1289351327078685, "logits_per_char": -0.4383101743810317, "bits_per_byte": 0.6323479149511455, "num_chars": 34}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 530, "native_id": "NYSEDREGENTS_2008_8_9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.975009441375732, "logits_per_token_corr": -1.9916698137919109, "logits_per_char_corr": -0.31447418112503855, "bits_per_byte_corr": 0.45369034159703675}, "model_output": [{"sum_logits": -9.804373741149902, "num_tokens": 3, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -21.729238510131836, "logits_per_token": -3.268124580383301, "logits_per_char": -0.5446874300638834, "bits_per_byte": 0.7858178541882832, "num_chars": 18}, {"sum_logits": -7.077563285827637, "num_tokens": 2, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -20.655559539794922, "logits_per_token": -3.5387816429138184, "logits_per_char": -0.4718375523885091, "bits_per_byte": 0.6807176969365776, "num_chars": 15}, {"sum_logits": -12.342267990112305, "num_tokens": 2, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -23.08863639831543, "logits_per_token": -6.171133995056152, "logits_per_char": -0.6856815550062392, "bits_per_byte": 0.989229379037245, "num_chars": 18}, {"sum_logits": -5.975009441375732, "num_tokens": 3, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -23.422298431396484, "logits_per_token": -1.9916698137919109, "logits_per_char": -0.31447418112503855, "bits_per_byte": 0.45369034159703675, "num_chars": 19}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 531, "native_id": "TAKS_2009_8_27", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.940444946289062, "logits_per_token_corr": -2.4491313587535513, "logits_per_char_corr": -0.4416466384637551, "bits_per_byte_corr": 0.6371614151373982}, "model_output": [{"sum_logits": -36.68482208251953, "num_tokens": 9, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -46.012840270996094, "logits_per_token": -4.0760913425021705, "logits_per_char": -0.7054773477407602, "bits_per_byte": 1.0177886710458248, "num_chars": 52}, {"sum_logits": -27.256837844848633, "num_tokens": 12, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -38.64753341674805, "logits_per_token": -2.271403153737386, "logits_per_char": -0.39502663543258887, "bits_per_byte": 0.5699029679580578, "num_chars": 69}, {"sum_logits": -26.940444946289062, "num_tokens": 11, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -39.88473892211914, "logits_per_token": -2.4491313587535513, "logits_per_char": -0.4416466384637551, "bits_per_byte": 0.6371614151373982, "num_chars": 61}, {"sum_logits": -31.095054626464844, "num_tokens": 11, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -44.26036071777344, "logits_per_token": -2.8268231478604404, "logits_per_char": -0.47838545579176683, "bits_per_byte": 0.6901643247046843, "num_chars": 65}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 532, "native_id": "NCEOGA_2013_8_57", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -28.375226974487305, "logits_per_token_corr": -1.8916817982991536, "logits_per_char_corr": -0.394100374645657, "bits_per_byte_corr": 0.5685666561141804}, "model_output": [{"sum_logits": -36.93680953979492, "num_tokens": 18, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -55.84124755859375, "logits_per_token": -2.052044974433051, "logits_per_char": -0.4860106518394069, "bits_per_byte": 0.7011651572284286, "num_chars": 76}, {"sum_logits": -29.782203674316406, "num_tokens": 12, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -47.89137649536133, "logits_per_token": -2.4818503061930337, "logits_per_char": -0.4963700612386068, "bits_per_byte": 0.7161106257952036, "num_chars": 60}, {"sum_logits": -28.375226974487305, "num_tokens": 15, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -48.264408111572266, "logits_per_token": -1.8916817982991536, "logits_per_char": -0.394100374645657, "bits_per_byte": 0.5685666561141804, "num_chars": 72}, {"sum_logits": -37.14771270751953, "num_tokens": 16, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -58.47911071777344, "logits_per_token": -2.3217320442199707, "logits_per_char": -0.5306816101074219, "bits_per_byte": 0.765611727193498, "num_chars": 70}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 533, "native_id": "Mercury_SC_413143", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.266602516174316, "logits_per_token_corr": -1.566650629043579, "logits_per_char_corr": -0.5222168763478597, "bits_per_byte_corr": 0.7533996977761235}, "model_output": [{"sum_logits": -13.923795700073242, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -15.53705883026123, "logits_per_token": -6.961897850036621, "logits_per_char": -1.3923795700073243, "bits_per_byte": 2.0087791006861173, "num_chars": 10}, {"sum_logits": -6.318690776824951, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -15.6222562789917, "logits_per_token": -3.1593453884124756, "logits_per_char": -0.7898363471031189, "bits_per_byte": 1.1394929810803422, "num_chars": 8}, {"sum_logits": -6.266602516174316, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -16.847557067871094, "logits_per_token": -1.566650629043579, "logits_per_char": -0.5222168763478597, "bits_per_byte": 0.7533996977761235, "num_chars": 12}, {"sum_logits": -9.184432983398438, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -15.550552368164062, "logits_per_token": -4.592216491699219, "logits_per_char": -0.6560309273856026, "bits_per_byte": 0.9464525656096766, "num_chars": 14}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 534, "native_id": "Mercury_401195", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.720895767211914, "logits_per_token_corr": -5.860447883605957, "logits_per_char_corr": -0.8372068405151367, "bits_per_byte_corr": 1.2078341570103728}, "model_output": [{"sum_logits": -6.043584823608398, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -20.727197647094727, "logits_per_token": -3.021792411804199, "logits_per_char": -0.549416802146218, "bits_per_byte": 0.792640895837991, "num_chars": 11}, {"sum_logits": -5.5833964347839355, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -20.11473274230957, "logits_per_token": -2.7916982173919678, "logits_per_char": -0.4294920334449181, "bits_per_byte": 0.6196260267527454, "num_chars": 13}, {"sum_logits": -6.947482109069824, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -17.629657745361328, "logits_per_token": -3.473741054534912, "logits_per_char": -0.5344217006976788, "bits_per_byte": 0.771007537340541, "num_chars": 13}, {"sum_logits": -11.720895767211914, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -17.860658645629883, "logits_per_token": -5.860447883605957, "logits_per_char": -0.8372068405151367, "bits_per_byte": 1.2078341570103728, "num_chars": 14}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 535, "native_id": "CSZ10358", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.148427963256836, "logits_per_token_corr": -3.148427963256836, "logits_per_char_corr": -0.3935534954071045, "bits_per_byte_corr": 0.567777676148755}, "model_output": [{"sum_logits": -3.148427963256836, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -14.910344123840332, "logits_per_token": -3.148427963256836, "logits_per_char": -0.3935534954071045, "bits_per_byte": 0.567777676148755, "num_chars": 8}, {"sum_logits": -7.334392070770264, "num_tokens": 2, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -16.18487548828125, "logits_per_token": -3.667196035385132, "logits_per_char": -0.6111993392308553, "bits_per_byte": 0.8817742557035998, "num_chars": 12}, {"sum_logits": -7.136323928833008, "num_tokens": 2, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -21.369840621948242, "logits_per_token": -3.568161964416504, "logits_per_char": -0.4757549285888672, "bits_per_byte": 0.6863692761541348, "num_chars": 15}, {"sum_logits": -7.39674186706543, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -14.924777030944824, "logits_per_token": -7.39674186706543, "logits_per_char": -0.8218602074517144, "bits_per_byte": 1.1856936455954148, "num_chars": 9}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 536, "native_id": "MCAS_1999_4_26", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.017229080200195, "logits_per_token_corr": -1.803445816040039, "logits_per_char_corr": -0.3757178783416748, "bits_per_byte_corr": 0.5420463198572465}, "model_output": [{"sum_logits": -10.298117637634277, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -31.971179962158203, "logits_per_token": -2.0596235275268553, "logits_per_char": -0.39608144760131836, "bits_per_byte": 0.5714247402429543, "num_chars": 26}, {"sum_logits": -9.935855865478516, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -26.70254135131836, "logits_per_token": -1.9871711730957031, "logits_per_char": -0.3821483025184044, "bits_per_byte": 0.5513234609278336, "num_chars": 26}, {"sum_logits": -9.017229080200195, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -29.506301879882812, "logits_per_token": -1.803445816040039, "logits_per_char": -0.3757178783416748, "bits_per_byte": 0.5420463198572465, "num_chars": 24}, {"sum_logits": -7.77197265625, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -25.85906982421875, "logits_per_token": -1.55439453125, "logits_per_char": -0.3238321940104167, "bits_per_byte": 0.46719110037935646, "num_chars": 24}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 537, "native_id": "AKDE&ED_2008_8_36", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.515745162963867, "logits_per_token_corr": -3.7031490325927736, "logits_per_char_corr": -0.5972821020310924, "bits_per_byte_corr": 0.861695926612612}, "model_output": [{"sum_logits": -18.515745162963867, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -31.63416862487793, "logits_per_token": -3.7031490325927736, "logits_per_char": -0.5972821020310924, "bits_per_byte": 0.861695926612612, "num_chars": 31}, {"sum_logits": -17.000328063964844, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -36.449562072753906, "logits_per_token": -3.400065612792969, "logits_per_char": -0.5483976794827369, "bits_per_byte": 0.7911706126253282, "num_chars": 31}, {"sum_logits": -16.50990867614746, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -32.584957122802734, "logits_per_token": -3.301981735229492, "logits_per_char": -0.5325776992305633, "bits_per_byte": 0.7683472055685396, "num_chars": 31}, {"sum_logits": -20.45504379272461, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -37.62916564941406, "logits_per_token": -4.0910087585449215, "logits_per_char": -0.6818347930908203, "bits_per_byte": 0.9836796746983856, "num_chars": 30}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 538, "native_id": "Mercury_7017938", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.541263580322266, "logits_per_token_corr": -4.270631790161133, "logits_per_char_corr": -0.5024272694307215, "bits_per_byte_corr": 0.7248493300156057}, "model_output": [{"sum_logits": -8.060830116271973, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -19.738780975341797, "logits_per_token": -4.030415058135986, "logits_per_char": -0.4242542166458933, "bits_per_byte": 0.6120694544317019, "num_chars": 19}, {"sum_logits": -8.541263580322266, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -22.96305274963379, "logits_per_token": -4.270631790161133, "logits_per_char": -0.5024272694307215, "bits_per_byte": 0.7248493300156057, "num_chars": 17}, {"sum_logits": -8.914379119873047, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -20.853195190429688, "logits_per_token": -4.457189559936523, "logits_per_char": -0.42449424380347844, "bits_per_byte": 0.6124157404216289, "num_chars": 21}, {"sum_logits": -18.90494155883789, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -27.367238998413086, "logits_per_token": -6.301647186279297, "logits_per_char": -0.8219539808190387, "bits_per_byte": 1.1858289319674211, "num_chars": 23}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 539, "native_id": "MDSA_2013_8_32", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.337413787841797, "logits_per_token_corr": -2.722902297973633, "logits_per_char_corr": -0.4805121702306411, "bits_per_byte_corr": 0.6932325250790373}, "model_output": [{"sum_logits": -7.335134029388428, "num_tokens": 3, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -19.364734649658203, "logits_per_token": -2.445044676462809, "logits_per_char": -0.43147847231696634, "bits_per_byte": 0.6224918522624805, "num_chars": 17}, {"sum_logits": -7.08732795715332, "num_tokens": 3, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -26.343456268310547, "logits_per_token": -2.36244265238444, "logits_per_char": -0.37301726090280635, "bits_per_byte": 0.53815015247085, "num_chars": 19}, {"sum_logits": -11.283695220947266, "num_tokens": 5, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -28.2392520904541, "logits_per_token": -2.256739044189453, "logits_per_char": -0.4029891150338309, "bits_per_byte": 0.5813903977919576, "num_chars": 28}, {"sum_logits": -16.337413787841797, "num_tokens": 6, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -36.647926330566406, "logits_per_token": -2.722902297973633, "logits_per_char": -0.4805121702306411, "bits_per_byte": 0.6932325250790373, "num_chars": 34}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 540, "native_id": "Mercury_7038028", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -27.248302459716797, "logits_per_token_corr": -3.4060378074645996, "logits_per_char_corr": -0.6192796013572, "bits_per_byte_corr": 0.8934316098023685}, "model_output": [{"sum_logits": -17.916967391967773, "num_tokens": 6, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -24.358861923217773, "logits_per_token": -2.986161231994629, "logits_per_char": -0.8958483695983886, "bits_per_byte": 1.292436000208987, "num_chars": 20}, {"sum_logits": -15.780021667480469, "num_tokens": 6, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -32.65608215332031, "logits_per_token": -2.6300036112467446, "logits_per_char": -0.584445246943721, "bits_per_byte": 0.8431762594374377, "num_chars": 27}, {"sum_logits": -27.248302459716797, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -38.25864028930664, "logits_per_token": -3.4060378074645996, "logits_per_char": -0.6192796013572, "bits_per_byte": 0.8934316098023685, "num_chars": 44}, {"sum_logits": -23.7132568359375, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -35.18541717529297, "logits_per_token": -2.9641571044921875, "logits_per_char": -0.5646013532366071, "bits_per_byte": 0.8145475723942361, "num_chars": 42}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 541, "native_id": "Mercury_7057103", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -25.416229248046875, "logits_per_token_corr": -3.630889892578125, "logits_per_char_corr": -0.9775472787710336, "bits_per_byte_corr": 1.4103026113184844}, "model_output": [{"sum_logits": -27.380781173706055, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -44.00735855102539, "logits_per_token": -3.9115401676722934, "logits_per_char": -0.9778850419180733, "bits_per_byte": 1.410789900535714, "num_chars": 28}, {"sum_logits": -28.261531829833984, "num_tokens": 5, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -42.03481674194336, "logits_per_token": -5.652306365966797, "logits_per_char": -1.1304612731933594, "bits_per_byte": 1.630910872754255, "num_chars": 25}, {"sum_logits": -25.416229248046875, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -45.04119873046875, "logits_per_token": -3.630889892578125, "logits_per_char": -0.9775472787710336, "bits_per_byte": 1.4103026113184844, "num_chars": 26}, {"sum_logits": -36.837493896484375, "num_tokens": 6, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -49.42301559448242, "logits_per_token": -6.1395823160807295, "logits_per_char": -1.0524998256138394, "bits_per_byte": 1.5184362789506758, "num_chars": 35}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 542, "native_id": "NYSEDREGENTS_2008_4_26", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -30.521167755126953, "logits_per_token_corr": -2.77465161410245, "logits_per_char_corr": -0.6104233551025391, "bits_per_byte_corr": 0.8806547472498686}, "model_output": [{"sum_logits": -30.742233276367188, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -36.910484313964844, "logits_per_token": -4.391747610909598, "logits_per_char": -0.853950924343533, "bits_per_byte": 1.2319907637138465, "num_chars": 36}, {"sum_logits": -36.731231689453125, "num_tokens": 10, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -43.17802429199219, "logits_per_token": -3.6731231689453123, "logits_per_char": -0.8542146904523983, "bits_per_byte": 1.2323712977710612, "num_chars": 43}, {"sum_logits": -30.521167755126953, "num_tokens": 11, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -42.239322662353516, "logits_per_token": -2.77465161410245, "logits_per_char": -0.6104233551025391, "bits_per_byte": 0.8806547472498686, "num_chars": 50}, {"sum_logits": -36.070308685302734, "num_tokens": 11, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -42.01304626464844, "logits_per_token": -3.2791189713911577, "logits_per_char": -0.7361287486796476, "bits_per_byte": 1.0620092951766886, "num_chars": 49}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 543, "native_id": "Mercury_417117", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.0876150131225586, "logits_per_token_corr": -2.0876150131225586, "logits_per_char_corr": -0.2609518766403198, "bits_per_byte_corr": 0.3764739783399284}, "model_output": [{"sum_logits": -10.15984058380127, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "sum_logits_uncond": -13.000348091125488, "logits_per_token": -10.15984058380127, "logits_per_char": -1.1288711759779189, "bits_per_byte": 1.628616847387006, "num_chars": 9}, {"sum_logits": -5.410888671875, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "sum_logits_uncond": -13.852234840393066, "logits_per_token": -5.410888671875, "logits_per_char": -0.49189897017045453, "bits_per_byte": 0.7096602048838128, "num_chars": 11}, {"sum_logits": -2.0876150131225586, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": true, "sum_logits_uncond": -14.330708503723145, "logits_per_token": -2.0876150131225586, "logits_per_char": -0.2609518766403198, "bits_per_byte": 0.3764739783399284, "num_chars": 8}, {"sum_logits": -8.98367691040039, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "sum_logits_uncond": -13.153999328613281, "logits_per_token": -8.98367691040039, "logits_per_char": -0.8983676910400391, "bits_per_byte": 1.296070612759264, "num_chars": 10}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 544, "native_id": "MCAS_2016_8_15", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -75.92825317382812, "logits_per_token_corr": -2.92031742976262, "logits_per_char_corr": -0.5200565285878639, "bits_per_byte_corr": 0.7502829747761797}, "model_output": [{"sum_logits": -67.05764770507812, "num_tokens": 21, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -88.51383972167969, "logits_per_token": -3.1932213192894343, "logits_per_char": -0.558813730875651, "bits_per_byte": 0.8061977983155407, "num_chars": 120}, {"sum_logits": -76.57505798339844, "num_tokens": 26, "num_tokens_all": 237, "is_greedy": false, "sum_logits_uncond": -103.29387664794922, "logits_per_token": -2.9451945378230167, "logits_per_char": -0.5281038481613686, "bits_per_byte": 0.761892802817332, "num_chars": 145}, {"sum_logits": -75.92825317382812, "num_tokens": 26, "num_tokens_all": 237, "is_greedy": false, "sum_logits_uncond": -102.12869262695312, "logits_per_token": -2.92031742976262, "logits_per_char": -0.5200565285878639, "bits_per_byte": 0.7502829747761797, "num_chars": 146}, {"sum_logits": -92.68731689453125, "num_tokens": 27, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -117.06954956054688, "logits_per_token": -3.4328635886863426, "logits_per_char": -0.6018656941203328, "bits_per_byte": 0.8683086521892216, "num_chars": 154}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 545, "native_id": "Mercury_400780", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.65597152709961, "logits_per_token_corr": -4.109328587849935, "logits_per_char_corr": -1.7611408233642578, "bits_per_byte_corr": 2.540789132176546}, "model_output": [{"sum_logits": -20.245159149169922, "num_tokens": 6, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -33.183189392089844, "logits_per_token": -3.3741931915283203, "logits_per_char": -1.5573199345515325, "bits_per_byte": 2.246737746656635, "num_chars": 13}, {"sum_logits": -21.25250244140625, "num_tokens": 6, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -33.95903778076172, "logits_per_token": -3.542083740234375, "logits_per_char": -1.6348078801081731, "bits_per_byte": 2.3585292214399547, "num_chars": 13}, {"sum_logits": -24.65597152709961, "num_tokens": 6, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -38.03765106201172, "logits_per_token": -4.109328587849935, "logits_per_char": -1.7611408233642578, "bits_per_byte": 2.540789132176546, "num_chars": 14}, {"sum_logits": -22.909500122070312, "num_tokens": 6, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -38.48344802856445, "logits_per_token": -3.8182500203450522, "logits_per_char": -1.6363928658621651, "bits_per_byte": 2.3608158725271204, "num_chars": 14}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 546, "native_id": "NYSEDREGENTS_2008_8_32", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.147699356079102, "logits_per_token_corr": -2.024616559346517, "logits_per_char_corr": -0.44991479096589265, "bits_per_byte_corr": 0.6490898377495542}, "model_output": [{"sum_logits": -12.147699356079102, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -31.340953826904297, "logits_per_token": -2.024616559346517, "logits_per_char": -0.44991479096589265, "bits_per_byte": 0.6490898377495542, "num_chars": 27}, {"sum_logits": -11.407546997070312, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -21.114032745361328, "logits_per_token": -3.802515665690104, "logits_per_char": -0.5703773498535156, "bits_per_byte": 0.8228805740696475, "num_chars": 20}, {"sum_logits": -16.527385711669922, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -28.138824462890625, "logits_per_token": -4.1318464279174805, "logits_per_char": -0.9181880950927734, "bits_per_byte": 1.32466541139458, "num_chars": 18}, {"sum_logits": -13.513901710510254, "num_tokens": 5, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -22.704029083251953, "logits_per_token": -2.702780342102051, "logits_per_char": -0.5875609439352284, "bits_per_byte": 0.8476712600360012, "num_chars": 23}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 547, "native_id": "Mercury_SC_416104", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -2.717099666595459, "logits_per_token_corr": -1.3585498332977295, "logits_per_char_corr": -0.1698187291622162, "bits_per_byte_corr": 0.2449966384125713}, "model_output": [{"sum_logits": -4.613987922668457, "num_tokens": 2, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -19.374496459960938, "logits_per_token": -2.3069939613342285, "logits_per_char": -0.3549221478975736, "bits_per_byte": 0.5120444226738565, "num_chars": 13}, {"sum_logits": -6.9162821769714355, "num_tokens": 2, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -18.50987434387207, "logits_per_token": -3.4581410884857178, "logits_per_char": -0.6916282176971436, "bits_per_byte": 0.9978085998112582, "num_chars": 10}, {"sum_logits": -2.717099666595459, "num_tokens": 2, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -16.703508377075195, "logits_per_token": -1.3585498332977295, "logits_per_char": -0.1698187291622162, "bits_per_byte": 0.2449966384125713, "num_chars": 16}, {"sum_logits": -9.883591651916504, "num_tokens": 4, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -27.394987106323242, "logits_per_token": -2.470897912979126, "logits_per_char": -0.4941795825958252, "bits_per_byte": 0.7129504331200871, "num_chars": 20}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 548, "native_id": "Mercury_416646", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.8900671005249023, "logits_per_token_corr": -1.9450335502624512, "logits_per_char_corr": -0.3890067100524902, "bits_per_byte_corr": 0.5612180514656617}, "model_output": [{"sum_logits": -10.786330223083496, "num_tokens": 2, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -22.055625915527344, "logits_per_token": -5.393165111541748, "logits_per_char": -1.348291277885437, "bits_per_byte": 1.9451731402805608, "num_chars": 8}, {"sum_logits": -3.8900671005249023, "num_tokens": 2, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -17.61030387878418, "logits_per_token": -1.9450335502624512, "logits_per_char": -0.3890067100524902, "bits_per_byte": 0.5612180514656617, "num_chars": 10}, {"sum_logits": -4.304821491241455, "num_tokens": 2, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -17.771047592163086, "logits_per_token": -2.1524107456207275, "logits_per_char": -0.3587351242701213, "bits_per_byte": 0.5175453847775618, "num_chars": 12}, {"sum_logits": -8.283803939819336, "num_tokens": 3, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -25.5430908203125, "logits_per_token": -2.761267979939779, "logits_per_char": -0.5177377462387085, "bits_per_byte": 0.74693767898015, "num_chars": 16}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 549, "native_id": "Mercury_SC_405296", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.91859245300293, "logits_per_token_corr": -2.153098742167155, "logits_per_char_corr": -0.36910264151436945, "bits_per_byte_corr": 0.5325025504921802}, "model_output": [{"sum_logits": -13.341789245605469, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -26.351238250732422, "logits_per_token": -4.447263081868489, "logits_per_char": -0.7848111320944393, "bits_per_byte": 1.1322431283079142, "num_chars": 17}, {"sum_logits": -11.346261978149414, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -24.100330352783203, "logits_per_token": -2.8365654945373535, "logits_per_char": -0.4727609157562256, "bits_per_byte": 0.6820498286881217, "num_chars": 24}, {"sum_logits": -12.91859245300293, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -24.6616153717041, "logits_per_token": -2.153098742167155, "logits_per_char": -0.36910264151436945, "bits_per_byte": 0.5325025504921802, "num_chars": 35}, {"sum_logits": -17.593088150024414, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -26.301633834838867, "logits_per_token": -5.864362716674805, "logits_per_char": -0.7330453395843506, "bits_per_byte": 1.0575608761658686, "num_chars": 24}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 550, "native_id": "MCAS_2006_8_31", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -2.6635594367980957, "logits_per_token_corr": -2.6635594367980957, "logits_per_char_corr": -0.4439265727996826, "bits_per_byte_corr": 0.6404506650973957}, "model_output": [{"sum_logits": -4.054145336151123, "num_tokens": 2, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -14.251230239868164, "logits_per_token": -2.0270726680755615, "logits_per_char": -0.2702763557434082, "bits_per_byte": 0.3899263581008364, "num_chars": 15}, {"sum_logits": -6.919743061065674, "num_tokens": 1, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -13.882975578308105, "logits_per_token": -6.919743061065674, "logits_per_char": -0.7688603401184082, "bits_per_byte": 1.109230999825826, "num_chars": 9}, {"sum_logits": -11.784852981567383, "num_tokens": 2, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -15.320372581481934, "logits_per_token": -5.892426490783691, "logits_per_char": -0.7365533113479614, "bits_per_byte": 1.062621809632812, "num_chars": 16}, {"sum_logits": -2.6635594367980957, "num_tokens": 1, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -12.671343803405762, "logits_per_token": -2.6635594367980957, "logits_per_char": -0.4439265727996826, "bits_per_byte": 0.6404506650973957, "num_chars": 6}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 551, "native_id": "MCAS_2015_5_14", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.761573791503906, "logits_per_token_corr": -2.952314758300781, "logits_per_char_corr": -0.6709806268865411, "bits_per_byte_corr": 0.9680204229424763}, "model_output": [{"sum_logits": -14.761573791503906, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -33.03782272338867, "logits_per_token": -2.952314758300781, "logits_per_char": -0.6709806268865411, "bits_per_byte": 0.9680204229424763, "num_chars": 22}, {"sum_logits": -13.535222053527832, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -34.377010345458984, "logits_per_token": -2.7070444107055662, "logits_per_char": -0.6445343835013253, "bits_per_byte": 0.9298665587604554, "num_chars": 21}, {"sum_logits": -18.35359001159668, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -33.007076263427734, "logits_per_token": -3.0589316685994468, "logits_per_char": -0.6797625930220993, "bits_per_byte": 0.9806901219355099, "num_chars": 27}, {"sum_logits": -13.487253189086914, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -28.582231521606445, "logits_per_token": -2.247875531514486, "logits_per_char": -0.44957510630289715, "bits_per_byte": 0.6485997763707843, "num_chars": 30}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 552, "native_id": "Mercury_417465", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.421131134033203, "logits_per_token_corr": -4.140377044677734, "logits_per_char_corr": -0.8280754089355469, "bits_per_byte_corr": 1.194660285954272}, "model_output": [{"sum_logits": -5.494137287139893, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -16.19488525390625, "logits_per_token": -2.7470686435699463, "logits_per_char": -0.4994670261036266, "bits_per_byte": 0.7205786016477782, "num_chars": 11}, {"sum_logits": -10.496221542358398, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -19.531801223754883, "logits_per_token": -5.248110771179199, "logits_per_char": -0.8746851285298666, "bits_per_byte": 1.2619038972702707, "num_chars": 12}, {"sum_logits": -12.421131134033203, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -24.27530860900879, "logits_per_token": -4.140377044677734, "logits_per_char": -0.8280754089355469, "bits_per_byte": 1.194660285954272, "num_chars": 15}, {"sum_logits": -10.61501693725586, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -22.39946174621582, "logits_per_token": -3.5383389790852866, "logits_per_char": -0.6244127610150505, "bits_per_byte": 0.9008371937848461, "num_chars": 17}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 553, "native_id": "MCAS_1998_4_19", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.3268537521362305, "logits_per_token_corr": -2.44228458404541, "logits_per_char_corr": -0.8140948613484701, "bits_per_byte_corr": 1.17449061928147}, "model_output": [{"sum_logits": -8.394790649414062, "num_tokens": 2, "num_tokens_all": 172, "is_greedy": false, "sum_logits_uncond": -13.949690818786621, "logits_per_token": -4.197395324707031, "logits_per_char": -1.1992558070591517, "bits_per_byte": 1.7301604056027726, "num_chars": 7}, {"sum_logits": -10.562026977539062, "num_tokens": 3, "num_tokens_all": 173, "is_greedy": false, "sum_logits_uncond": -18.007667541503906, "logits_per_token": -3.5206756591796875, "logits_per_char": -1.0562026977539063, "bits_per_byte": 1.5237783942242, "num_chars": 10}, {"sum_logits": -10.998472213745117, "num_tokens": 3, "num_tokens_all": 173, "is_greedy": false, "sum_logits_uncond": -19.19963836669922, "logits_per_token": -3.6661574045817056, "logits_per_char": -0.9165393511454264, "bits_per_byte": 1.3222867766780448, "num_chars": 12}, {"sum_logits": -7.3268537521362305, "num_tokens": 3, "num_tokens_all": 173, "is_greedy": false, "sum_logits_uncond": -17.141376495361328, "logits_per_token": -2.44228458404541, "logits_per_char": -0.8140948613484701, "bits_per_byte": 1.17449061928147, "num_chars": 9}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 554, "native_id": "Mercury_7214778", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.024310111999512, "logits_per_token_corr": -2.3373850186665854, "logits_per_char_corr": -0.5008682182856968, "bits_per_byte_corr": 0.7226000946601848}, "model_output": [{"sum_logits": -12.171801567077637, "num_tokens": 6, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -34.26104736328125, "logits_per_token": -2.0286335945129395, "logits_per_char": -0.45080746544731987, "bits_per_byte": 0.6503776947970383, "num_chars": 27}, {"sum_logits": -8.419560432434082, "num_tokens": 4, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -23.21297836303711, "logits_per_token": -2.1048901081085205, "logits_per_char": -0.4952682607314166, "bits_per_byte": 0.7145210636674302, "num_chars": 17}, {"sum_logits": -10.794661521911621, "num_tokens": 4, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -21.510509490966797, "logits_per_token": -2.6986653804779053, "logits_per_char": -0.5140315010434106, "bits_per_byte": 0.7415906974165712, "num_chars": 21}, {"sum_logits": -14.024310111999512, "num_tokens": 6, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -39.899085998535156, "logits_per_token": -2.3373850186665854, "logits_per_char": -0.5008682182856968, "bits_per_byte": 0.7226000946601848, "num_chars": 28}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 555, "native_id": "Mercury_7123393", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.174272537231445, "logits_per_token_corr": -2.8623787562052407, "logits_per_char_corr": -0.8178225017729259, "bits_per_byte_corr": 1.1798684676360534}, "model_output": [{"sum_logits": -11.83754825592041, "num_tokens": 3, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -22.78882598876953, "logits_per_token": -3.9458494186401367, "logits_per_char": -0.6963263679953182, "bits_per_byte": 1.0045865979477908, "num_chars": 17}, {"sum_logits": -14.148137092590332, "num_tokens": 3, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -24.866025924682617, "logits_per_token": -4.71604569753011, "logits_per_char": -0.7074068546295166, "bits_per_byte": 1.0205723610655966, "num_chars": 20}, {"sum_logits": -14.82007122039795, "num_tokens": 6, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -29.818653106689453, "logits_per_token": -2.4700118700663247, "logits_per_char": -0.7410035610198975, "bits_per_byte": 1.0690421627652367, "num_chars": 20}, {"sum_logits": -17.174272537231445, "num_tokens": 6, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -30.635303497314453, "logits_per_token": -2.8623787562052407, "logits_per_char": -0.8178225017729259, "bits_per_byte": 1.1798684676360534, "num_chars": 21}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 556, "native_id": "Mercury_7207550", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -20.69014549255371, "logits_per_token_corr": -3.448357582092285, "logits_per_char_corr": -0.6269741058349609, "bits_per_byte_corr": 0.9045324332545401}, "model_output": [{"sum_logits": -17.391902923583984, "num_tokens": 8, "num_tokens_all": 239, "is_greedy": false, "sum_logits_uncond": -29.377155303955078, "logits_per_token": -2.173987865447998, "logits_per_char": -0.42419275423375574, "bits_per_byte": 0.61198078291451, "num_chars": 41}, {"sum_logits": -39.07144546508789, "num_tokens": 9, "num_tokens_all": 240, "is_greedy": false, "sum_logits_uncond": -58.36726379394531, "logits_per_token": -4.341271718343099, "logits_per_char": -0.7235452863905165, "bits_per_byte": 1.043855196534933, "num_chars": 54}, {"sum_logits": -21.87865447998047, "num_tokens": 8, "num_tokens_all": 239, "is_greedy": false, "sum_logits_uncond": -35.100284576416016, "logits_per_token": -2.7348318099975586, "logits_per_char": -0.5609911405123197, "bits_per_byte": 0.8093391364003487, "num_chars": 39}, {"sum_logits": -20.69014549255371, "num_tokens": 6, "num_tokens_all": 237, "is_greedy": false, "sum_logits_uncond": -40.951210021972656, "logits_per_token": -3.448357582092285, "logits_per_char": -0.6269741058349609, "bits_per_byte": 0.9045324332545401, "num_chars": 33}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 557, "native_id": "Mercury_SC_405827", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.511602401733398, "logits_per_token_corr": -1.7235113779703777, "logits_per_char_corr": -0.38779006004333494, "bits_per_byte_corr": 0.5594627965309547}, "model_output": [{"sum_logits": -17.66669273376465, "num_tokens": 9, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -38.9062385559082, "logits_per_token": -1.9629658593071833, "logits_per_char": -0.430894944725967, "bits_per_byte": 0.6216499999007232, "num_chars": 41}, {"sum_logits": -15.511602401733398, "num_tokens": 9, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -33.607872009277344, "logits_per_token": -1.7235113779703777, "logits_per_char": -0.38779006004333494, "bits_per_byte": 0.5594627965309547, "num_chars": 40}, {"sum_logits": -19.511383056640625, "num_tokens": 9, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -33.109779357910156, "logits_per_token": -2.1679314507378473, "logits_per_char": -0.5419828626844618, "bits_per_byte": 0.7819159882422388, "num_chars": 36}, {"sum_logits": -4.1095051765441895, "num_tokens": 5, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -23.207988739013672, "logits_per_token": -0.8219010353088378, "logits_per_char": -0.19569072269258045, "bits_per_byte": 0.28232203517676596, "num_chars": 21}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 558, "native_id": "NYSEDREGENTS_2015_4_11", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.697937965393066, "logits_per_token_corr": -4.5659793217976885, "logits_per_char_corr": -0.6848968982696533, "bits_per_byte_corr": 0.9880973586545715}, "model_output": [{"sum_logits": -13.470911026000977, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -27.800521850585938, "logits_per_token": -4.490303675333659, "logits_per_char": -0.6735455513000488, "bits_per_byte": 0.9717208266741014, "num_chars": 20}, {"sum_logits": -14.812605857849121, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -27.00857162475586, "logits_per_token": -4.937535285949707, "logits_per_char": -0.7406302928924561, "bits_per_byte": 1.0685036506888546, "num_chars": 20}, {"sum_logits": -18.22766876220703, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -29.051746368408203, "logits_per_token": -6.075889587402344, "logits_per_char": -0.9593509874845806, "bits_per_byte": 1.3840509121169287, "num_chars": 19}, {"sum_logits": -13.697937965393066, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -27.948209762573242, "logits_per_token": -4.5659793217976885, "logits_per_char": -0.6848968982696533, "bits_per_byte": 0.9880973586545715, "num_chars": 20}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 559, "native_id": "Mercury_404097", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.995342254638672, "logits_per_token_corr": -2.998835563659668, "logits_per_char_corr": -0.5997671127319336, "bits_per_byte_corr": 0.8652810392272742}, "model_output": [{"sum_logits": -14.061087608337402, "num_tokens": 4, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -25.511146545410156, "logits_per_token": -3.5152719020843506, "logits_per_char": -0.7811715337965224, "bits_per_byte": 1.1269922978926779, "num_chars": 18}, {"sum_logits": -11.081949234008789, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -20.8475284576416, "logits_per_token": -3.6939830780029297, "logits_per_char": -0.5277118682861328, "bits_per_byte": 0.7613272953952006, "num_chars": 21}, {"sum_logits": -11.995342254638672, "num_tokens": 4, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -24.00865936279297, "logits_per_token": -2.998835563659668, "logits_per_char": -0.5997671127319336, "bits_per_byte": 0.8652810392272742, "num_chars": 20}, {"sum_logits": -5.146402835845947, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -18.361961364746094, "logits_per_token": -1.7154676119486492, "logits_per_char": -0.3216501772403717, "bits_per_byte": 0.46404311560607375, "num_chars": 16}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 560, "native_id": "AIMS_2009_4_4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -21.192502975463867, "logits_per_token_corr": -2.6490628719329834, "logits_per_char_corr": -0.3210985299312707, "bits_per_byte_corr": 0.46324725676891343}, "model_output": [{"sum_logits": -33.447757720947266, "num_tokens": 7, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -37.07878112792969, "logits_per_token": -4.778251102992466, "logits_per_char": -0.6432261100182166, "bits_per_byte": 0.9279791190942466, "num_chars": 52}, {"sum_logits": -43.31001281738281, "num_tokens": 12, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -53.285247802734375, "logits_per_token": -3.609167734781901, "logits_per_char": -0.7100002101210298, "bits_per_byte": 1.0243137821724675, "num_chars": 61}, {"sum_logits": -21.192502975463867, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -34.96044158935547, "logits_per_token": -2.6490628719329834, "logits_per_char": -0.3210985299312707, "bits_per_byte": 0.46324725676891343, "num_chars": 66}, {"sum_logits": -39.356971740722656, "num_tokens": 15, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -50.70457458496094, "logits_per_token": -2.623798116048177, "logits_per_char": -0.5391365991879816, "bits_per_byte": 0.7778096980108006, "num_chars": 73}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 561, "native_id": "NCEOGA_2013_8_18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.1093692779541, "logits_per_token_corr": -3.42187385559082, "logits_per_char_corr": -0.48883912222726006, "bits_per_byte_corr": 0.7052457774302886}, "model_output": [{"sum_logits": -17.1093692779541, "num_tokens": 5, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -26.374095916748047, "logits_per_token": -3.42187385559082, "logits_per_char": -0.48883912222726006, "bits_per_byte": 0.7052457774302886, "num_chars": 35}, {"sum_logits": -15.995522499084473, "num_tokens": 5, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -22.58712387084961, "logits_per_token": -3.1991044998168947, "logits_per_char": -0.6398208999633789, "bits_per_byte": 0.9230664394349434, "num_chars": 25}, {"sum_logits": -20.887577056884766, "num_tokens": 6, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -31.613529205322266, "logits_per_token": -3.4812628428141275, "logits_per_char": -0.5967879159109933, "bits_per_byte": 0.8609829667478683, "num_chars": 35}, {"sum_logits": -17.73956298828125, "num_tokens": 6, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -27.90629005432129, "logits_per_token": -2.9565938313802085, "logits_per_char": -0.5543613433837891, "bits_per_byte": 0.7997743609609108, "num_chars": 32}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 562, "native_id": "Mercury_400884", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.0870602130889893, "logits_per_token_corr": -3.0870602130889893, "logits_per_char_corr": -1.5435301065444946, "bits_per_byte_corr": 2.2268432301761556}, "model_output": [{"sum_logits": -3.4394428730010986, "num_tokens": 1, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -6.575865745544434, "logits_per_token": -3.4394428730010986, "logits_per_char": -1.7197214365005493, "bits_per_byte": 2.4810335881515693, "num_chars": 2}, {"sum_logits": -3.0870602130889893, "num_tokens": 1, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -6.782100677490234, "logits_per_token": -3.0870602130889893, "logits_per_char": -1.5435301065444946, "bits_per_byte": 2.2268432301761556, "num_chars": 2}, {"sum_logits": -4.12345027923584, "num_tokens": 1, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -7.313617706298828, "logits_per_token": -4.12345027923584, "logits_per_char": -2.06172513961792, "bits_per_byte": 2.9744406346050156, "num_chars": 2}, {"sum_logits": -0.817086398601532, "num_tokens": 1, "num_tokens_all": 194, "is_greedy": true, "sum_logits_uncond": -8.148248672485352, "logits_per_token": -0.817086398601532, "logits_per_char": -0.2723621328671773, "bits_per_byte": 0.39293549841369996, "num_chars": 3}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 563, "native_id": "Mercury_7219678", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.016769409179688, "logits_per_token_corr": -5.254192352294922, "logits_per_char_corr": -0.7505989074707031, "bits_per_byte_corr": 1.0828853215054353}, "model_output": [{"sum_logits": -21.016769409179688, "num_tokens": 4, "num_tokens_all": 246, "is_greedy": false, "sum_logits_uncond": -32.444549560546875, "logits_per_token": -5.254192352294922, "logits_per_char": -0.7505989074707031, "bits_per_byte": 1.0828853215054353, "num_chars": 28}, {"sum_logits": -18.07898712158203, "num_tokens": 6, "num_tokens_all": 248, "is_greedy": false, "sum_logits_uncond": -30.66556167602539, "logits_per_token": -3.013164520263672, "logits_per_char": -0.516542489188058, "bits_per_byte": 0.7452132875605877, "num_chars": 35}, {"sum_logits": -16.18909454345703, "num_tokens": 5, "num_tokens_all": 247, "is_greedy": false, "sum_logits_uncond": -33.61231231689453, "logits_per_token": -3.2378189086914064, "logits_per_char": -0.46254555838448663, "bits_per_byte": 0.6673121832669948, "num_chars": 35}, {"sum_logits": -25.612964630126953, "num_tokens": 6, "num_tokens_all": 248, "is_greedy": false, "sum_logits_uncond": -36.87199783325195, "logits_per_token": -4.268827438354492, "logits_per_char": -0.7761504433371804, "bits_per_byte": 1.119748395587125, "num_chars": 33}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 564, "native_id": "ACTAAP_2010_5_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -2.8447821140289307, "logits_per_token_corr": -2.8447821140289307, "logits_per_char_corr": -0.4741303523381551, "bits_per_byte_corr": 0.6840255080536847}, "model_output": [{"sum_logits": -4.844881057739258, "num_tokens": 1, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -9.907538414001465, "logits_per_token": -4.844881057739258, "logits_per_char": -1.2112202644348145, "bits_per_byte": 1.7474214689255811, "num_chars": 4}, {"sum_logits": -2.8447821140289307, "num_tokens": 1, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -10.234457969665527, "logits_per_token": -2.8447821140289307, "logits_per_char": -0.4741303523381551, "bits_per_byte": 0.6840255080536847, "num_chars": 6}, {"sum_logits": -2.9379026889801025, "num_tokens": 1, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -11.345063209533691, "logits_per_token": -2.9379026889801025, "logits_per_char": -0.41970038414001465, "bits_per_byte": 0.605499662858427, "num_chars": 7}, {"sum_logits": -8.549909591674805, "num_tokens": 1, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -12.191210746765137, "logits_per_token": -8.549909591674805, "logits_per_char": -1.2214156559535436, "bits_per_byte": 1.7621303097095837, "num_chars": 7}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 565, "native_id": "ACTAAP_2012_7_9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.63022232055664, "logits_per_token_corr": -2.45377779006958, "logits_per_char_corr": -0.42674396349036176, "bits_per_byte_corr": 0.6156613998572881}, "model_output": [{"sum_logits": -23.07335662841797, "num_tokens": 10, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -24.793132781982422, "logits_per_token": -2.3073356628417967, "logits_per_char": -0.591624528933794, "bits_per_byte": 0.8535337739616669, "num_chars": 39}, {"sum_logits": -18.64964485168457, "num_tokens": 8, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -18.553598403930664, "logits_per_token": -2.3312056064605713, "logits_per_char": -0.45486938662645293, "bits_per_byte": 0.6562378083386596, "num_chars": 41}, {"sum_logits": -19.63022232055664, "num_tokens": 8, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -38.766197204589844, "logits_per_token": -2.45377779006958, "logits_per_char": -0.42674396349036176, "bits_per_byte": 0.6156613998572881, "num_chars": 46}, {"sum_logits": -28.21997833251953, "num_tokens": 13, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -33.79362106323242, "logits_per_token": -2.1707675640399637, "logits_per_char": -0.5759179251534599, "bits_per_byte": 0.8308739345785546, "num_chars": 49}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 566, "native_id": "MCAS_2005_8_6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.115251541137695, "logits_per_token_corr": -1.8550193493182843, "logits_per_char_corr": -0.39533199247766715, "bits_per_byte_corr": 0.5703435050526932}, "model_output": [{"sum_logits": -24.115251541137695, "num_tokens": 13, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -36.0908088684082, "logits_per_token": -1.8550193493182843, "logits_per_char": -0.39533199247766715, "bits_per_byte": 0.5703435050526932, "num_chars": 61}, {"sum_logits": -35.613319396972656, "num_tokens": 13, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -48.04572296142578, "logits_per_token": -2.739486107459435, "logits_per_char": -0.5395957484389796, "bits_per_byte": 0.7784721103582438, "num_chars": 66}, {"sum_logits": -39.713218688964844, "num_tokens": 11, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -48.867164611816406, "logits_per_token": -3.610292608087713, "logits_per_char": -0.5840179218965418, "bits_per_byte": 0.8425597597110241, "num_chars": 68}, {"sum_logits": -33.37615203857422, "num_tokens": 13, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -55.72490692138672, "logits_per_token": -2.5673963106595554, "logits_per_char": -0.4837123483851336, "bits_per_byte": 0.6978494062324881, "num_chars": 69}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 567, "native_id": "Mercury_SC_401162", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.9958276748657227, "logits_per_token_corr": -2.9958276748657227, "logits_per_char_corr": -0.37447845935821533, "bits_per_byte_corr": 0.5402582162362246}, "model_output": [{"sum_logits": -9.316339492797852, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -15.97453498840332, "logits_per_token": -4.658169746398926, "logits_per_char": -1.3309056418282645, "bits_per_byte": 1.9200909693581596, "num_chars": 7}, {"sum_logits": -4.7222490310668945, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -14.712793350219727, "logits_per_token": -4.7222490310668945, "logits_per_char": -0.7870415051778158, "bits_per_byte": 1.1354608764946361, "num_chars": 6}, {"sum_logits": -2.9958276748657227, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -13.256776809692383, "logits_per_token": -2.9958276748657227, "logits_per_char": -0.37447845935821533, "bits_per_byte": 0.5402582162362246, "num_chars": 8}, {"sum_logits": -8.789193153381348, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -16.08466911315918, "logits_per_token": -8.789193153381348, "logits_per_char": -0.7324327627817789, "bits_per_byte": 1.0566771146506342, "num_chars": 12}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 568, "native_id": "Mercury_SC_407710", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.290449142456055, "logits_per_token_corr": -2.6452245712280273, "logits_per_char_corr": -0.40695762634277344, "bits_per_byte_corr": 0.5871157493770848}, "model_output": [{"sum_logits": -7.920370101928711, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -15.387356758117676, "logits_per_token": -3.9601850509643555, "logits_per_char": -0.9900462627410889, "bits_per_byte": 1.4283348335082469, "num_chars": 8}, {"sum_logits": -9.266522407531738, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -14.298053741455078, "logits_per_token": -4.633261203765869, "logits_per_char": -1.0296136008368597, "bits_per_byte": 1.4854184359602334, "num_chars": 9}, {"sum_logits": -9.571810722351074, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -14.280302047729492, "logits_per_token": -4.785905361175537, "logits_per_char": -0.870164611122825, "bits_per_byte": 1.2553821692248748, "num_chars": 11}, {"sum_logits": -5.290449142456055, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -15.8533353805542, "logits_per_token": -2.6452245712280273, "logits_per_char": -0.40695762634277344, "bits_per_byte": 0.5871157493770848, "num_chars": 13}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 569, "native_id": "VASoL_2009_3_23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.626676559448242, "logits_per_token_corr": -2.2711127599080405, "logits_per_char_corr": -0.45422255198160805, "bits_per_byte_corr": 0.6553046232042661}, "model_output": [{"sum_logits": -16.289140701293945, "num_tokens": 7, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -35.61859130859375, "logits_per_token": -2.3270201001848494, "logits_per_char": -0.5090356469154358, "bits_per_byte": 0.7343832034411322, "num_chars": 32}, {"sum_logits": -13.68663215637207, "num_tokens": 6, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -32.55393600463867, "logits_per_token": -2.281105359395345, "logits_per_char": -0.44150426310877644, "bits_per_byte": 0.6369560109188255, "num_chars": 31}, {"sum_logits": -15.515336990356445, "num_tokens": 7, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -35.296295166015625, "logits_per_token": -2.216476712908064, "logits_per_char": -0.4193334321717958, "bits_per_byte": 0.604970263073633, "num_chars": 37}, {"sum_logits": -13.626676559448242, "num_tokens": 6, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -31.662429809570312, "logits_per_token": -2.2711127599080405, "logits_per_char": -0.45422255198160805, "bits_per_byte": 0.6553046232042661, "num_chars": 30}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 570, "native_id": "Mercury_SC_402276", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -33.151206970214844, "logits_per_token_corr": -2.762600580851237, "logits_per_char_corr": -0.6375232109656701, "bits_per_byte_corr": 0.9197515749124414}, "model_output": [{"sum_logits": -26.213777542114258, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -31.995006561279297, "logits_per_token": -3.7448253631591797, "logits_per_char": -0.8456057271649761, "bits_per_byte": 1.2199511891290933, "num_chars": 31}, {"sum_logits": -23.914888381958008, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -30.01753044128418, "logits_per_token": -4.782977676391601, "logits_per_char": -0.6643024550543891, "bits_per_byte": 0.9583858575580192, "num_chars": 36}, {"sum_logits": -36.92998123168945, "num_tokens": 11, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -47.249656677246094, "logits_per_token": -3.3572710210626777, "logits_per_char": -0.7241172790527344, "bits_per_byte": 1.04468040751214, "num_chars": 51}, {"sum_logits": -33.151206970214844, "num_tokens": 12, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -42.273887634277344, "logits_per_token": -2.762600580851237, "logits_per_char": -0.6375232109656701, "bits_per_byte": 0.9197515749124414, "num_chars": 52}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 571, "native_id": "Mercury_400744", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.37311840057373, "logits_per_token_corr": -3.0932796001434326, "logits_per_char_corr": -1.7675883429391044, "bits_per_byte_corr": 2.5500909366932185}, "model_output": [{"sum_logits": -12.37311840057373, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -26.019216537475586, "logits_per_token": -3.0932796001434326, "logits_per_char": -1.7675883429391044, "bits_per_byte": 2.5500909366932185, "num_chars": 7}, {"sum_logits": -16.797748565673828, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -25.068017959594727, "logits_per_token": -4.199437141418457, "logits_per_char": -2.3996783665248325, "bits_per_byte": 3.4620040791163915, "num_chars": 7}, {"sum_logits": -16.51681900024414, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -21.997798919677734, "logits_per_token": -4.129204750061035, "logits_per_char": -2.752803166707357, "bits_per_byte": 3.9714554771549917, "num_chars": 6}, {"sum_logits": -17.499244689941406, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -22.600116729736328, "logits_per_token": -4.374811172485352, "logits_per_char": -2.916540781656901, "bits_per_byte": 4.207678922249855, "num_chars": 6}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 572, "native_id": "Mercury_SC_LBS10902", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.961758613586426, "logits_per_token_corr": -2.987252871195475, "logits_per_char_corr": -0.6401256152561733, "bits_per_byte_corr": 0.9235060506767413}, "model_output": [{"sum_logits": -10.042070388793945, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -19.452838897705078, "logits_per_token": -3.3473567962646484, "logits_per_char": -0.8368391990661621, "bits_per_byte": 1.2073037625151115, "num_chars": 12}, {"sum_logits": -8.961758613586426, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -23.63166046142578, "logits_per_token": -2.987252871195475, "logits_per_char": -0.6401256152561733, "bits_per_byte": 0.9235060506767413, "num_chars": 14}, {"sum_logits": -11.01037883758545, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -26.21058464050293, "logits_per_token": -1.835063139597575, "logits_per_char": -0.4234761091379019, "bits_per_byte": 0.6109468825886435, "num_chars": 26}, {"sum_logits": -17.85761260986328, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -26.78042221069336, "logits_per_token": -2.5510875156947543, "logits_per_char": -0.661393059624566, "bits_per_byte": 0.9541884871994253, "num_chars": 27}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 573, "native_id": "Mercury_7133245", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.283263206481934, "logits_per_token_corr": -3.094421068827311, "logits_per_char_corr": -0.4420601526896159, "bits_per_byte_corr": 0.637757990060385}, "model_output": [{"sum_logits": -12.16208267211914, "num_tokens": 3, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -23.705360412597656, "logits_per_token": -4.054027557373047, "logits_per_char": -0.5791467939104352, "bits_per_byte": 0.8355322075219277, "num_chars": 21}, {"sum_logits": -9.283263206481934, "num_tokens": 3, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -26.053604125976562, "logits_per_token": -3.094421068827311, "logits_per_char": -0.4420601526896159, "bits_per_byte": 0.637757990060385, "num_chars": 21}, {"sum_logits": -5.1911773681640625, "num_tokens": 2, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -18.752607345581055, "logits_per_token": -2.5955886840820312, "logits_per_char": -0.3244485855102539, "bits_per_byte": 0.4680803653394184, "num_chars": 16}, {"sum_logits": -3.20353364944458, "num_tokens": 2, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -17.473278045654297, "logits_per_token": -1.60176682472229, "logits_per_char": -0.17797409163581002, "bits_per_byte": 0.2567623394098855, "num_chars": 18}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 574, "native_id": "Mercury_7131530", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.358768463134766, "logits_per_token_corr": -1.8176409403483074, "logits_per_char_corr": -0.30865600873839183, "bits_per_byte_corr": 0.44529649314777836}, "model_output": [{"sum_logits": -17.171199798583984, "num_tokens": 10, "num_tokens_all": 259, "is_greedy": false, "sum_logits_uncond": -45.41986846923828, "logits_per_token": -1.7171199798583985, "logits_per_char": -0.29103728472176243, "bits_per_byte": 0.4198780473821776, "num_chars": 59}, {"sum_logits": -16.358768463134766, "num_tokens": 9, "num_tokens_all": 258, "is_greedy": false, "sum_logits_uncond": -32.676124572753906, "logits_per_token": -1.8176409403483074, "logits_per_char": -0.30865600873839183, "bits_per_byte": 0.44529649314777836, "num_chars": 53}, {"sum_logits": -23.61764144897461, "num_tokens": 8, "num_tokens_all": 257, "is_greedy": false, "sum_logits_uncond": -42.663909912109375, "logits_per_token": -2.952205181121826, "logits_per_char": -0.4920341968536377, "bits_per_byte": 0.709855295749037, "num_chars": 48}, {"sum_logits": -25.183639526367188, "num_tokens": 10, "num_tokens_all": 259, "is_greedy": false, "sum_logits_uncond": -47.26547622680664, "logits_per_token": -2.5183639526367188, "logits_per_char": -0.5036727905273437, "bits_per_byte": 0.7266462371250265, "num_chars": 50}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 575, "native_id": "Mercury_7041143", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -1.3057749271392822, "logits_per_token_corr": -0.4352583090464274, "logits_per_char_corr": -0.32644373178482056, "bits_per_byte_corr": 0.47095875297558587}, "model_output": [{"sum_logits": -12.13417911529541, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -11.962377548217773, "logits_per_token": -4.044726371765137, "logits_per_char": -3.0335447788238525, "bits_per_byte": 4.376480008726924, "num_chars": 4}, {"sum_logits": -13.604762077331543, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -12.403294563293457, "logits_per_token": -4.534920692443848, "logits_per_char": -3.4011905193328857, "bits_per_byte": 4.906880695363638, "num_chars": 4}, {"sum_logits": -1.3057749271392822, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": true, "sum_logits_uncond": -12.763396263122559, "logits_per_token": -0.4352583090464274, "logits_per_char": -0.32644373178482056, "bits_per_byte": 0.47095875297558587, "num_chars": 4}, {"sum_logits": -9.487356185913086, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -14.09250545501709, "logits_per_token": -3.162452061971029, "logits_per_char": -1.8974712371826172, "bits_per_byte": 2.737472344114775, "num_chars": 5}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 576, "native_id": "MCAS_2010_5_11984", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.48213005065918, "logits_per_token_corr": -2.496426010131836, "logits_per_char_corr": -0.624106502532959, "bits_per_byte_corr": 0.900395356191502}, "model_output": [{"sum_logits": -12.48213005065918, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -24.758153915405273, "logits_per_token": -2.496426010131836, "logits_per_char": -0.624106502532959, "bits_per_byte": 0.900395356191502, "num_chars": 20}, {"sum_logits": -13.462653160095215, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -29.437389373779297, "logits_per_token": -2.692530632019043, "logits_per_char": -0.5609438816706339, "bits_per_byte": 0.8092709563038104, "num_chars": 24}, {"sum_logits": -19.330028533935547, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -28.767576217651367, "logits_per_token": -2.761432647705078, "logits_per_char": -0.6903581619262695, "bits_per_byte": 0.9959762966489646, "num_chars": 28}, {"sum_logits": -18.95170021057129, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -31.665800094604492, "logits_per_token": -2.368962526321411, "logits_per_char": -0.5922406315803528, "bits_per_byte": 0.8544226221945365, "num_chars": 32}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 577, "native_id": "Mercury_7159285", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -36.21770095825195, "logits_per_token_corr": -3.2925182689319956, "logits_per_char_corr": -0.6353982624254728, "bits_per_byte_corr": 0.9166859221913524}, "model_output": [{"sum_logits": -31.379701614379883, "num_tokens": 7, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -43.342647552490234, "logits_per_token": -4.4828145163399835, "logits_per_char": -0.6152882669486252, "bits_per_byte": 0.8876733314445839, "num_chars": 51}, {"sum_logits": -41.03017044067383, "num_tokens": 11, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -52.388187408447266, "logits_per_token": -3.7300154946067114, "logits_per_char": -0.7326816150120327, "bits_per_byte": 1.0570361325291355, "num_chars": 56}, {"sum_logits": -36.21770095825195, "num_tokens": 11, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -44.53374481201172, "logits_per_token": -3.2925182689319956, "logits_per_char": -0.6353982624254728, "bits_per_byte": 0.9166859221913524, "num_chars": 57}, {"sum_logits": -23.14388084411621, "num_tokens": 11, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -37.75071716308594, "logits_per_token": -2.1039891676469282, "logits_per_char": -0.3794078826904297, "bits_per_byte": 0.5473698708320578, "num_chars": 61}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 578, "native_id": "AIMS_2008_8_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.42823600769043, "logits_per_token_corr": -2.9285295009613037, "logits_per_char_corr": -1.1156302860804967, "bits_per_byte_corr": 1.6095142811950245}, "model_output": [{"sum_logits": -32.993778228759766, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -40.256744384765625, "logits_per_token": -4.713396889822824, "logits_per_char": -2.356698444911412, "bits_per_byte": 3.3999971593468685, "num_chars": 14}, {"sum_logits": -18.819440841674805, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -29.35849380493164, "logits_per_token": -2.688491548810686, "logits_per_char": -1.254629389444987, "bits_per_byte": 1.810047598307131, "num_chars": 15}, {"sum_logits": -24.107921600341797, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -38.628055572509766, "logits_per_token": -3.0134902000427246, "logits_per_char": -1.0958146181973545, "bits_per_byte": 1.580926315408092, "num_chars": 22}, {"sum_logits": -23.42823600769043, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -33.095603942871094, "logits_per_token": -2.9285295009613037, "logits_per_char": -1.1156302860804967, "bits_per_byte": 1.6095142811950245, "num_chars": 21}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 579, "native_id": "MDSA_2013_8_20", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -29.47093391418457, "logits_per_token_corr": -2.947093391418457, "logits_per_char_corr": -0.6406724763953168, "bits_per_byte_corr": 0.924295004530239}, "model_output": [{"sum_logits": -19.883586883544922, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -28.852603912353516, "logits_per_token": -2.8405124119349887, "logits_per_char": -0.6856409270187904, "bits_per_byte": 0.9891707652412313, "num_chars": 29}, {"sum_logits": -16.24383544921875, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -29.523590087890625, "logits_per_token": -2.707305908203125, "logits_per_char": -0.5076198577880859, "bits_per_byte": 0.7323406514881585, "num_chars": 32}, {"sum_logits": -29.47093391418457, "num_tokens": 10, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -43.19997787475586, "logits_per_token": -2.947093391418457, "logits_per_char": -0.6406724763953168, "bits_per_byte": 0.924295004530239, "num_chars": 46}, {"sum_logits": -23.539592742919922, "num_tokens": 8, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -38.29656982421875, "logits_per_token": -2.9424490928649902, "logits_per_char": -0.5008423987855303, "bits_per_byte": 0.7225628449953362, "num_chars": 47}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 580, "native_id": "Mercury_7114100", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.218074798583984, "logits_per_token_corr": -2.203012466430664, "logits_per_char_corr": -0.472074099949428, "bits_per_byte_corr": 0.68105896292965}, "model_output": [{"sum_logits": -13.218074798583984, "num_tokens": 6, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -33.528076171875, "logits_per_token": -2.203012466430664, "logits_per_char": -0.472074099949428, "bits_per_byte": 0.68105896292965, "num_chars": 28}, {"sum_logits": -18.94193458557129, "num_tokens": 6, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -30.88239097595215, "logits_per_token": -3.156989097595215, "logits_per_char": -0.8235623732857082, "bits_per_byte": 1.1881493518028903, "num_chars": 23}, {"sum_logits": -10.581155776977539, "num_tokens": 5, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -26.415407180786133, "logits_per_token": -2.116231155395508, "logits_per_char": -0.44088149070739746, "bits_per_byte": 0.6360575402637529, "num_chars": 24}, {"sum_logits": -8.277498245239258, "num_tokens": 7, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -28.970081329345703, "logits_per_token": -1.1824997493198939, "logits_per_char": -0.2670160724270728, "bits_per_byte": 0.38522276352846296, "num_chars": 31}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 581, "native_id": "Mercury_7213343", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.355132102966309, "logits_per_token_corr": -1.7258553504943848, "logits_per_char_corr": -0.24081702565037927, "bits_per_byte_corr": 0.34742552866768206}, "model_output": [{"sum_logits": -10.355132102966309, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -34.97139358520508, "logits_per_token": -1.7258553504943848, "logits_per_char": -0.24081702565037927, "bits_per_byte": 0.34742552866768206, "num_chars": 43}, {"sum_logits": -19.423295974731445, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -41.015350341796875, "logits_per_token": -2.158143997192383, "logits_per_char": -0.3884659194946289, "bits_per_byte": 0.5604378556096751, "num_chars": 50}, {"sum_logits": -21.82623863220215, "num_tokens": 5, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -43.369728088378906, "logits_per_token": -4.36524772644043, "logits_per_char": -0.5596471444154397, "bits_per_byte": 0.8074001598964045, "num_chars": 39}, {"sum_logits": -25.212881088256836, "num_tokens": 10, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -43.320960998535156, "logits_per_token": -2.5212881088256838, "logits_per_char": -0.548106110614279, "bits_per_byte": 0.7907499676647262, "num_chars": 46}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 582, "native_id": "Mercury_SC_LBS10597", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.393538475036621, "logits_per_token_corr": -4.131179491678874, "logits_per_char_corr": -0.8852527482169015, "bits_per_byte_corr": 1.2771497497867674}, "model_output": [{"sum_logits": -12.393538475036621, "num_tokens": 3, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -19.577444076538086, "logits_per_token": -4.131179491678874, "logits_per_char": -0.8852527482169015, "bits_per_byte": 1.2771497497867674, "num_chars": 14}, {"sum_logits": -7.921923637390137, "num_tokens": 2, "num_tokens_all": 175, "is_greedy": false, "sum_logits_uncond": -16.375511169433594, "logits_per_token": -3.9609618186950684, "logits_per_char": -0.5658516883850098, "bits_per_byte": 0.8163514247122872, "num_chars": 14}, {"sum_logits": -20.220504760742188, "num_tokens": 4, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -26.736825942993164, "logits_per_token": -5.055126190185547, "logits_per_char": -0.9628811790829613, "bits_per_byte": 1.3891439020293042, "num_chars": 21}, {"sum_logits": -20.823211669921875, "num_tokens": 5, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -28.040390014648438, "logits_per_token": -4.164642333984375, "logits_per_char": -0.8008927565354568, "bits_per_byte": 1.1554440081384256, "num_chars": 26}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 583, "native_id": "Mercury_7126263", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.614044189453125, "logits_per_token_corr": -3.122808837890625, "logits_per_char_corr": -0.43372344970703125, "bits_per_byte_corr": 0.6257306700100372}, "model_output": [{"sum_logits": -20.512073516845703, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -35.50996017456055, "logits_per_token": -4.10241470336914, "logits_per_char": -0.6032962799072266, "bits_per_byte": 0.870372551209541, "num_chars": 34}, {"sum_logits": -15.614044189453125, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -35.788658142089844, "logits_per_token": -3.122808837890625, "logits_per_char": -0.43372344970703125, "bits_per_byte": 0.6257306700100372, "num_chars": 36}, {"sum_logits": -19.311073303222656, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -31.828052520751953, "logits_per_token": -3.8622146606445313, "logits_per_char": -0.5081861395584909, "bits_per_byte": 0.7331576233900682, "num_chars": 38}, {"sum_logits": -15.654522895812988, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -28.17829132080078, "logits_per_token": -3.1309045791625976, "logits_per_char": -0.38181763160519483, "bits_per_byte": 0.5508464036411794, "num_chars": 41}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 584, "native_id": "Mercury_7133613", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -27.279315948486328, "logits_per_token_corr": -3.409914493560791, "logits_per_char_corr": -0.6062070210774739, "bits_per_byte_corr": 0.8745718630611713}, "model_output": [{"sum_logits": -27.279315948486328, "num_tokens": 8, "num_tokens_all": 273, "is_greedy": false, "sum_logits_uncond": -57.59281921386719, "logits_per_token": -3.409914493560791, "logits_per_char": -0.6062070210774739, "bits_per_byte": 0.8745718630611713, "num_chars": 45}, {"sum_logits": -20.723134994506836, "num_tokens": 7, "num_tokens_all": 272, "is_greedy": false, "sum_logits_uncond": -38.59269714355469, "logits_per_token": -2.9604478563581194, "logits_per_char": -0.5054423169391912, "bits_per_byte": 0.7291991241041227, "num_chars": 41}, {"sum_logits": -16.959362030029297, "num_tokens": 10, "num_tokens_all": 275, "is_greedy": false, "sum_logits_uncond": -42.8790283203125, "logits_per_token": -1.6959362030029297, "logits_per_char": -0.4845532008579799, "bits_per_byte": 0.6990624999251837, "num_chars": 35}, {"sum_logits": -25.675867080688477, "num_tokens": 9, "num_tokens_all": 274, "is_greedy": false, "sum_logits_uncond": -39.84496307373047, "logits_per_token": -2.8528741200764975, "logits_per_char": -0.626240660504597, "bits_per_byte": 0.9034742953136602, "num_chars": 41}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 585, "native_id": "Mercury_7234605", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.165660858154297, "logits_per_token_corr": -3.0276101430257163, "logits_per_char_corr": -0.5342841428868911, "bits_per_byte_corr": 0.7708090833690818}, "model_output": [{"sum_logits": -11.44431209564209, "num_tokens": 5, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -25.801254272460938, "logits_per_token": -2.288862419128418, "logits_per_char": -0.4768463373184204, "bits_per_byte": 0.6879438461158452, "num_chars": 24}, {"sum_logits": -12.08627700805664, "num_tokens": 5, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -26.659061431884766, "logits_per_token": -2.417255401611328, "logits_per_char": -0.447639889187283, "bits_per_byte": 0.6458078482350422, "num_chars": 27}, {"sum_logits": -18.610336303710938, "num_tokens": 6, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -29.853363037109375, "logits_per_token": -3.1017227172851562, "logits_per_char": -0.6003334291519657, "bits_per_byte": 0.8660980611180291, "num_chars": 31}, {"sum_logits": -18.165660858154297, "num_tokens": 6, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -30.840229034423828, "logits_per_token": -3.0276101430257163, "logits_per_char": -0.5342841428868911, "bits_per_byte": 0.7708090833690818, "num_chars": 34}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 586, "native_id": "Mercury_SC_400839", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.446612358093262, "logits_per_token_corr": -3.0893224716186523, "logits_per_char_corr": -0.5720967540034542, "bits_per_byte_corr": 0.8253611499100495}, "model_output": [{"sum_logits": -15.14612865447998, "num_tokens": 4, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -27.113201141357422, "logits_per_token": -3.786532163619995, "logits_per_char": -0.5825434097876916, "bits_per_byte": 0.8404324884038537, "num_chars": 26}, {"sum_logits": -15.446612358093262, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -32.32468795776367, "logits_per_token": -3.0893224716186523, "logits_per_char": -0.5720967540034542, "bits_per_byte": 0.8253611499100495, "num_chars": 27}, {"sum_logits": -26.648452758789062, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -33.65608215332031, "logits_per_token": -5.329690551757812, "logits_per_char": -0.8882817586263021, "bits_per_byte": 1.2815196880832138, "num_chars": 30}, {"sum_logits": -18.484445571899414, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -29.737945556640625, "logits_per_token": -3.696889114379883, "logits_per_char": -0.6846090952555338, "bits_per_byte": 0.9876821466733482, "num_chars": 27}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 587, "native_id": "Mercury_SC_402984", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.15588092803955, "logits_per_token_corr": -2.0389702320098877, "logits_per_char_corr": -0.45310449600219727, "bits_per_byte_corr": 0.6536916093873327}, "model_output": [{"sum_logits": -10.34382438659668, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -15.336124420166016, "logits_per_token": -5.17191219329834, "logits_per_char": -0.7956787989689753, "bits_per_byte": 1.1479218574138519, "num_chars": 13}, {"sum_logits": -8.15588092803955, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -20.802522659301758, "logits_per_token": -2.0389702320098877, "logits_per_char": -0.45310449600219727, "bits_per_byte": 0.6536916093873327, "num_chars": 18}, {"sum_logits": -14.435714721679688, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -22.27220344543457, "logits_per_token": -2.4059524536132812, "logits_per_char": -0.49778326626481684, "bits_per_byte": 0.7181494496782777, "num_chars": 29}, {"sum_logits": -17.36624526977539, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -26.47149085998535, "logits_per_token": -2.4808921813964844, "logits_per_char": -0.5602014603153351, "bits_per_byte": 0.80819986869627, "num_chars": 31}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 588, "native_id": "NYSEDREGENTS_2012_4_29", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -24.477100372314453, "logits_per_token_corr": -2.2251909429376777, "logits_per_char_corr": -0.5207893696237118, "bits_per_byte_corr": 0.7513402409043581}, "model_output": [{"sum_logits": -18.911447525024414, "num_tokens": 10, "num_tokens_all": 262, "is_greedy": false, "sum_logits_uncond": -25.65845489501953, "logits_per_token": -1.8911447525024414, "logits_per_char": -0.4502725601196289, "bits_per_byte": 0.649605989533433, "num_chars": 42}, {"sum_logits": -19.492347717285156, "num_tokens": 10, "num_tokens_all": 262, "is_greedy": false, "sum_logits_uncond": -28.18989372253418, "logits_per_token": -1.9492347717285157, "logits_per_char": -0.47542311505573553, "bits_per_byte": 0.6858905704153855, "num_chars": 41}, {"sum_logits": -24.477100372314453, "num_tokens": 11, "num_tokens_all": 263, "is_greedy": false, "sum_logits_uncond": -42.304351806640625, "logits_per_token": -2.2251909429376777, "logits_per_char": -0.5207893696237118, "bits_per_byte": 0.7513402409043581, "num_chars": 47}, {"sum_logits": -28.914867401123047, "num_tokens": 11, "num_tokens_all": 263, "is_greedy": false, "sum_logits_uncond": -41.03177261352539, "logits_per_token": -2.6286243091930044, "logits_per_char": -0.5782973480224609, "bits_per_byte": 0.8343067161518427, "num_chars": 50}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 589, "native_id": "VASoL_2009_3_22", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.11573600769043, "logits_per_token_corr": -2.7789340019226074, "logits_per_char_corr": -0.7939811434064593, "bits_per_byte_corr": 1.1454726581526706}, "model_output": [{"sum_logits": -11.915040016174316, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -17.148283004760742, "logits_per_token": -2.978760004043579, "logits_per_char": -0.9929200013478597, "bits_per_byte": 1.4324807619450493, "num_chars": 12}, {"sum_logits": -13.911541938781738, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -17.070728302001953, "logits_per_token": -3.4778854846954346, "logits_per_char": -1.1592951615651448, "bits_per_byte": 1.6725093805178055, "num_chars": 12}, {"sum_logits": -14.24213981628418, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -17.870643615722656, "logits_per_token": -3.560534954071045, "logits_per_char": -1.0172957011631556, "bits_per_byte": 1.4676474631868, "num_chars": 14}, {"sum_logits": -11.11573600769043, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -17.392284393310547, "logits_per_token": -2.7789340019226074, "logits_per_char": -0.7939811434064593, "bits_per_byte": 1.1454726581526706, "num_chars": 14}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 590, "native_id": "Mercury_409349", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.886476516723633, "logits_per_token_corr": -2.971619129180908, "logits_per_char_corr": -0.6256040271959806, "bits_per_byte_corr": 0.9025558275964539}, "model_output": [{"sum_logits": -11.886476516723633, "num_tokens": 4, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -23.161266326904297, "logits_per_token": -2.971619129180908, "logits_per_char": -0.6256040271959806, "bits_per_byte": 0.9025558275964539, "num_chars": 19}, {"sum_logits": -7.36442232131958, "num_tokens": 3, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -18.891483306884766, "logits_per_token": -2.45480744043986, "logits_per_char": -0.368221116065979, "bits_per_byte": 0.531230778099369, "num_chars": 20}, {"sum_logits": -12.664876937866211, "num_tokens": 6, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -26.428356170654297, "logits_per_token": -2.1108128229777017, "logits_per_char": -0.4367198944091797, "bits_per_byte": 0.6300536259221279, "num_chars": 29}, {"sum_logits": -22.174177169799805, "num_tokens": 6, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -35.934364318847656, "logits_per_token": -3.6956961949666343, "logits_per_char": -0.599302085670265, "bits_per_byte": 0.8646101469915252, "num_chars": 37}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 591, "native_id": "Mercury_SC_407417", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.231277465820312, "logits_per_token_corr": -4.115638732910156, "logits_per_char_corr": -0.5487518310546875, "bits_per_byte_corr": 0.7916815453419047}, "model_output": [{"sum_logits": -6.195830345153809, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -16.508800506591797, "logits_per_token": -2.065276781717936, "logits_per_char": -0.4425593103681292, "bits_per_byte": 0.6384781223677983, "num_chars": 14}, {"sum_logits": -9.339330673217773, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -18.601566314697266, "logits_per_token": -3.1131102244059243, "logits_per_char": -0.6226220448811849, "bits_per_byte": 0.8982537364988764, "num_chars": 15}, {"sum_logits": -11.55853271484375, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -22.468822479248047, "logits_per_token": -2.8896331787109375, "logits_per_char": -0.6421407063802084, "bits_per_byte": 0.9264132126483281, "num_chars": 18}, {"sum_logits": -8.231277465820312, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -20.9866886138916, "logits_per_token": -4.115638732910156, "logits_per_char": -0.5487518310546875, "bits_per_byte": 0.7916815453419047, "num_chars": 15}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 592, "native_id": "VASoL_2007_5_21", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.285696983337402, "logits_per_token_corr": -10.285696983337402, "logits_per_char_corr": -1.2857121229171753, "bits_per_byte_corr": 1.8548905037447627}, "model_output": [{"sum_logits": -6.386027812957764, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -12.896053314208984, "logits_per_token": -2.128675937652588, "logits_per_char": -0.7095586458841959, "bits_per_byte": 1.023676739637753, "num_chars": 9}, {"sum_logits": -15.745994567871094, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -17.617870330810547, "logits_per_token": -5.248664855957031, "logits_per_char": -1.749554951985677, "bits_per_byte": 2.524074252994278, "num_chars": 9}, {"sum_logits": -10.285696983337402, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -11.256203651428223, "logits_per_token": -10.285696983337402, "logits_per_char": -1.2857121229171753, "bits_per_byte": 1.8548905037447627, "num_chars": 8}, {"sum_logits": -6.864692687988281, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -13.901801109313965, "logits_per_token": -3.4323463439941406, "logits_per_char": -0.9806703839983258, "bits_per_byte": 1.4148082997420766, "num_chars": 7}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 593, "native_id": "MCAS_2012_8_23651", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.860596656799316, "logits_per_token_corr": -4.860596656799316, "logits_per_char_corr": -0.3738920505230243, "bits_per_byte_corr": 0.5394122071177605}, "model_output": [{"sum_logits": -4.860596656799316, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -16.48602294921875, "logits_per_token": -4.860596656799316, "logits_per_char": -0.3738920505230243, "bits_per_byte": 0.5394122071177605, "num_chars": 13}, {"sum_logits": -6.319130897521973, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -16.763668060302734, "logits_per_token": -6.319130897521973, "logits_per_char": -0.5265942414601644, "bits_per_byte": 0.7597149007158104, "num_chars": 12}, {"sum_logits": -7.4253387451171875, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -13.859761238098145, "logits_per_token": -7.4253387451171875, "logits_per_char": -0.7425338745117187, "bits_per_byte": 1.071249938450894, "num_chars": 10}, {"sum_logits": -6.438794136047363, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -17.724884033203125, "logits_per_token": -3.2193970680236816, "logits_per_char": -0.49529185661902797, "bits_per_byte": 0.7145551053374726, "num_chars": 13}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 594, "native_id": "MCAS_2000_4_26", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -27.139812469482422, "logits_per_token_corr": -2.4672556790438565, "logits_per_char_corr": -0.47613706086811264, "bits_per_byte_corr": 0.6869205764983661}, "model_output": [{"sum_logits": -19.228147506713867, "num_tokens": 7, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -39.404666900634766, "logits_per_token": -2.746878215244838, "logits_per_char": -0.6630395691970299, "bits_per_byte": 0.9565638983943969, "num_chars": 29}, {"sum_logits": -16.674890518188477, "num_tokens": 6, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -35.697601318359375, "logits_per_token": -2.7791484196980796, "logits_per_char": -0.5558296839396158, "bits_per_byte": 0.8018927285991398, "num_chars": 30}, {"sum_logits": -21.542625427246094, "num_tokens": 13, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -47.98407745361328, "logits_per_token": -1.6571250328650842, "logits_per_char": -0.3419464353531126, "bits_per_byte": 0.4933244265339485, "num_chars": 63}, {"sum_logits": -27.139812469482422, "num_tokens": 11, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -54.420555114746094, "logits_per_token": -2.4672556790438565, "logits_per_char": -0.47613706086811264, "bits_per_byte": 0.6869205764983661, "num_chars": 57}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 595, "native_id": "Mercury_SC_410971", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.399560928344727, "logits_per_token_corr": -3.0799121856689453, "logits_per_char_corr": -0.6695461273193359, "bits_per_byte_corr": 0.9659508775307104}, "model_output": [{"sum_logits": -15.399560928344727, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -27.41642189025879, "logits_per_token": -3.0799121856689453, "logits_per_char": -0.6695461273193359, "bits_per_byte": 0.9659508775307104, "num_chars": 23}, {"sum_logits": -18.995342254638672, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -29.2684326171875, "logits_per_token": -2.7136203220912387, "logits_per_char": -0.5586865369011375, "bits_per_byte": 0.8060142961992789, "num_chars": 34}, {"sum_logits": -28.993101119995117, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -36.7441520690918, "logits_per_token": -4.141871588570731, "logits_per_char": -0.8283743177141462, "bits_per_byte": 1.195091520166836, "num_chars": 35}, {"sum_logits": -25.050464630126953, "num_tokens": 9, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -33.34563446044922, "logits_per_token": -2.7833849589029946, "logits_per_char": -0.6262616157531739, "bits_per_byte": 0.9035045273468626, "num_chars": 40}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 596, "native_id": "Mercury_404841", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -29.40564727783203, "logits_per_token_corr": -4.200806753976004, "logits_per_char_corr": -0.9801882425944011, "bits_per_byte_corr": 1.4141127167296266}, "model_output": [{"sum_logits": -28.747581481933594, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -44.511810302734375, "logits_per_token": -4.106797354561942, "logits_per_char": -0.9582527160644532, "bits_per_byte": 1.3824664413855596, "num_chars": 30}, {"sum_logits": -31.027982711791992, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -45.76153564453125, "logits_per_token": -4.432568958827427, "logits_per_char": -1.0342660903930665, "bits_per_byte": 1.4921305595707655, "num_chars": 30}, {"sum_logits": -28.67308807373047, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -46.53547668457031, "logits_per_token": -4.096155439104352, "logits_per_char": -0.9557696024576823, "bits_per_byte": 1.3788840656991048, "num_chars": 30}, {"sum_logits": -29.40564727783203, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -47.23204040527344, "logits_per_token": -4.200806753976004, "logits_per_char": -0.9801882425944011, "bits_per_byte": 1.4141127167296266, "num_chars": 30}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 597, "native_id": "Mercury_416651", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.114706039428711, "logits_per_token_corr": -3.704902013142904, "logits_per_char_corr": -1.5878151484898158, "bits_per_byte_corr": 2.290733040576276}, "model_output": [{"sum_logits": -11.114706039428711, "num_tokens": 3, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -20.818037033081055, "logits_per_token": -3.704902013142904, "logits_per_char": -1.5878151484898158, "bits_per_byte": 2.290733040576276, "num_chars": 7}, {"sum_logits": -5.346197605133057, "num_tokens": 3, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -17.067554473876953, "logits_per_token": -1.7820658683776855, "logits_per_char": -0.7637425150190081, "bits_per_byte": 1.1018475389347793, "num_chars": 7}, {"sum_logits": -8.657590866088867, "num_tokens": 4, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -19.329957962036133, "logits_per_token": -2.164397716522217, "logits_per_char": -1.0821988582611084, "bits_per_byte": 1.5612829260701209, "num_chars": 8}, {"sum_logits": -7.5582709312438965, "num_tokens": 3, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -21.335268020629883, "logits_per_token": -2.5194236437479653, "logits_per_char": -0.6871155392039906, "bits_per_byte": 0.9912981809280556, "num_chars": 11}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 598, "native_id": "Mercury_416576", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.329544067382812, "logits_per_token_corr": -1.794580312875601, "logits_per_char_corr": -0.44864507821890026, "bits_per_byte_corr": 0.6472580294661135}, "model_output": [{"sum_logits": -21.51177978515625, "num_tokens": 12, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -44.68896484375, "logits_per_token": -1.7926483154296875, "logits_per_char": -0.4390159139827806, "bits_per_byte": 0.6333660819747483, "num_chars": 49}, {"sum_logits": -23.329544067382812, "num_tokens": 13, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -40.847198486328125, "logits_per_token": -1.794580312875601, "logits_per_char": -0.44864507821890026, "bits_per_byte": 0.6472580294661135, "num_chars": 52}, {"sum_logits": -25.238115310668945, "num_tokens": 16, "num_tokens_all": 230, "is_greedy": false, "sum_logits_uncond": -49.80158996582031, "logits_per_token": -1.577382206916809, "logits_per_char": -0.4673725057531286, "bits_per_byte": 0.6742759962983715, "num_chars": 54}, {"sum_logits": -31.86419677734375, "num_tokens": 16, "num_tokens_all": 230, "is_greedy": false, "sum_logits_uncond": -43.666744232177734, "logits_per_token": -1.9915122985839844, "logits_per_char": -0.5690035138811383, "bits_per_byte": 0.8208985477253025, "num_chars": 56}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 599, "native_id": "MCAS_1998_8_24", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -34.038421630859375, "logits_per_token_corr": -2.0022600959329044, "logits_per_char_corr": -0.35456689198811847, "bits_per_byte_corr": 0.5115318967350387}, "model_output": [{"sum_logits": -15.644523620605469, "num_tokens": 7, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -38.944541931152344, "logits_per_token": -2.2349319458007812, "logits_per_char": -0.3259275754292806, "bits_per_byte": 0.47021409676112447, "num_chars": 48}, {"sum_logits": -46.646888732910156, "num_tokens": 17, "num_tokens_all": 234, "is_greedy": false, "sum_logits_uncond": -66.39459228515625, "logits_per_token": -2.7439346313476562, "logits_per_char": -0.5424056829408158, "bits_per_byte": 0.7825259889292686, "num_chars": 86}, {"sum_logits": -20.116313934326172, "num_tokens": 9, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -35.32652282714844, "logits_per_token": -2.235145992702908, "logits_per_char": -0.4280066794537483, "bits_per_byte": 0.6174831139157185, "num_chars": 47}, {"sum_logits": -34.038421630859375, "num_tokens": 17, "num_tokens_all": 234, "is_greedy": false, "sum_logits_uncond": -50.1965446472168, "logits_per_token": -2.0022600959329044, "logits_per_char": -0.35456689198811847, "bits_per_byte": 0.5115318967350387, "num_chars": 96}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 600, "native_id": "Mercury_SC_408367", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.501209259033203, "logits_per_token_corr": -5.500403086344401, "logits_per_char_corr": -1.1000806172688802, "bits_per_byte_corr": 1.5870808511130234}, "model_output": [{"sum_logits": -11.973514556884766, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -21.317480087280273, "logits_per_token": -3.9911715189615884, "logits_per_char": -0.9210395812988281, "bits_per_byte": 1.3287792364032212, "num_chars": 13}, {"sum_logits": -16.501209259033203, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -25.022228240966797, "logits_per_token": -5.500403086344401, "logits_per_char": -1.1000806172688802, "bits_per_byte": 1.5870808511130234, "num_chars": 15}, {"sum_logits": -12.525156021118164, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -18.654796600341797, "logits_per_token": -4.175052007039388, "logits_per_char": -0.8946540015084403, "bits_per_byte": 1.2907128912886214, "num_chars": 14}, {"sum_logits": -13.308794021606445, "num_tokens": 4, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -26.611225128173828, "logits_per_token": -3.3271985054016113, "logits_per_char": -0.7004628432424445, "bits_per_byte": 1.010554270273584, "num_chars": 19}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 601, "native_id": "Mercury_405804", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -1.7634084224700928, "logits_per_token_corr": -1.7634084224700928, "logits_per_char_corr": -0.29390140374501544, "bits_per_byte_corr": 0.4240100976935434}, "model_output": [{"sum_logits": -3.894765615463257, "num_tokens": 1, "num_tokens_all": 173, "is_greedy": false, "sum_logits_uncond": -12.446762084960938, "logits_per_token": -3.894765615463257, "logits_per_char": -0.9736914038658142, "bits_per_byte": 1.4047397597144322, "num_chars": 4}, {"sum_logits": -3.9515187740325928, "num_tokens": 1, "num_tokens_all": 173, "is_greedy": false, "sum_logits_uncond": -12.599719047546387, "logits_per_token": -3.9515187740325928, "logits_per_char": -0.7903037548065186, "bits_per_byte": 1.1401673078561108, "num_chars": 5}, {"sum_logits": -4.537245273590088, "num_tokens": 2, "num_tokens_all": 174, "is_greedy": false, "sum_logits_uncond": -13.967909812927246, "logits_per_token": -2.268622636795044, "logits_per_char": -0.378103772799174, "bits_per_byte": 0.5454884379591676, "num_chars": 12}, {"sum_logits": -1.7634084224700928, "num_tokens": 1, "num_tokens_all": 173, "is_greedy": true, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -1.7634084224700928, "logits_per_char": -0.29390140374501544, "bits_per_byte": 0.4240100976935434, "num_chars": 6}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 602, "native_id": "Mercury_7216318", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.002044677734375, "logits_per_token_corr": -2.125255584716797, "logits_per_char_corr": -0.3864101063121449, "bits_per_byte_corr": 0.5574719441263091}, "model_output": [{"sum_logits": -17.002044677734375, "num_tokens": 8, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -40.884185791015625, "logits_per_token": -2.125255584716797, "logits_per_char": -0.3864101063121449, "bits_per_byte": 0.5574719441263091, "num_chars": 44}, {"sum_logits": -14.909907341003418, "num_tokens": 9, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -33.67287063598633, "logits_per_token": -1.656656371222602, "logits_per_char": -0.346742031186126, "bits_per_byte": 0.5002430088603497, "num_chars": 43}, {"sum_logits": -12.698318481445312, "num_tokens": 9, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -37.17885208129883, "logits_per_token": -1.4109242757161458, "logits_per_char": -0.28859814730557526, "bits_per_byte": 0.4163591159277951, "num_chars": 44}, {"sum_logits": -13.977401733398438, "num_tokens": 9, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -35.728477478027344, "logits_per_token": -1.5530446370442708, "logits_per_char": -0.2852530965999681, "bits_per_byte": 0.4115332278632901, "num_chars": 49}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 603, "native_id": "Mercury_401312", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.135894775390625, "logits_per_token_corr": -3.355982462565104, "logits_per_char_corr": -0.491119384765625, "bits_per_byte_corr": 0.7085355008863149}, "model_output": [{"sum_logits": -22.45766258239746, "num_tokens": 5, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -33.20431900024414, "logits_per_token": -4.491532516479492, "logits_per_char": -0.8317652808295356, "bits_per_byte": 1.199983645837249, "num_chars": 27}, {"sum_logits": -23.128307342529297, "num_tokens": 6, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -32.95958709716797, "logits_per_token": -3.8547178904215493, "logits_per_char": -0.7460744304041709, "bits_per_byte": 1.0763578808789287, "num_chars": 31}, {"sum_logits": -19.652603149414062, "num_tokens": 7, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -39.93303298950195, "logits_per_token": -2.8075147356305803, "logits_per_char": -0.5459056430392795, "bits_per_byte": 0.787575364006635, "num_chars": 36}, {"sum_logits": -20.135894775390625, "num_tokens": 6, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -39.985130310058594, "logits_per_token": -3.355982462565104, "logits_per_char": -0.491119384765625, "bits_per_byte": 0.7085355008863149, "num_chars": 41}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 604, "native_id": "MDSA_2013_8_23", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.329648971557617, "logits_per_token_corr": -2.2588498857286243, "logits_per_char_corr": -0.37647498095477067, "bits_per_byte_corr": 0.5431385880426048}, "model_output": [{"sum_logits": -22.291336059570312, "num_tokens": 7, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -43.508094787597656, "logits_per_token": -3.184476579938616, "logits_per_char": -0.5184031641760538, "bits_per_byte": 0.7478976741384773, "num_chars": 43}, {"sum_logits": -18.29366683959961, "num_tokens": 7, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -39.998260498046875, "logits_per_token": -2.6133809770856584, "logits_per_char": -0.38922695403403423, "bits_per_byte": 0.5615357963656211, "num_chars": 47}, {"sum_logits": -24.39205551147461, "num_tokens": 9, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -49.8754997253418, "logits_per_token": -2.7102283901638455, "logits_per_char": -0.48784111022949217, "bits_per_byte": 0.7038059504703601, "num_chars": 50}, {"sum_logits": -20.329648971557617, "num_tokens": 9, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -40.79692459106445, "logits_per_token": -2.2588498857286243, "logits_per_char": -0.37647498095477067, "bits_per_byte": 0.5431385880426048, "num_chars": 54}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 605, "native_id": "Mercury_SC_405880", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.988121032714844, "logits_per_token_corr": -2.1976242065429688, "logits_per_char_corr": -0.5783221596165707, "bits_per_byte_corr": 0.8343425117156216}, "model_output": [{"sum_logits": -10.988121032714844, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -20.893972396850586, "logits_per_token": -2.1976242065429688, "logits_per_char": -0.5783221596165707, "bits_per_byte": 0.8343425117156216, "num_chars": 19}, {"sum_logits": -13.792739868164062, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -23.292011260986328, "logits_per_token": -2.2987899780273438, "logits_per_char": -0.5108422173394097, "bits_per_byte": 0.7369895336328179, "num_chars": 27}, {"sum_logits": -16.169658660888672, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -29.210140228271484, "logits_per_token": -2.694943110148112, "logits_per_char": -0.4899896563905658, "bits_per_byte": 0.7069056473620643, "num_chars": 33}, {"sum_logits": -15.389183044433594, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -23.956443786621094, "logits_per_token": -2.1984547206333707, "logits_per_char": -0.4663388801343513, "bits_per_byte": 0.6727847897440248, "num_chars": 33}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 606, "native_id": "ACTAAP_2009_5_12", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.039063930511475, "logits_per_token_corr": -5.039063930511475, "logits_per_char_corr": -0.8398439884185791, "bits_per_byte_corr": 1.2116387572127625}, "model_output": [{"sum_logits": -3.841165065765381, "num_tokens": 1, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -8.644586563110352, "logits_per_token": -3.841165065765381, "logits_per_char": -0.9602912664413452, "bits_per_byte": 1.3854074479049063, "num_chars": 4}, {"sum_logits": -5.039063930511475, "num_tokens": 1, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -9.130099296569824, "logits_per_token": -5.039063930511475, "logits_per_char": -0.8398439884185791, "bits_per_byte": 1.2116387572127625, "num_chars": 6}, {"sum_logits": -3.9149832725524902, "num_tokens": 1, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -9.081864356994629, "logits_per_token": -3.9149832725524902, "logits_per_char": -0.6524972120920817, "bits_per_byte": 0.9413544920797967, "num_chars": 6}, {"sum_logits": -5.961699962615967, "num_tokens": 1, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -10.452678680419922, "logits_per_token": -5.961699962615967, "logits_per_char": -0.8516714232308524, "bits_per_byte": 1.2287021387628791, "num_chars": 7}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 607, "native_id": "CSZ20754", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.932352066040039, "logits_per_token_corr": -2.9661760330200195, "logits_per_char_corr": -0.45633477431077224, "bits_per_byte_corr": 0.6583519158838085}, "model_output": [{"sum_logits": -3.926288366317749, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -13.537635803222656, "logits_per_token": -1.9631441831588745, "logits_per_char": -0.3926288366317749, "bits_per_byte": 0.5664436755190716, "num_chars": 10}, {"sum_logits": -5.932352066040039, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -13.792558670043945, "logits_per_token": -2.9661760330200195, "logits_per_char": -0.45633477431077224, "bits_per_byte": 0.6583519158838085, "num_chars": 13}, {"sum_logits": -3.4198758602142334, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": true, "sum_logits_uncond": -14.911417007446289, "logits_per_token": -1.7099379301071167, "logits_per_char": -0.24427684715815953, "bits_per_byte": 0.35241699599932125, "num_chars": 14}, {"sum_logits": -5.685237884521484, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -16.103055953979492, "logits_per_token": -2.842618942260742, "logits_per_char": -0.3344257579130285, "bits_per_byte": 0.4824743824870058, "num_chars": 17}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 608, "native_id": "Mercury_184363", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -24.883628845214844, "logits_per_token_corr": -2.764847649468316, "logits_per_char_corr": -0.5294389116003159, "bits_per_byte_corr": 0.7638188922199748}, "model_output": [{"sum_logits": -22.64041519165039, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -31.734066009521484, "logits_per_token": -2.5156016879611545, "logits_per_char": -0.5265212835267533, "bits_per_byte": 0.7596096446670846, "num_chars": 43}, {"sum_logits": -25.52885627746582, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -32.77460479736328, "logits_per_token": -2.8365395863850913, "logits_per_char": -0.5673079172770182, "bits_per_byte": 0.8184523189131885, "num_chars": 45}, {"sum_logits": -38.942359924316406, "num_tokens": 10, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -43.240413665771484, "logits_per_token": -3.8942359924316405, "logits_per_char": -0.8653857760959202, "bits_per_byte": 1.2484877676303279, "num_chars": 45}, {"sum_logits": -24.883628845214844, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -35.86445617675781, "logits_per_token": -2.764847649468316, "logits_per_char": -0.5294389116003159, "bits_per_byte": 0.7638188922199748, "num_chars": 47}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 609, "native_id": "Mercury_7188195", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.386497497558594, "logits_per_token_corr": -2.193248748779297, "logits_per_char_corr": -0.39877249977805396, "bits_per_byte_corr": 0.575307107873107}, "model_output": [{"sum_logits": -3.790576696395874, "num_tokens": 2, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -15.06219482421875, "logits_per_token": -1.895288348197937, "logits_per_char": -0.42117518848843044, "bits_per_byte": 0.6076273557781696, "num_chars": 9}, {"sum_logits": -3.4995687007904053, "num_tokens": 2, "num_tokens_all": 228, "is_greedy": true, "sum_logits_uncond": -12.872313499450684, "logits_per_token": -1.7497843503952026, "logits_per_char": -0.3499568700790405, "bits_per_byte": 0.5048810409884178, "num_chars": 10}, {"sum_logits": -4.386497497558594, "num_tokens": 2, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -15.066802024841309, "logits_per_token": -2.193248748779297, "logits_per_char": -0.39877249977805396, "bits_per_byte": 0.575307107873107, "num_chars": 11}, {"sum_logits": -4.96502685546875, "num_tokens": 2, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -16.461776733398438, "logits_per_token": -2.482513427734375, "logits_per_char": -0.38192514272836536, "bits_per_byte": 0.5510015094054181, "num_chars": 13}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 610, "native_id": "Mercury_7221043", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -25.015562057495117, "logits_per_token_corr": -2.779506895277235, "logits_per_char_corr": -0.42399257724567996, "bits_per_byte_corr": 0.6116919885665126}, "model_output": [{"sum_logits": -30.6899471282959, "num_tokens": 10, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -46.99413299560547, "logits_per_token": -3.06899471282959, "logits_per_char": -0.6529775984743809, "bits_per_byte": 0.9420475431312508, "num_chars": 47}, {"sum_logits": -26.75400733947754, "num_tokens": 10, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -43.352317810058594, "logits_per_token": -2.675400733947754, "logits_per_char": -0.46127598861168173, "bits_per_byte": 0.6654805812517053, "num_chars": 58}, {"sum_logits": -39.000709533691406, "num_tokens": 9, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -52.75598907470703, "logits_per_token": -4.333412170410156, "logits_per_char": -0.7091038097034801, "bits_per_byte": 1.023020549735417, "num_chars": 55}, {"sum_logits": -25.015562057495117, "num_tokens": 9, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -51.893733978271484, "logits_per_token": -2.779506895277235, "logits_per_char": -0.42399257724567996, "bits_per_byte": 0.6116919885665126, "num_chars": 59}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 611, "native_id": "Mercury_7107328", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.865897178649902, "logits_per_token_corr": -1.4094138826642717, "logits_per_char_corr": -0.3523534706660679, "bits_per_byte_corr": 0.5083386047703162}, "model_output": [{"sum_logits": -13.910178184509277, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -26.080224990844727, "logits_per_token": -3.4775445461273193, "logits_per_char": -0.5350068532503568, "bits_per_byte": 0.7718517340264537, "num_chars": 26}, {"sum_logits": -9.865897178649902, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -19.276180267333984, "logits_per_token": -1.4094138826642717, "logits_per_char": -0.3523534706660679, "bits_per_byte": 0.5083386047703162, "num_chars": 28}, {"sum_logits": -20.921253204345703, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -30.65618896484375, "logits_per_token": -2.615156650543213, "logits_per_char": -0.6339773698286577, "bits_per_byte": 0.9146360074882899, "num_chars": 33}, {"sum_logits": -20.590923309326172, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -27.858314514160156, "logits_per_token": -3.431820551554362, "logits_per_char": -0.5883120945521764, "bits_per_byte": 0.8487549413060336, "num_chars": 35}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 612, "native_id": "Mercury_415084", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.788900852203369, "logits_per_token_corr": -1.9472252130508423, "logits_per_char_corr": -1.1127001217433385, "bits_per_byte_corr": 1.6052869476368137}, "model_output": [{"sum_logits": -10.77352523803711, "num_tokens": 6, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -26.120365142822266, "logits_per_token": -1.7955875396728516, "logits_per_char": -1.1970583597819011, "bits_per_byte": 1.726990159313266, "num_chars": 9}, {"sum_logits": -6.5013651847839355, "num_tokens": 4, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -18.339717864990234, "logits_per_token": -1.6253412961959839, "logits_per_char": -0.9287664549691337, "bits_per_byte": 1.3399267587289545, "num_chars": 7}, {"sum_logits": -7.788900852203369, "num_tokens": 4, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -24.275707244873047, "logits_per_token": -1.9472252130508423, "logits_per_char": -1.1127001217433385, "bits_per_byte": 1.6052869476368137, "num_chars": 7}, {"sum_logits": -11.413166046142578, "num_tokens": 6, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -28.04775619506836, "logits_per_token": -1.902194341023763, "logits_per_char": -1.1413166046142578, "bits_per_byte": 1.6465718055624023, "num_chars": 10}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 613, "native_id": "Mercury_415082", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.084633827209473, "logits_per_token_corr": -1.7711584568023682, "logits_per_char_corr": -1.180772304534912, "bits_per_byte_corr": 1.7034943481727745}, "model_output": [{"sum_logits": -7.084633827209473, "num_tokens": 4, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -18.9197940826416, "logits_per_token": -1.7711584568023682, "logits_per_char": -1.180772304534912, "bits_per_byte": 1.7034943481727745, "num_chars": 6}, {"sum_logits": -8.36336898803711, "num_tokens": 4, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -19.09695053100586, "logits_per_token": -2.0908422470092773, "logits_per_char": -1.1947669982910156, "bits_per_byte": 1.7236844234534792, "num_chars": 7}, {"sum_logits": -8.968395233154297, "num_tokens": 4, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -19.464330673217773, "logits_per_token": -2.242098808288574, "logits_per_char": -1.121049404144287, "bits_per_byte": 1.6173324159516522, "num_chars": 8}, {"sum_logits": -10.994989395141602, "num_tokens": 4, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -19.79486656188965, "logits_per_token": -2.7487473487854004, "logits_per_char": -1.3743736743927002, "bits_per_byte": 1.982802084376116, "num_chars": 8}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 614, "native_id": "Mercury_SC_416169", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.513989448547363, "logits_per_token_corr": -2.878497362136841, "logits_per_char_corr": -0.6396660804748535, "bits_per_byte_corr": 0.9228430821266147}, "model_output": [{"sum_logits": -11.513989448547363, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -21.513044357299805, "logits_per_token": -2.878497362136841, "logits_per_char": -0.6396660804748535, "bits_per_byte": 0.9228430821266147, "num_chars": 18}, {"sum_logits": -6.906434535980225, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -15.865842819213867, "logits_per_token": -1.7266086339950562, "logits_per_char": -0.4062608550576603, "bits_per_byte": 0.5861105208994175, "num_chars": 17}, {"sum_logits": -9.511955261230469, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -19.255971908569336, "logits_per_token": -2.377988815307617, "logits_per_char": -0.5595267800723805, "bits_per_byte": 0.8072265108555731, "num_chars": 17}, {"sum_logits": -11.497220039367676, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -20.413619995117188, "logits_per_token": -2.874305009841919, "logits_per_char": -0.7185762524604797, "bits_per_byte": 1.0366863959260546, "num_chars": 16}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 615, "native_id": "MEA_2011_8_13", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.518213272094727, "logits_per_token_corr": -1.8518213272094726, "logits_per_char_corr": -0.4306561226068541, "bits_per_byte_corr": 0.6213054524138242}, "model_output": [{"sum_logits": -18.518213272094727, "num_tokens": 10, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -41.794410705566406, "logits_per_token": -1.8518213272094726, "logits_per_char": -0.4306561226068541, "bits_per_byte": 0.6213054524138242, "num_chars": 43}, {"sum_logits": -19.330415725708008, "num_tokens": 9, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -41.392154693603516, "logits_per_token": -2.147823969523112, "logits_per_char": -0.39449828011648996, "bits_per_byte": 0.5691407123636941, "num_chars": 49}, {"sum_logits": -26.850730895996094, "num_tokens": 12, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -50.75537109375, "logits_per_token": -2.2375609079996743, "logits_per_char": -0.497235757333261, "bits_per_byte": 0.717359561257879, "num_chars": 54}, {"sum_logits": -22.410388946533203, "num_tokens": 8, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -43.789093017578125, "logits_per_token": -2.8012986183166504, "logits_per_char": -0.6056861877441406, "bits_per_byte": 0.8738204593940411, "num_chars": 37}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 616, "native_id": "TIMSS_2003_4_pg82", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -38.35445022583008, "logits_per_token_corr": -3.835445022583008, "logits_per_char_corr": -1.0366067628602724, "bits_per_byte_corr": 1.4955074361315512}, "model_output": [{"sum_logits": -38.35445022583008, "num_tokens": 10, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -43.853328704833984, "logits_per_token": -3.835445022583008, "logits_per_char": -1.0366067628602724, "bits_per_byte": 1.4955074361315512, "num_chars": 37}, {"sum_logits": -36.860313415527344, "num_tokens": 13, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -39.661163330078125, "logits_per_token": -2.835408724271334, "logits_per_char": -0.6825983965838397, "bits_per_byte": 0.984781321670971, "num_chars": 54}, {"sum_logits": -22.12026023864746, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -27.33655548095703, "logits_per_token": -2.457806693183051, "logits_per_char": -0.5027331872419878, "bits_per_byte": 0.7252906761248396, "num_chars": 44}, {"sum_logits": -21.186677932739258, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -26.659616470336914, "logits_per_token": -2.3540753258599176, "logits_per_char": -0.623137586257037, "bits_per_byte": 0.8989975054851918, "num_chars": 34}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 617, "native_id": "CSZ30338", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.911166667938232, "logits_per_token_corr": -1.727791666984558, "logits_per_char_corr": -0.4607444445292155, "bits_per_byte_corr": 0.6647137252399168}, "model_output": [{"sum_logits": -6.09152364730835, "num_tokens": 2, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -16.666240692138672, "logits_per_token": -3.045761823654175, "logits_per_char": -0.4351088319505964, "bits_per_byte": 0.6277293541025658, "num_chars": 14}, {"sum_logits": -4.138120651245117, "num_tokens": 2, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -17.785240173339844, "logits_per_token": -2.0690603256225586, "logits_per_char": -0.3183169731727013, "bits_per_byte": 0.4592343186273713, "num_chars": 13}, {"sum_logits": -6.911166667938232, "num_tokens": 4, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -19.23699188232422, "logits_per_token": -1.727791666984558, "logits_per_char": -0.4607444445292155, "bits_per_byte": 0.6647137252399168, "num_chars": 15}, {"sum_logits": -8.670467376708984, "num_tokens": 4, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -19.7362117767334, "logits_per_token": -2.167616844177246, "logits_per_char": -0.5780311584472656, "bits_per_byte": 0.8339226857717719, "num_chars": 15}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 618, "native_id": "TIMSS_2003_8_pg85", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.039722442626953, "logits_per_token_corr": -2.0079444885253905, "logits_per_char_corr": -0.4015888977050781, "bits_per_byte_corr": 0.5793703111955977}, "model_output": [{"sum_logits": -10.42176342010498, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -26.442935943603516, "logits_per_token": -2.0843526840209963, "logits_per_char": -0.37220583643232075, "bits_per_byte": 0.5369795144112236, "num_chars": 28}, {"sum_logits": -10.039722442626953, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -29.5059814453125, "logits_per_token": -2.0079444885253905, "logits_per_char": -0.4015888977050781, "bits_per_byte": 0.5793703111955977, "num_chars": 25}, {"sum_logits": -10.235912322998047, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -26.02695655822754, "logits_per_token": -3.411970774332682, "logits_per_char": -0.6397445201873779, "bits_per_byte": 0.9229562467108826, "num_chars": 16}, {"sum_logits": -8.980188369750977, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -24.358741760253906, "logits_per_token": -2.9933961232503257, "logits_per_char": -0.4081903804432262, "bits_per_byte": 0.5888942376044448, "num_chars": 22}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 619, "native_id": "Mercury_7221988", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.564107894897461, "logits_per_token_corr": -1.794872556413923, "logits_per_char_corr": -0.3589745112827846, "bits_per_byte_corr": 0.5178907472335847}, "model_output": [{"sum_logits": -15.417457580566406, "num_tokens": 5, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -24.754093170166016, "logits_per_token": -3.0834915161132814, "logits_per_char": -0.7007935263893821, "bits_per_byte": 1.0110313452097768, "num_chars": 22}, {"sum_logits": -17.91988754272461, "num_tokens": 6, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -27.024078369140625, "logits_per_token": -2.986647923787435, "logits_per_char": -0.5780608884749874, "bits_per_byte": 0.8339655771353316, "num_chars": 31}, {"sum_logits": -12.564107894897461, "num_tokens": 7, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -29.20097541809082, "logits_per_token": -1.794872556413923, "logits_per_char": -0.3589745112827846, "bits_per_byte": 0.5178907472335847, "num_chars": 35}, {"sum_logits": -16.478107452392578, "num_tokens": 7, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -30.40468978881836, "logits_per_token": -2.354015350341797, "logits_per_char": -0.47080307006835936, "bits_per_byte": 0.6792252544234092, "num_chars": 35}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 620, "native_id": "NCEOGA_2013_5_11", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.940151214599609, "logits_per_token_corr": -3.4700756072998047, "logits_per_char_corr": -0.5338577857384315, "bits_per_byte_corr": 0.7701939800253512}, "model_output": [{"sum_logits": -7.138471603393555, "num_tokens": 1, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -14.91162395477295, "logits_per_token": -7.138471603393555, "logits_per_char": -0.5098908288138253, "bits_per_byte": 0.7356169701249976, "num_chars": 14}, {"sum_logits": -6.787703514099121, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -19.40489959716797, "logits_per_token": -3.3938517570495605, "logits_per_char": -0.5656419595082601, "bits_per_byte": 0.816048849901869, "num_chars": 12}, {"sum_logits": -5.258581161499023, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -18.351346969604492, "logits_per_token": -2.6292905807495117, "logits_per_char": -0.35057207743326824, "bits_per_byte": 0.5057685975874812, "num_chars": 15}, {"sum_logits": -6.940151214599609, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -14.863035202026367, "logits_per_token": -3.4700756072998047, "logits_per_char": -0.5338577857384315, "bits_per_byte": 0.7701939800253512, "num_chars": 13}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 621, "native_id": "MCAS_2013_8_29416", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.617266654968262, "logits_per_token_corr": -2.3234533309936523, "logits_per_char_corr": -0.43026913536919487, "bits_per_byte_corr": 0.6207471478451655}, "model_output": [{"sum_logits": -13.32481575012207, "num_tokens": 5, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -25.288860321044922, "logits_per_token": -2.664963150024414, "logits_per_char": -0.888321050008138, "bits_per_byte": 1.2815763735649384, "num_chars": 15}, {"sum_logits": -11.617266654968262, "num_tokens": 5, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -25.600980758666992, "logits_per_token": -2.3234533309936523, "logits_per_char": -0.43026913536919487, "bits_per_byte": 0.6207471478451655, "num_chars": 27}, {"sum_logits": -13.803829193115234, "num_tokens": 5, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -26.466754913330078, "logits_per_token": -2.7607658386230467, "logits_per_char": -0.8627393245697021, "bits_per_byte": 1.2446697451374973, "num_chars": 16}, {"sum_logits": -12.497984886169434, "num_tokens": 5, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -27.34178924560547, "logits_per_token": -2.4995969772338866, "logits_per_char": -0.44635660307747976, "bits_per_byte": 0.6439564577283862, "num_chars": 28}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 622, "native_id": "Mercury_SC_401142", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.86559772491455, "logits_per_token_corr": -2.9551992416381836, "logits_per_char_corr": -0.6819690557626578, "bits_per_byte_corr": 0.9838733747892221}, "model_output": [{"sum_logits": -15.936128616333008, "num_tokens": 3, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -20.449508666992188, "logits_per_token": -5.312042872111003, "logits_per_char": -1.5936128616333007, "bits_per_byte": 2.2990973725768846, "num_chars": 10}, {"sum_logits": -8.86559772491455, "num_tokens": 3, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -22.262109756469727, "logits_per_token": -2.9551992416381836, "logits_per_char": -0.6819690557626578, "bits_per_byte": 0.9838733747892221, "num_chars": 13}, {"sum_logits": -7.641745567321777, "num_tokens": 3, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -26.41425323486328, "logits_per_token": -2.5472485224405923, "logits_per_char": -0.5094497044881184, "bits_per_byte": 0.7349805622478844, "num_chars": 15}, {"sum_logits": -15.523860931396484, "num_tokens": 3, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -25.00912094116211, "logits_per_token": -5.174620310465495, "logits_per_char": -1.108847209385463, "bits_per_byte": 1.599728370085123, "num_chars": 14}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 623, "native_id": "Mercury_7206395", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.510881423950195, "logits_per_token_corr": -1.6123201582166884, "logits_per_char_corr": -0.3818653006302683, "bits_per_byte_corr": 0.550915175507257}, "model_output": [{"sum_logits": -18.719085693359375, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -31.63791847229004, "logits_per_token": -2.6741550990513394, "logits_per_char": -0.4565630656916921, "bits_per_byte": 0.6586812707269394, "num_chars": 41}, {"sum_logits": -15.608635902404785, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -32.42532730102539, "logits_per_token": -2.229805128914969, "logits_per_char": -0.38069843664401914, "bits_per_byte": 0.5492317466209022, "num_chars": 41}, {"sum_logits": -14.510881423950195, "num_tokens": 9, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -37.82709884643555, "logits_per_token": -1.6123201582166884, "logits_per_char": -0.3818653006302683, "bits_per_byte": 0.550915175507257, "num_chars": 38}, {"sum_logits": -12.941385269165039, "num_tokens": 8, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -24.618114471435547, "logits_per_token": -1.6176731586456299, "logits_per_char": -0.323534631729126, "bits_per_byte": 0.46676180875178247, "num_chars": 40}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 624, "native_id": "Mercury_179025", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.786910057067871, "logits_per_token_corr": -1.5956366856892903, "logits_per_char_corr": -0.5318788952297635, "bits_per_byte_corr": 0.7673390445020316}, "model_output": [{"sum_logits": -2.7757041454315186, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -20.311904907226562, "logits_per_token": -1.3878520727157593, "logits_per_char": -0.3084115717146132, "bits_per_byte": 0.444943845065763, "num_chars": 9}, {"sum_logits": -4.786910057067871, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -19.35372543334961, "logits_per_token": -1.5956366856892903, "logits_per_char": -0.5318788952297635, "bits_per_byte": 0.7673390445020316, "num_chars": 9}, {"sum_logits": -3.4963254928588867, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -17.811382293701172, "logits_per_token": -1.1654418309529622, "logits_per_char": -0.34963254928588866, "bits_per_byte": 0.50441314498848, "num_chars": 10}, {"sum_logits": -5.091763496398926, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -21.729215621948242, "logits_per_token": -1.6972544987996419, "logits_per_char": -0.5091763496398926, "bits_per_byte": 0.7345861945639457, "num_chars": 10}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 625, "native_id": "Mercury_7130620", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.3211402893066406, "logits_per_token_corr": -3.3211402893066406, "logits_per_char_corr": -0.6642280578613281, "bits_per_byte_corr": 0.958278525096534}, "model_output": [{"sum_logits": -2.8832626342773438, "num_tokens": 1, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -13.955784797668457, "logits_per_token": -2.8832626342773438, "logits_per_char": -0.36040782928466797, "bits_per_byte": 0.5199585880069202, "num_chars": 8}, {"sum_logits": -3.3211402893066406, "num_tokens": 1, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -12.437054634094238, "logits_per_token": -3.3211402893066406, "logits_per_char": -0.6642280578613281, "bits_per_byte": 0.958278525096534, "num_chars": 5}, {"sum_logits": -7.031759262084961, "num_tokens": 1, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -13.809765815734863, "logits_per_token": -7.031759262084961, "logits_per_char": -1.1719598770141602, "bits_per_byte": 1.690780702690383, "num_chars": 6}, {"sum_logits": -2.8247432708740234, "num_tokens": 1, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -12.586496353149414, "logits_per_token": -2.8247432708740234, "logits_per_char": -0.40353475298200336, "bits_per_byte": 0.5821775869539073, "num_chars": 7}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 626, "native_id": "Mercury_177870", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -26.43378448486328, "logits_per_token_corr": -2.937087164984809, "logits_per_char_corr": -0.5624209464864528, "bits_per_byte_corr": 0.8114019103886655}, "model_output": [{"sum_logits": -20.037410736083984, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -33.62932586669922, "logits_per_token": -2.504676342010498, "logits_per_char": -0.4770812080019996, "bits_per_byte": 0.6882826928862954, "num_chars": 42}, {"sum_logits": -14.763188362121582, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -30.58989715576172, "logits_per_token": -1.8453985452651978, "logits_per_char": -0.3785432913364508, "bits_per_byte": 0.5461225291732761, "num_chars": 39}, {"sum_logits": -26.43378448486328, "num_tokens": 9, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -45.61751937866211, "logits_per_token": -2.937087164984809, "logits_per_char": -0.5624209464864528, "bits_per_byte": 0.8114019103886655, "num_chars": 47}, {"sum_logits": -14.025050163269043, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -32.10301971435547, "logits_per_token": -1.7531312704086304, "logits_per_char": -0.3420743942260742, "bits_per_byte": 0.4935090321654081, "num_chars": 41}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 627, "native_id": "Mercury_7282083", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.07895851135254, "logits_per_token_corr": -3.1348698139190674, "logits_per_char_corr": -0.7376164268044865, "bits_per_byte_corr": 1.0641555610298343}, "model_output": [{"sum_logits": -26.712154388427734, "num_tokens": 8, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -38.587501525878906, "logits_per_token": -3.339019298553467, "logits_per_char": -0.7632044110979352, "bits_per_byte": 1.101071219076364, "num_chars": 35}, {"sum_logits": -25.07895851135254, "num_tokens": 8, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -38.63062286376953, "logits_per_token": -3.1348698139190674, "logits_per_char": -0.7376164268044865, "bits_per_byte": 1.0641555610298343, "num_chars": 34}, {"sum_logits": -15.658590316772461, "num_tokens": 6, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -28.085960388183594, "logits_per_token": -2.60976505279541, "logits_per_char": -0.47450273687189276, "bits_per_byte": 0.6845627453738122, "num_chars": 33}, {"sum_logits": -17.10678482055664, "num_tokens": 6, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -31.40758514404297, "logits_per_token": -2.851130803426107, "logits_per_char": -0.4751884672376845, "bits_per_byte": 0.6855520451719276, "num_chars": 36}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 628, "native_id": "Mercury_SC_400233", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -2.8565030097961426, "logits_per_token_corr": -0.9521676699320475, "logits_per_char_corr": -0.1680295888115378, "bits_per_byte_corr": 0.24241545450119137}, "model_output": [{"sum_logits": -3.047590494155884, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": true, "sum_logits_uncond": -19.411489486694336, "logits_per_token": -1.0158634980519612, "logits_per_char": -0.23443003801199105, "bits_per_byte": 0.3382110532755537, "num_chars": 13}, {"sum_logits": -8.461201667785645, "num_tokens": 4, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -25.937288284301758, "logits_per_token": -2.115300416946411, "logits_per_char": -0.5640801111857097, "bits_per_byte": 0.813795579072303, "num_chars": 15}, {"sum_logits": -2.8565030097961426, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -16.878002166748047, "logits_per_token": -0.9521676699320475, "logits_per_char": -0.1680295888115378, "bits_per_byte": 0.24241545450119137, "num_chars": 17}, {"sum_logits": -3.1865243911743164, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -19.78123664855957, "logits_per_token": -1.0621747970581055, "logits_per_char": -0.18744261124554804, "bits_per_byte": 0.2704225256954243, "num_chars": 17}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 629, "native_id": "Mercury_7082443", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.922996520996094, "logits_per_token_corr": -3.2403745651245117, "logits_per_char_corr": -0.6646922184870794, "bits_per_byte_corr": 0.9589481673294817}, "model_output": [{"sum_logits": -6.732427597045898, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -19.406389236450195, "logits_per_token": -0.7480475107828776, "logits_per_char": -0.15656808365223018, "bits_per_byte": 0.22587999784672314, "num_chars": 43}, {"sum_logits": -25.922996520996094, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -36.21238327026367, "logits_per_token": -3.2403745651245117, "logits_per_char": -0.6646922184870794, "bits_per_byte": 0.9589481673294817, "num_chars": 39}, {"sum_logits": -25.2990665435791, "num_tokens": 10, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -37.22161102294922, "logits_per_token": -2.52990665435791, "logits_per_char": -0.6324766635894775, "bits_per_byte": 0.912470946039192, "num_chars": 40}, {"sum_logits": -12.13244915008545, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -22.421112060546875, "logits_per_token": -1.3480499055650499, "logits_per_char": -0.2888678369067964, "bits_per_byte": 0.41674819577805644, "num_chars": 42}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 630, "native_id": "NCEOGA_2013_8_15", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.877605438232422, "logits_per_token_corr": -3.3597006797790527, "logits_per_char_corr": -0.4799572399684361, "bits_per_byte_corr": 0.6924319299417144}, "model_output": [{"sum_logits": -17.027446746826172, "num_tokens": 8, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -37.55415725708008, "logits_per_token": -2.1284308433532715, "logits_per_char": -0.3274508989774264, "bits_per_byte": 0.4724117880897054, "num_chars": 52}, {"sum_logits": -26.877605438232422, "num_tokens": 8, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -45.023590087890625, "logits_per_token": -3.3597006797790527, "logits_per_char": -0.4799572399684361, "bits_per_byte": 0.6924319299417144, "num_chars": 56}, {"sum_logits": -16.335464477539062, "num_tokens": 7, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -35.31611633300781, "logits_per_token": -2.3336377825055803, "logits_per_char": -0.32030322504978553, "bits_per_byte": 0.4620998743603992, "num_chars": 51}, {"sum_logits": -14.482396125793457, "num_tokens": 8, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -40.94219207763672, "logits_per_token": -1.8102995157241821, "logits_per_char": -0.2896479225158691, "bits_per_byte": 0.4178736214177353, "num_chars": 50}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 631, "native_id": "Mercury_7210140", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.897655487060547, "logits_per_token_corr": -2.4330728318956165, "logits_per_char_corr": -0.45620115598042804, "bits_per_byte_corr": 0.6581591453812489}, "model_output": [{"sum_logits": -21.897655487060547, "num_tokens": 9, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -47.06108856201172, "logits_per_token": -2.4330728318956165, "logits_per_char": -0.45620115598042804, "bits_per_byte": 0.6581591453812489, "num_chars": 48}, {"sum_logits": -20.28866195678711, "num_tokens": 9, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -46.59940719604492, "logits_per_token": -2.2542957729763455, "logits_per_char": -0.42268045743306476, "bits_per_byte": 0.6097989998197992, "num_chars": 48}, {"sum_logits": -21.936426162719727, "num_tokens": 10, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -41.71107482910156, "logits_per_token": -2.1936426162719727, "logits_per_char": -0.42185434928307164, "bits_per_byte": 0.6086071776885653, "num_chars": 52}, {"sum_logits": -39.00872039794922, "num_tokens": 10, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -55.70882797241211, "logits_per_token": -3.9008720397949217, "logits_per_char": -0.7360135924141362, "bits_per_byte": 1.061843159803508, "num_chars": 53}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 632, "native_id": "Mercury_7106593", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -27.95199966430664, "logits_per_token_corr": -2.541090878573331, "logits_per_char_corr": -0.5823333263397217, "bits_per_byte_corr": 0.8401294020552944}, "model_output": [{"sum_logits": -18.991809844970703, "num_tokens": 8, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -31.626667022705078, "logits_per_token": -2.373976230621338, "logits_per_char": -0.5585826424991384, "bits_per_byte": 0.8058644082607386, "num_chars": 34}, {"sum_logits": -21.927038192749023, "num_tokens": 11, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -49.389060974121094, "logits_per_token": -1.9933671084317295, "logits_per_char": -0.39155425344194683, "bits_per_byte": 0.5648933796800829, "num_chars": 56}, {"sum_logits": -19.689228057861328, "num_tokens": 8, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -40.967041015625, "logits_per_token": -2.461153507232666, "logits_per_char": -0.4802250745819836, "bits_per_byte": 0.6928183336104581, "num_chars": 41}, {"sum_logits": -27.95199966430664, "num_tokens": 11, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -46.67538833618164, "logits_per_token": -2.541090878573331, "logits_per_char": -0.5823333263397217, "bits_per_byte": 0.8401294020552944, "num_chars": 48}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 633, "native_id": "Mercury_416536", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.530970573425293, "logits_per_token_corr": -5.530970573425293, "logits_per_char_corr": -0.9218284289042155, "bits_per_byte_corr": 1.3299173029315314}, "model_output": [{"sum_logits": -7.805570602416992, "num_tokens": 1, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -13.587088584899902, "logits_per_token": -7.805570602416992, "logits_per_char": -1.5611141204833985, "bits_per_byte": 2.252211599884753, "num_chars": 5}, {"sum_logits": -5.530970573425293, "num_tokens": 1, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -14.396956443786621, "logits_per_token": -5.530970573425293, "logits_per_char": -0.9218284289042155, "bits_per_byte": 1.3299173029315314, "num_chars": 6}, {"sum_logits": -8.576247215270996, "num_tokens": 2, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -17.346343994140625, "logits_per_token": -4.288123607635498, "logits_per_char": -1.0720309019088745, "bits_per_byte": 1.5466136658647671, "num_chars": 8}, {"sum_logits": -10.366279602050781, "num_tokens": 2, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -19.255990982055664, "logits_per_token": -5.183139801025391, "logits_per_char": -1.2957849502563477, "bits_per_byte": 1.869422521794728, "num_chars": 8}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 634, "native_id": "Mercury_410026", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.635982513427734, "logits_per_token_corr": -4.817991256713867, "logits_per_char_corr": -0.4379992051558061, "bits_per_byte_corr": 0.6318992811920431}, "model_output": [{"sum_logits": -11.784957885742188, "num_tokens": 2, "num_tokens_all": 245, "is_greedy": false, "sum_logits_uncond": -15.993900299072266, "logits_per_token": -5.892478942871094, "logits_per_char": -0.6547198825412326, "bits_per_byte": 0.9445611277143194, "num_chars": 18}, {"sum_logits": -7.866065979003906, "num_tokens": 2, "num_tokens_all": 245, "is_greedy": false, "sum_logits_uncond": -18.658653259277344, "logits_per_token": -3.933032989501953, "logits_per_char": -0.437003665500217, "bits_per_byte": 0.6304630210679154, "num_chars": 18}, {"sum_logits": -12.990344047546387, "num_tokens": 2, "num_tokens_all": 245, "is_greedy": false, "sum_logits_uncond": -21.452125549316406, "logits_per_token": -6.495172023773193, "logits_per_char": -0.5904701839793812, "bits_per_byte": 0.8518684062204591, "num_chars": 22}, {"sum_logits": -9.635982513427734, "num_tokens": 2, "num_tokens_all": 245, "is_greedy": false, "sum_logits_uncond": -21.700550079345703, "logits_per_token": -4.817991256713867, "logits_per_char": -0.4379992051558061, "bits_per_byte": 0.6318992811920431, "num_chars": 22}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 635, "native_id": "ACTAAP_2011_5_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.016347885131836, "logits_per_token_corr": -1.7795942094590929, "logits_per_char_corr": -0.34818147576373554, "bits_per_byte_corr": 0.5023196884141029}, "model_output": [{"sum_logits": -13.574978828430176, "num_tokens": 10, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -32.842979431152344, "logits_per_token": -1.3574978828430175, "logits_per_char": -0.295108235400656, "bits_per_byte": 0.42575118773832515, "num_chars": 46}, {"sum_logits": -16.016347885131836, "num_tokens": 9, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -39.49128723144531, "logits_per_token": -1.7795942094590929, "logits_per_char": -0.34818147576373554, "bits_per_byte": 0.5023196884141029, "num_chars": 46}, {"sum_logits": -18.01056671142578, "num_tokens": 11, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -43.89805603027344, "logits_per_token": -1.637324246493253, "logits_per_char": -0.32746484929865055, "bits_per_byte": 0.4724319141489543, "num_chars": 55}, {"sum_logits": -21.598453521728516, "num_tokens": 11, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -41.17749786376953, "logits_per_token": -1.9634957747025923, "logits_per_char": -0.3856866700308664, "bits_per_byte": 0.5564282461909087, "num_chars": 56}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 636, "native_id": "Mercury_417138", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.26340103149414, "logits_per_token_corr": -1.5694000937721946, "logits_per_char_corr": -0.35965418815612793, "bits_per_byte_corr": 0.5188713136881647}, "model_output": [{"sum_logits": -18.40460968017578, "num_tokens": 11, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -37.4101448059082, "logits_per_token": -1.6731463345614346, "logits_per_char": -0.39158744000374, "bits_per_byte": 0.564941257768206, "num_chars": 47}, {"sum_logits": -17.26340103149414, "num_tokens": 11, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -37.088279724121094, "logits_per_token": -1.5694000937721946, "logits_per_char": -0.35965418815612793, "bits_per_byte": 0.5188713136881647, "num_chars": 48}, {"sum_logits": -18.008790969848633, "num_tokens": 11, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -37.78938293457031, "logits_per_token": -1.637162815440785, "logits_per_char": -0.37518314520517987, "bits_per_byte": 0.5412748630130257, "num_chars": 48}, {"sum_logits": -19.087608337402344, "num_tokens": 11, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -38.57630920410156, "logits_per_token": -1.7352371215820312, "logits_per_char": -0.38954302729392537, "bits_per_byte": 0.561991793690224, "num_chars": 49}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 637, "native_id": "Mercury_7138915", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.252755165100098, "logits_per_token_corr": -1.0229777422818271, "logits_per_char_corr": -0.19401302008793272, "bits_per_byte_corr": 0.27990162194895246}, "model_output": [{"sum_logits": -11.537080764770508, "num_tokens": 11, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -47.63944625854492, "logits_per_token": -1.0488255240700461, "logits_per_char": -0.1989151855994915, "bits_per_byte": 0.28697395182210034, "num_chars": 58}, {"sum_logits": -15.630533218383789, "num_tokens": 11, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -51.24091720581055, "logits_per_token": -1.4209575653076172, "logits_per_char": -0.2694919520410998, "bits_per_byte": 0.3887947027694604, "num_chars": 58}, {"sum_logits": -14.185842514038086, "num_tokens": 11, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -48.3038444519043, "logits_per_token": -1.289622046730735, "logits_per_char": -0.24458349162134632, "bits_per_byte": 0.35285939044567716, "num_chars": 58}, {"sum_logits": -11.252755165100098, "num_tokens": 11, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -42.94003677368164, "logits_per_token": -1.0229777422818271, "logits_per_char": -0.19401302008793272, "bits_per_byte": 0.27990162194895246, "num_chars": 58}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 638, "native_id": "NYSEDREGENTS_2008_4_11", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.233927726745605, "logits_per_token_corr": -9.233927726745605, "logits_per_char_corr": -1.846785545349121, "bits_per_byte_corr": 2.664348347862511}, "model_output": [{"sum_logits": -5.939851760864258, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -13.809765815734863, "logits_per_token": -5.939851760864258, "logits_per_char": -0.9899752934773763, "bits_per_byte": 1.428232446503433, "num_chars": 6}, {"sum_logits": -4.39634895324707, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -12.348932266235352, "logits_per_token": -4.39634895324707, "logits_per_char": -0.6280498504638672, "bits_per_byte": 0.9060844046959272, "num_chars": 7}, {"sum_logits": -9.233927726745605, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -12.437054634094238, "logits_per_token": -9.233927726745605, "logits_per_char": -1.846785545349121, "bits_per_byte": 2.664348347862511, "num_chars": 5}, {"sum_logits": -6.795702934265137, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -13.172284126281738, "logits_per_token": -6.795702934265137, "logits_per_char": -1.1326171557108562, "bits_per_byte": 1.634021153770989, "num_chars": 6}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 639, "native_id": "Mercury_404435", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -21.02527618408203, "logits_per_token_corr": -2.628159523010254, "logits_per_char_corr": -1.2367809520048254, "bits_per_byte_corr": 1.7842977461245746}, "model_output": [{"sum_logits": -23.939416885375977, "num_tokens": 7, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -29.543598175048828, "logits_per_token": -3.4199166979108537, "logits_per_char": -1.4082009932574104, "bits_per_byte": 2.0316045895488384, "num_chars": 17}, {"sum_logits": -18.56805419921875, "num_tokens": 5, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -25.35431671142578, "logits_per_token": -3.71361083984375, "logits_per_char": -1.856805419921875, "bits_per_byte": 2.678803971218963, "num_chars": 10}, {"sum_logits": -24.314071655273438, "num_tokens": 7, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -32.57758712768555, "logits_per_token": -3.4734388078962053, "logits_per_char": -1.7367194039481026, "bits_per_byte": 2.5055564714933642, "num_chars": 14}, {"sum_logits": -21.02527618408203, "num_tokens": 8, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -30.553483963012695, "logits_per_token": -2.628159523010254, "logits_per_char": -1.2367809520048254, "bits_per_byte": 1.7842977461245746, "num_chars": 17}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 640, "native_id": "MDSA_2009_5_25", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.27113914489746, "logits_per_token_corr": -1.6059282620747883, "logits_per_char_corr": -0.26398820746434876, "bits_per_byte_corr": 0.3808544777622564}, "model_output": [{"sum_logits": -16.035003662109375, "num_tokens": 8, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -35.863548278808594, "logits_per_token": -2.004375457763672, "logits_per_char": -0.3644319014115767, "bits_per_byte": 0.525764096908595, "num_chars": 44}, {"sum_logits": -19.27113914489746, "num_tokens": 12, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -42.559898376464844, "logits_per_token": -1.6059282620747883, "logits_per_char": -0.26398820746434876, "bits_per_byte": 0.3808544777622564, "num_chars": 73}, {"sum_logits": -16.693845748901367, "num_tokens": 12, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -44.434207916259766, "logits_per_token": -1.3911538124084473, "logits_per_char": -0.2454977316014907, "bits_per_byte": 0.3541783599312148, "num_chars": 68}, {"sum_logits": -34.00606155395508, "num_tokens": 13, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -51.479820251464844, "logits_per_token": -2.615850888765775, "logits_per_char": -0.523170177753155, "bits_per_byte": 0.7547750209860165, "num_chars": 65}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 641, "native_id": "OHAT_2007_8_12", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -21.161325454711914, "logits_per_token_corr": -3.0230464935302734, "logits_per_char_corr": -0.492123847783998, "bits_per_byte_corr": 0.709984634701679}, "model_output": [{"sum_logits": -16.90199851989746, "num_tokens": 4, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -32.61256408691406, "logits_per_token": -4.225499629974365, "logits_per_char": -0.5121817733302261, "bits_per_byte": 0.7389221044177632, "num_chars": 33}, {"sum_logits": -19.55634117126465, "num_tokens": 6, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -35.0782356262207, "logits_per_token": -3.259390195210775, "logits_per_char": -0.5146405571385434, "bits_per_byte": 0.7424693796246432, "num_chars": 38}, {"sum_logits": -21.161325454711914, "num_tokens": 7, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -43.04189682006836, "logits_per_token": -3.0230464935302734, "logits_per_char": -0.492123847783998, "bits_per_byte": 0.709984634701679, "num_chars": 43}, {"sum_logits": -38.352046966552734, "num_tokens": 10, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -52.320796966552734, "logits_per_token": -3.8352046966552735, "logits_per_char": -0.6728429292377672, "bits_per_byte": 0.9707071573092279, "num_chars": 57}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 642, "native_id": "Mercury_LBS10302", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.158441066741943, "logits_per_token_corr": -1.0396102666854858, "logits_per_char_corr": -0.2970315047672817, "bits_per_byte_corr": 0.42852587891585164}, "model_output": [{"sum_logits": -7.4729695320129395, "num_tokens": 4, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -17.508949279785156, "logits_per_token": -1.8682423830032349, "logits_per_char": -0.6793608665466309, "bits_per_byte": 0.9801105531415574, "num_chars": 11}, {"sum_logits": -7.293760299682617, "num_tokens": 2, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -16.672985076904297, "logits_per_token": -3.6468801498413086, "logits_per_char": -0.7293760299682617, "bits_per_byte": 1.052267181379247, "num_chars": 10}, {"sum_logits": -4.158441066741943, "num_tokens": 4, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -21.761764526367188, "logits_per_token": -1.0396102666854858, "logits_per_char": -0.2970315047672817, "bits_per_byte": 0.42852587891585164, "num_chars": 14}, {"sum_logits": -7.800496578216553, "num_tokens": 6, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -25.268211364746094, "logits_per_token": -1.300082763036092, "logits_per_char": -0.4333609210120307, "bits_per_byte": 0.6252076516595797, "num_chars": 18}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 643, "native_id": "Mercury_7027248", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.399121284484863, "logits_per_token_corr": -5.199560642242432, "logits_per_char_corr": -0.4951962516421363, "bits_per_byte_corr": 0.7144171765114266}, "model_output": [{"sum_logits": -12.208593368530273, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -19.831016540527344, "logits_per_token": -6.104296684265137, "logits_per_char": -0.939122566810021, "bits_per_byte": 1.354867469924705, "num_chars": 13}, {"sum_logits": -10.399121284484863, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -18.150941848754883, "logits_per_token": -5.199560642242432, "logits_per_char": -0.4951962516421363, "bits_per_byte": 0.7144171765114266, "num_chars": 21}, {"sum_logits": -11.870720863342285, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -17.366445541381836, "logits_per_token": -5.935360431671143, "logits_per_char": -0.6247747822811729, "bits_per_byte": 0.9013594800701775, "num_chars": 19}, {"sum_logits": -17.309650421142578, "num_tokens": 3, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -23.19373893737793, "logits_per_token": -5.769883473714192, "logits_per_char": -0.7868022918701172, "bits_per_byte": 1.1351157646419043, "num_chars": 22}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 644, "native_id": "Mercury_SC_401360", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.889495849609375, "logits_per_token_corr": -2.963165283203125, "logits_per_char_corr": -0.46786820261101975, "bits_per_byte_corr": 0.6749911356970358}, "model_output": [{"sum_logits": -8.889495849609375, "num_tokens": 3, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -21.186338424682617, "logits_per_token": -2.963165283203125, "logits_per_char": -0.46786820261101975, "bits_per_byte": 0.6749911356970358, "num_chars": 19}, {"sum_logits": -12.031397819519043, "num_tokens": 3, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -23.14968490600586, "logits_per_token": -4.010465939839681, "logits_per_char": -0.6332314641852128, "bits_per_byte": 0.9135598931155201, "num_chars": 19}, {"sum_logits": -10.391003608703613, "num_tokens": 3, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -19.027414321899414, "logits_per_token": -3.463667869567871, "logits_per_char": -0.6112355063943302, "bits_per_byte": 0.881826433890988, "num_chars": 17}, {"sum_logits": -12.07198429107666, "num_tokens": 3, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -20.89792823791504, "logits_per_token": -4.02399476369222, "logits_per_char": -0.7101167230045095, "bits_per_byte": 1.0244818747316635, "num_chars": 17}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 645, "native_id": "ACTAAP_2013_5_17", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -22.527313232421875, "logits_per_token_corr": -2.2527313232421875, "logits_per_char_corr": -0.39521602162143643, "bits_per_byte_corr": 0.5701761944735213}, "model_output": [{"sum_logits": -22.527313232421875, "num_tokens": 10, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -37.578556060791016, "logits_per_token": -2.2527313232421875, "logits_per_char": -0.39521602162143643, "bits_per_byte": 0.5701761944735213, "num_chars": 57}, {"sum_logits": -54.98286437988281, "num_tokens": 13, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -60.151371002197266, "logits_per_token": -4.229451106144832, "logits_per_char": -0.8868203932239164, "bits_per_byte": 1.2794113834642638, "num_chars": 62}, {"sum_logits": -41.10838317871094, "num_tokens": 11, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -55.71630096435547, "logits_per_token": -3.7371257435191763, "logits_per_char": -0.6630384383663055, "bits_per_byte": 0.9565622669505188, "num_chars": 62}, {"sum_logits": -46.12894058227539, "num_tokens": 12, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -58.65254211425781, "logits_per_token": -3.8440783818562827, "logits_per_char": -0.7440151706818612, "bits_per_byte": 1.073386997089648, "num_chars": 62}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 646, "native_id": "Mercury_407125", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -30.251007080078125, "logits_per_token_corr": -2.5209172566731772, "logits_per_char_corr": -0.6050201416015625, "bits_per_byte_corr": 0.8728595579271397}, "model_output": [{"sum_logits": -25.172016143798828, "num_tokens": 11, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -32.984256744384766, "logits_per_token": -2.2883651039817114, "logits_per_char": -0.5720912759954279, "bits_per_byte": 0.825353246815036, "num_chars": 44}, {"sum_logits": -28.962175369262695, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -36.19648361206055, "logits_per_token": -3.620271921157837, "logits_per_char": -0.7063945212015291, "bits_per_byte": 1.019111872649312, "num_chars": 41}, {"sum_logits": -30.251007080078125, "num_tokens": 12, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -38.62314224243164, "logits_per_token": -2.5209172566731772, "logits_per_char": -0.6050201416015625, "bits_per_byte": 0.8728595579271397, "num_chars": 50}, {"sum_logits": -27.100431442260742, "num_tokens": 10, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -33.9067268371582, "logits_per_token": -2.7100431442260744, "logits_per_char": -0.5211621431203989, "bits_per_byte": 0.7518780393794038, "num_chars": 52}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 647, "native_id": "Mercury_404820", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.318968772888184, "logits_per_token_corr": -8.318968772888184, "logits_per_char_corr": -2.772989590962728, "bits_per_byte_corr": 4.000578331321517}, "model_output": [{"sum_logits": -2.70668625831604, "num_tokens": 1, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -8.91906452178955, "logits_per_token": -2.70668625831604, "logits_per_char": -0.9022287527720133, "bits_per_byte": 1.3016409473725534, "num_chars": 3}, {"sum_logits": -4.824304580688477, "num_tokens": 1, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -9.511491775512695, "logits_per_token": -4.824304580688477, "logits_per_char": -1.608101526896159, "bits_per_byte": 2.320000098100725, "num_chars": 3}, {"sum_logits": -5.245816230773926, "num_tokens": 1, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -10.320112228393555, "logits_per_token": -5.245816230773926, "logits_per_char": -1.7486054102579753, "bits_per_byte": 2.5227043538526046, "num_chars": 3}, {"sum_logits": -8.318968772888184, "num_tokens": 1, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -10.17769718170166, "logits_per_token": -8.318968772888184, "logits_per_char": -2.772989590962728, "bits_per_byte": 4.000578331321517, "num_chars": 3}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 648, "native_id": "Mercury_SC_416168", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.594446182250977, "logits_per_token_corr": -6.594446182250977, "logits_per_char_corr": -0.7327162424723307, "bits_per_byte_corr": 1.0570860893943863}, "model_output": [{"sum_logits": -9.132997512817383, "num_tokens": 1, "num_tokens_all": 173, "is_greedy": false, "sum_logits_uncond": -15.486867904663086, "logits_per_token": -9.132997512817383, "logits_per_char": -1.5221662521362305, "bits_per_byte": 2.196021703367057, "num_chars": 6}, {"sum_logits": -6.594446182250977, "num_tokens": 1, "num_tokens_all": 173, "is_greedy": false, "sum_logits_uncond": -13.7499361038208, "logits_per_token": -6.594446182250977, "logits_per_char": -0.7327162424723307, "bits_per_byte": 1.0570860893943863, "num_chars": 9}, {"sum_logits": -10.815441131591797, "num_tokens": 2, "num_tokens_all": 174, "is_greedy": false, "sum_logits_uncond": -16.239093780517578, "logits_per_token": -5.407720565795898, "logits_per_char": -1.3519301414489746, "bits_per_byte": 1.9504229106981517, "num_chars": 8}, {"sum_logits": -5.405679702758789, "num_tokens": 1, "num_tokens_all": 173, "is_greedy": false, "sum_logits_uncond": -10.831206321716309, "logits_per_token": -5.405679702758789, "logits_per_char": -1.0811359405517578, "bits_per_byte": 1.5597494599619666, "num_chars": 5}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 649, "native_id": "TIMSS_1995_8_K18", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.999032020568848, "logits_per_token_corr": -2.2855760029384067, "logits_per_char_corr": -0.36361436410383746, "bits_per_byte_corr": 0.5245846398889771}, "model_output": [{"sum_logits": -15.999032020568848, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -39.20998001098633, "logits_per_token": -2.2855760029384067, "logits_per_char": -0.36361436410383746, "bits_per_byte": 0.5245846398889771, "num_chars": 44}, {"sum_logits": -28.2778377532959, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -39.64554977416992, "logits_per_token": -4.0396911076137, "logits_per_char": -0.6147356033325195, "bits_per_byte": 0.886876006386348, "num_chars": 46}, {"sum_logits": -17.100692749023438, "num_tokens": 6, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -33.73173141479492, "logits_per_token": -2.8501154581705728, "logits_per_char": -0.41709006704935214, "bits_per_byte": 0.6017337713365779, "num_chars": 41}, {"sum_logits": -21.250438690185547, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -29.810649871826172, "logits_per_token": -3.0357769557407925, "logits_per_char": -0.6439526875813802, "bits_per_byte": 0.9290273489414447, "num_chars": 33}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 650, "native_id": "Mercury_SC_405130", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.0314531326293945, "logits_per_token_corr": -1.2062906265258788, "logits_per_char_corr": -0.2513105471928914, "bits_per_byte_corr": 0.36256448015853676}, "model_output": [{"sum_logits": -6.0314531326293945, "num_tokens": 5, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -28.278614044189453, "logits_per_token": -1.2062906265258788, "logits_per_char": -0.2513105471928914, "bits_per_byte": 0.36256448015853676, "num_chars": 24}, {"sum_logits": -12.645870208740234, "num_tokens": 6, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -35.1705207824707, "logits_per_token": -2.107645034790039, "logits_per_char": -0.5058348083496094, "bits_per_byte": 0.729765369515525, "num_chars": 25}, {"sum_logits": -5.752382278442383, "num_tokens": 5, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -32.030487060546875, "logits_per_token": -1.1504764556884766, "logits_per_char": -0.2300952911376953, "bits_per_byte": 0.33195733545649375, "num_chars": 25}, {"sum_logits": -9.257142066955566, "num_tokens": 5, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -33.16924285888672, "logits_per_token": -1.8514284133911132, "logits_per_char": -0.37028568267822265, "bits_per_byte": 0.53420931811244, "num_chars": 25}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 651, "native_id": "Mercury_SC_408631", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -20.538164138793945, "logits_per_token_corr": -2.0538164138793946, "logits_per_char_corr": -0.43698221571902013, "bits_per_byte_corr": 0.6304320755749545}, "model_output": [{"sum_logits": -24.995502471923828, "num_tokens": 9, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -35.329708099365234, "logits_per_token": -2.777278052435981, "logits_per_char": -0.6248875617980957, "bits_per_byte": 0.901522186519956, "num_chars": 40}, {"sum_logits": -18.406938552856445, "num_tokens": 8, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -29.025455474853516, "logits_per_token": -2.3008673191070557, "logits_per_char": -0.44894972080137674, "bits_per_byte": 0.6476975358090963, "num_chars": 41}, {"sum_logits": -20.538164138793945, "num_tokens": 10, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -36.238487243652344, "logits_per_token": -2.0538164138793946, "logits_per_char": -0.43698221571902013, "bits_per_byte": 0.6304320755749545, "num_chars": 47}, {"sum_logits": -33.87446212768555, "num_tokens": 10, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -42.83702087402344, "logits_per_token": -3.3874462127685545, "logits_per_char": -0.6774892425537109, "bits_per_byte": 0.977410370488561, "num_chars": 50}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 652, "native_id": "Mercury_SC_408763", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.906279563903809, "logits_per_token_corr": -3.302093187967936, "logits_per_char_corr": -0.7620215049156775, "bits_per_byte_corr": 1.0993646461933826}, "model_output": [{"sum_logits": -8.945525169372559, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -21.656862258911133, "logits_per_token": -4.472762584686279, "logits_per_char": -0.6881173207209661, "bits_per_byte": 0.9927434461546514, "num_chars": 13}, {"sum_logits": -7.100321292877197, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -20.713035583496094, "logits_per_token": -3.5501606464385986, "logits_per_char": -0.645483753897927, "bits_per_byte": 0.9312362107236004, "num_chars": 11}, {"sum_logits": -9.906279563903809, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -22.440162658691406, "logits_per_token": -3.302093187967936, "logits_per_char": -0.7620215049156775, "bits_per_byte": 1.0993646461933826, "num_chars": 13}, {"sum_logits": -10.207073211669922, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -20.47861671447754, "logits_per_token": -5.103536605834961, "logits_per_char": -0.6004160712747013, "bits_per_byte": 0.8662172884986683, "num_chars": 17}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 653, "native_id": "MCAS_8_2015_18", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.742033958435059, "logits_per_token_corr": -2.948406791687012, "logits_per_char_corr": -0.546001257719817, "bits_per_byte_corr": 0.7877133068320827}, "model_output": [{"sum_logits": -14.742033958435059, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -24.026165008544922, "logits_per_token": -2.948406791687012, "logits_per_char": -0.546001257719817, "bits_per_byte": 0.7877133068320827, "num_chars": 27}, {"sum_logits": -14.894903182983398, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -27.123414993286133, "logits_per_token": -2.9789806365966798, "logits_per_char": -0.5319608279636928, "bits_per_byte": 0.767457248450958, "num_chars": 28}, {"sum_logits": -11.339771270751953, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -23.64518928527832, "logits_per_token": -2.2679542541503905, "logits_per_char": -0.3910265955431708, "bits_per_byte": 0.5641321302462323, "num_chars": 29}, {"sum_logits": -17.0577335357666, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -28.704973220825195, "logits_per_token": -3.4115467071533203, "logits_per_char": -0.5169010162353516, "bits_per_byte": 0.745730532753743, "num_chars": 33}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 654, "native_id": "Mercury_411729", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.9573974609375, "logits_per_token_corr": -1.5914794921875, "logits_per_char_corr": -0.7233997691761364, "bits_per_byte_corr": 1.0436452595713825}, "model_output": [{"sum_logits": -7.917100429534912, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -21.12757110595703, "logits_per_token": -1.979275107383728, "logits_per_char": -0.7197364026849921, "bits_per_byte": 1.0383601389016461, "num_chars": 11}, {"sum_logits": -7.9573974609375, "num_tokens": 5, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -21.278423309326172, "logits_per_token": -1.5914794921875, "logits_per_char": -0.7233997691761364, "bits_per_byte": 1.0436452595713825, "num_chars": 11}, {"sum_logits": -7.814153671264648, "num_tokens": 5, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -18.775365829467773, "logits_per_token": -1.5628307342529297, "logits_per_char": -0.6010887439434345, "bits_per_byte": 0.8671877500219919, "num_chars": 13}, {"sum_logits": -7.797416687011719, "num_tokens": 5, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -20.89188003540039, "logits_per_token": -1.5594833374023438, "logits_per_char": -0.599801283616286, "bits_per_byte": 0.8653303373926722, "num_chars": 13}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 655, "native_id": "MDSA_2012_8_6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.402124404907227, "logits_per_token_corr": -3.8007081349690757, "logits_per_char_corr": -0.9501770337422689, "bits_per_byte_corr": 1.3708156945475414}, "model_output": [{"sum_logits": -5.405956268310547, "num_tokens": 3, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -17.417579650878906, "logits_per_token": -1.8019854227701824, "logits_per_char": -0.5405956268310547, "bits_per_byte": 0.7799146299559835, "num_chars": 10}, {"sum_logits": -5.84426212310791, "num_tokens": 3, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -16.25747299194336, "logits_per_token": -1.9480873743693035, "logits_per_char": -0.48702184359232586, "bits_per_byte": 0.7026239985557537, "num_chars": 12}, {"sum_logits": -11.402124404907227, "num_tokens": 3, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -17.064403533935547, "logits_per_token": -3.8007081349690757, "logits_per_char": -0.9501770337422689, "bits_per_byte": 1.3708156945475414, "num_chars": 12}, {"sum_logits": -3.796283006668091, "num_tokens": 2, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -14.488778114318848, "logits_per_token": -1.8981415033340454, "logits_per_char": -0.3163569172223409, "bits_per_byte": 0.4564065556279194, "num_chars": 12}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 656, "native_id": "MCAS_1999_8_5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.32754135131836, "logits_per_token_corr": -1.6655082702636719, "logits_per_char_corr": -0.4626411861843533, "bits_per_byte_corr": 0.6674501450196336}, "model_output": [{"sum_logits": -17.958585739135742, "num_tokens": 6, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -26.319908142089844, "logits_per_token": -2.9930976231892905, "logits_per_char": -0.7808080756145975, "bits_per_byte": 1.1264679385760439, "num_chars": 23}, {"sum_logits": -8.32754135131836, "num_tokens": 5, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -22.572423934936523, "logits_per_token": -1.6655082702636719, "logits_per_char": -0.4626411861843533, "bits_per_byte": 0.6674501450196336, "num_chars": 18}, {"sum_logits": -15.671357154846191, "num_tokens": 5, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -26.101299285888672, "logits_per_token": -3.134271430969238, "logits_per_char": -0.7835678577423095, "bits_per_byte": 1.130449462565631, "num_chars": 20}, {"sum_logits": -15.259676933288574, "num_tokens": 6, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -27.831748962402344, "logits_per_token": -2.543279488881429, "logits_per_char": -0.47686490416526794, "bits_per_byte": 0.6879706324137171, "num_chars": 32}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 657, "native_id": "WASL_2004_8_17", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -27.554004669189453, "logits_per_token_corr": -3.4442505836486816, "logits_per_char_corr": -0.6720488943704744, "bits_per_byte_corr": 0.9695616071438908}, "model_output": [{"sum_logits": -19.957351684570312, "num_tokens": 6, "num_tokens_all": 235, "is_greedy": false, "sum_logits_uncond": -29.42120361328125, "logits_per_token": -3.3262252807617188, "logits_per_char": -0.5702100481305804, "bits_per_byte": 0.8226392087036365, "num_chars": 35}, {"sum_logits": -21.467430114746094, "num_tokens": 6, "num_tokens_all": 235, "is_greedy": false, "sum_logits_uncond": -29.505098342895508, "logits_per_token": -3.577905019124349, "logits_per_char": -0.6505281852953362, "bits_per_byte": 0.9385137868847525, "num_chars": 33}, {"sum_logits": -24.10030746459961, "num_tokens": 10, "num_tokens_all": 239, "is_greedy": false, "sum_logits_uncond": -39.322021484375, "logits_per_token": -2.410030746459961, "logits_per_char": -0.5239197274912959, "bits_per_byte": 0.7558563926761327, "num_chars": 46}, {"sum_logits": -27.554004669189453, "num_tokens": 8, "num_tokens_all": 237, "is_greedy": false, "sum_logits_uncond": -34.8559455871582, "logits_per_token": -3.4442505836486816, "logits_per_char": -0.6720488943704744, "bits_per_byte": 0.9695616071438908, "num_chars": 41}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 658, "native_id": "Mercury_414365", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.134278297424316, "logits_per_token_corr": -1.8763254710606165, "logits_per_char_corr": -0.35498049452498154, "bits_per_byte_corr": 0.5121285990638706}, "model_output": [{"sum_logits": -9.984737396240234, "num_tokens": 6, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -28.54011344909668, "logits_per_token": -1.6641228993733723, "logits_per_char": -0.3220883031045237, "bits_per_byte": 0.4646751976175715, "num_chars": 31}, {"sum_logits": -8.422181129455566, "num_tokens": 6, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -28.96706771850586, "logits_per_token": -1.403696854909261, "logits_per_char": -0.2477112096898696, "bits_per_byte": 0.35737173379243775, "num_chars": 34}, {"sum_logits": -13.134278297424316, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -30.79039192199707, "logits_per_token": -1.8763254710606165, "logits_per_char": -0.35498049452498154, "bits_per_byte": 0.5121285990638706, "num_chars": 37}, {"sum_logits": -19.370864868164062, "num_tokens": 9, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -36.229461669921875, "logits_per_token": -2.152318318684896, "logits_per_char": -0.38741729736328123, "bits_per_byte": 0.5589250136610122, "num_chars": 50}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 659, "native_id": "Mercury_SC_415406", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.231128692626953, "logits_per_token_corr": -1.2051881154378254, "logits_per_char_corr": -0.34433946155366446, "bits_per_byte_corr": 0.4967768335662045}, "model_output": [{"sum_logits": -10.757863998413086, "num_tokens": 5, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -31.584836959838867, "logits_per_token": -2.1515727996826173, "logits_per_char": -0.5662033683375308, "bits_per_byte": 0.8168587916357697, "num_chars": 19}, {"sum_logits": -7.231128692626953, "num_tokens": 6, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -33.665855407714844, "logits_per_token": -1.2051881154378254, "logits_per_char": -0.34433946155366446, "bits_per_byte": 0.4967768335662045, "num_chars": 21}, {"sum_logits": -14.023811340332031, "num_tokens": 7, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -29.699295043945312, "logits_per_token": -2.003401620047433, "logits_per_char": -0.7011905670166015, "bits_per_byte": 1.0116041537536982, "num_chars": 20}, {"sum_logits": -12.827263832092285, "num_tokens": 6, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -32.74183654785156, "logits_per_token": -2.1378773053487143, "logits_per_char": -0.6751191490574887, "bits_per_byte": 0.9739910483551156, "num_chars": 19}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 660, "native_id": "MCAS_2000_8_29", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.1962890625, "logits_per_token_corr": -1.34423828125, "logits_per_char_corr": -0.252044677734375, "bits_per_byte_corr": 0.36362360665010096}, "model_output": [{"sum_logits": -30.564476013183594, "num_tokens": 18, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -43.432708740234375, "logits_per_token": -1.6980264451768663, "logits_per_char": -0.3183799584706624, "bits_per_byte": 0.4593251872043888, "num_chars": 96}, {"sum_logits": -24.1962890625, "num_tokens": 18, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -38.90659713745117, "logits_per_token": -1.34423828125, "logits_per_char": -0.252044677734375, "bits_per_byte": 0.36362360665010096, "num_chars": 96}, {"sum_logits": -28.578426361083984, "num_tokens": 18, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -43.539798736572266, "logits_per_token": -1.5876903533935547, "logits_per_char": -0.2976919412612915, "bits_per_byte": 0.42947868737058237, "num_chars": 96}, {"sum_logits": -34.42948913574219, "num_tokens": 18, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -46.291969299316406, "logits_per_token": -1.9127493964301214, "logits_per_char": -0.35864051183064777, "bits_per_byte": 0.5174088878803269, "num_chars": 96}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 661, "native_id": "Mercury_416230", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.910984992980957, "logits_per_token_corr": -1.1592487494150798, "logits_per_char_corr": -0.2959784041059778, "bits_per_byte_corr": 0.42700657581423057}, "model_output": [{"sum_logits": -12.863789558410645, "num_tokens": 12, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -28.54863166809082, "logits_per_token": -1.071982463200887, "logits_per_char": -0.27369765017894987, "bits_per_byte": 0.39486224261641695, "num_chars": 47}, {"sum_logits": -16.272680282592773, "num_tokens": 12, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -32.920616149902344, "logits_per_token": -1.3560566902160645, "logits_per_char": -0.3462272400551654, "bits_per_byte": 0.4995003222486187, "num_chars": 47}, {"sum_logits": -13.910984992980957, "num_tokens": 12, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -31.750612258911133, "logits_per_token": -1.1592487494150798, "logits_per_char": -0.2959784041059778, "bits_per_byte": 0.42700657581423057, "num_chars": 47}, {"sum_logits": -11.602404594421387, "num_tokens": 12, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -30.22358512878418, "logits_per_token": -0.9668670495351156, "logits_per_char": -0.24685967222173164, "bits_per_byte": 0.3561432249100231, "num_chars": 47}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 662, "native_id": "Mercury_7001295", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.890666961669922, "logits_per_token_corr": -1.555809565952846, "logits_per_char_corr": -0.27924787081204927, "bits_per_byte_corr": 0.4028695183996348}, "model_output": [{"sum_logits": -12.879167556762695, "num_tokens": 8, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -31.036861419677734, "logits_per_token": -1.609895944595337, "logits_per_char": -0.33023506555801785, "bits_per_byte": 0.4764284914085364, "num_chars": 39}, {"sum_logits": -27.80014419555664, "num_tokens": 6, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -43.8880615234375, "logits_per_token": -4.6333573659261065, "logits_per_char": -0.712824210142478, "bits_per_byte": 1.0283879529988842, "num_chars": 39}, {"sum_logits": -10.890666961669922, "num_tokens": 7, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -30.454914093017578, "logits_per_token": -1.555809565952846, "logits_per_char": -0.27924787081204927, "bits_per_byte": 0.4028695183996348, "num_chars": 39}, {"sum_logits": -13.00457763671875, "num_tokens": 7, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -35.3093147277832, "logits_per_token": -1.8577968052455358, "logits_per_char": -0.34222572728207235, "bits_per_byte": 0.49372735961481934, "num_chars": 38}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 663, "native_id": "MSA_2012_5_2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.3445024490356445, "logits_per_token_corr": -5.3445024490356445, "logits_per_char_corr": -0.4111155730027419, "bits_per_byte_corr": 0.5931143984037064}, "model_output": [{"sum_logits": -5.3445024490356445, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -16.48602294921875, "logits_per_token": -5.3445024490356445, "logits_per_char": -0.4111155730027419, "bits_per_byte": 0.5931143984037064, "num_chars": 13}, {"sum_logits": -4.56691837310791, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -15.851914405822754, "logits_per_token": -4.56691837310791, "logits_per_char": -0.5708647966384888, "bits_per_byte": 0.823583811129026, "num_chars": 8}, {"sum_logits": -1.846251368522644, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": true, "sum_logits_uncond": -16.763668060302734, "logits_per_token": -1.846251368522644, "logits_per_char": -0.15385428071022034, "bits_per_byte": 0.22196480780033284, "num_chars": 12}, {"sum_logits": -4.260157585144043, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -14.91162395477295, "logits_per_token": -4.260157585144043, "logits_per_char": -0.30429697036743164, "bits_per_byte": 0.4390077301069449, "num_chars": 14}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 664, "native_id": "MCAS_2005_8_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -22.827003479003906, "logits_per_token_corr": -2.0751821344549004, "logits_per_char_corr": -0.4004737452456826, "bits_per_byte_corr": 0.5777614862725914}, "model_output": [{"sum_logits": -30.180540084838867, "num_tokens": 10, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -41.49053955078125, "logits_per_token": -3.0180540084838867, "logits_per_char": -0.6560986974964971, "bits_per_byte": 0.9465503372125845, "num_chars": 46}, {"sum_logits": -22.586647033691406, "num_tokens": 10, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -32.10187911987305, "logits_per_token": -2.2586647033691407, "logits_per_char": -0.5019254896375868, "bits_per_byte": 0.7241254147964314, "num_chars": 45}, {"sum_logits": -22.827003479003906, "num_tokens": 11, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -41.750221252441406, "logits_per_token": -2.0751821344549004, "logits_per_char": -0.4004737452456826, "bits_per_byte": 0.5777614862725914, "num_chars": 57}, {"sum_logits": -31.08059310913086, "num_tokens": 13, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -46.07905578613281, "logits_per_token": -2.3908148545485277, "logits_per_char": -0.4933427477639819, "bits_per_byte": 0.7117431356581427, "num_chars": 63}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 665, "native_id": "Mercury_7206553", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.759992599487305, "logits_per_token_corr": -7.379996299743652, "logits_per_char_corr": -0.8682348587933708, "bits_per_byte_corr": 1.2525981251090255}, "model_output": [{"sum_logits": -9.92347240447998, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -17.44771957397461, "logits_per_token": -4.96173620223999, "logits_per_char": -0.496173620223999, "bits_per_byte": 0.7158272213176016, "num_chars": 20}, {"sum_logits": -9.422798156738281, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -22.607101440429688, "logits_per_token": -4.711399078369141, "logits_per_char": -0.52348878648546, "bits_per_byte": 0.7552346762240972, "num_chars": 18}, {"sum_logits": -10.293257713317871, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -20.806760787963867, "logits_per_token": -5.1466288566589355, "logits_per_char": -0.7352326938084194, "bits_per_byte": 1.0607165612576024, "num_chars": 14}, {"sum_logits": -14.759992599487305, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -20.998741149902344, "logits_per_token": -7.379996299743652, "logits_per_char": -0.8682348587933708, "bits_per_byte": 1.2525981251090255, "num_chars": 17}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 666, "native_id": "VASoL_2010_3_39", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.961376190185547, "logits_per_token_corr": -3.423053741455078, "logits_per_char_corr": -0.5844238095167207, "bits_per_byte_corr": 0.8431453317678148}, "model_output": [{"sum_logits": -23.961376190185547, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -38.1231575012207, "logits_per_token": -3.423053741455078, "logits_per_char": -0.5844238095167207, "bits_per_byte": 0.8431453317678148, "num_chars": 41}, {"sum_logits": -25.2545166015625, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -37.3790283203125, "logits_per_token": -4.209086100260417, "logits_per_char": -0.6645925421463815, "bits_per_byte": 0.9588043647670629, "num_chars": 38}, {"sum_logits": -19.082393646240234, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -32.66571807861328, "logits_per_token": -3.816478729248047, "logits_per_char": -0.5782543529163707, "bits_per_byte": 0.8342446873255039, "num_chars": 33}, {"sum_logits": -27.144424438476562, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -43.58441162109375, "logits_per_token": -4.524070739746094, "logits_per_char": -0.6786106109619141, "bits_per_byte": 0.9790281631300864, "num_chars": 40}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 667, "native_id": "Mercury_416380", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -2.959644079208374, "logits_per_token_corr": -2.959644079208374, "logits_per_char_corr": -0.42280629702976774, "bits_per_byte_corr": 0.6099805479819103}, "model_output": [{"sum_logits": -2.959644079208374, "num_tokens": 1, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -12.696199417114258, "logits_per_token": -2.959644079208374, "logits_per_char": -0.42280629702976774, "bits_per_byte": 0.6099805479819103, "num_chars": 7}, {"sum_logits": -3.0552213191986084, "num_tokens": 1, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -13.882975578308105, "logits_per_token": -3.0552213191986084, "logits_per_char": -0.339469035466512, "bits_per_byte": 0.4897502940032484, "num_chars": 9}, {"sum_logits": -2.6295835971832275, "num_tokens": 2, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -14.251230239868164, "logits_per_token": -1.3147917985916138, "logits_per_char": -0.1753055731455485, "bits_per_byte": 0.252912481017462, "num_chars": 15}, {"sum_logits": -8.000123023986816, "num_tokens": 3, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -15.30787467956543, "logits_per_token": -2.666707674662272, "logits_per_char": -0.500007688999176, "bits_per_byte": 0.7213586133259806, "num_chars": 16}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 668, "native_id": "OHAT_2008_5_34", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.464624404907227, "logits_per_token_corr": -0.8604204004461115, "logits_per_char_corr": -0.1971796751022339, "bits_per_byte_corr": 0.2844701394342942}, "model_output": [{"sum_logits": -22.668806076049805, "num_tokens": 10, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -33.3575325012207, "logits_per_token": -2.2668806076049806, "logits_per_char": -0.5812514378474309, "bits_per_byte": 0.8385685668926706, "num_chars": 39}, {"sum_logits": -20.94342803955078, "num_tokens": 11, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -37.31003952026367, "logits_per_token": -1.9039480035955256, "logits_per_char": -0.4986530485607329, "bits_per_byte": 0.7194042802832497, "num_chars": 42}, {"sum_logits": -9.464624404907227, "num_tokens": 11, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -28.21900749206543, "logits_per_token": -0.8604204004461115, "logits_per_char": -0.1971796751022339, "bits_per_byte": 0.2844701394342942, "num_chars": 48}, {"sum_logits": -33.55470275878906, "num_tokens": 11, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -41.08522415161133, "logits_per_token": -3.0504275235262783, "logits_per_char": -0.6579353482115502, "bits_per_byte": 0.9492000640910387, "num_chars": 51}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 669, "native_id": "Mercury_7268328", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -21.529296875, "logits_per_token_corr": -2.1529296875, "logits_per_char_corr": -0.4305859375, "bits_per_byte_corr": 0.6212041967082215}, "model_output": [{"sum_logits": -19.73036003112793, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -37.419532775878906, "logits_per_token": -3.2883933385213218, "logits_per_char": -0.5803047067978803, "bits_per_byte": 0.8372027227024272, "num_chars": 34}, {"sum_logits": -21.755937576293945, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -43.50718688964844, "logits_per_token": -2.417326397365994, "logits_per_char": -0.4834652794731988, "bits_per_byte": 0.6974929611384817, "num_chars": 45}, {"sum_logits": -21.529296875, "num_tokens": 10, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -44.52738571166992, "logits_per_token": -2.1529296875, "logits_per_char": -0.4305859375, "bits_per_byte": 0.6212041967082215, "num_chars": 50}, {"sum_logits": -29.38529396057129, "num_tokens": 12, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -51.442710876464844, "logits_per_token": -2.448774496714274, "logits_per_char": -0.4897548993428548, "bits_per_byte": 0.7065669645335177, "num_chars": 60}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 670, "native_id": "NYSEDREGENTS_2008_8_36", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -30.461259841918945, "logits_per_token_corr": -2.538438320159912, "logits_per_char_corr": -0.5857934584984412, "bits_per_byte_corr": 0.8451213175615031}, "model_output": [{"sum_logits": -21.19546127319336, "num_tokens": 9, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -27.004070281982422, "logits_per_token": -2.3550512525770397, "logits_per_char": -0.6233959197998047, "bits_per_byte": 0.8993702020062383, "num_chars": 34}, {"sum_logits": -37.055084228515625, "num_tokens": 9, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -42.57434844970703, "logits_per_token": -4.11723158094618, "logits_per_char": -1.0014887629328548, "bits_per_byte": 1.4448428717902904, "num_chars": 37}, {"sum_logits": -30.461259841918945, "num_tokens": 12, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -43.73350524902344, "logits_per_token": -2.538438320159912, "logits_per_char": -0.5857934584984412, "bits_per_byte": 0.8451213175615031, "num_chars": 52}, {"sum_logits": -22.890697479248047, "num_tokens": 8, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -29.097064971923828, "logits_per_token": -2.861337184906006, "logits_per_char": -0.7153342962265015, "bits_per_byte": 1.032009241744512, "num_chars": 32}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 671, "native_id": "Mercury_SC_414156", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -23.16790008544922, "logits_per_token_corr": -2.106172735040838, "logits_per_char_corr": -0.4371301902914947, "bits_per_byte_corr": 0.6306455577568414}, "model_output": [{"sum_logits": -26.990724563598633, "num_tokens": 8, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -34.2536735534668, "logits_per_token": -3.373840570449829, "logits_per_char": -0.5997938791910807, "bits_per_byte": 0.8653196550651479, "num_chars": 45}, {"sum_logits": -23.890214920043945, "num_tokens": 10, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -35.394569396972656, "logits_per_token": -2.3890214920043946, "logits_per_char": -0.4875554065315091, "bits_per_byte": 0.703393767162116, "num_chars": 49}, {"sum_logits": -23.16790008544922, "num_tokens": 11, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -48.205928802490234, "logits_per_token": -2.106172735040838, "logits_per_char": -0.4371301902914947, "bits_per_byte": 0.6306455577568414, "num_chars": 53}, {"sum_logits": -26.61805534362793, "num_tokens": 11, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -45.07825469970703, "logits_per_token": -2.4198232130570845, "logits_per_char": -0.49292695080792465, "bits_per_byte": 0.7111432674516218, "num_chars": 54}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 672, "native_id": "Mercury_7094133", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.443939208984375, "logits_per_token_corr": -2.407323201497396, "logits_per_char_corr": -0.4126839773995536, "bits_per_byte_corr": 0.5953771276490968}, "model_output": [{"sum_logits": -15.406017303466797, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -28.091690063476562, "logits_per_token": -5.135339101155599, "logits_per_char": -0.7703008651733398, "bits_per_byte": 1.1113092381788539, "num_chars": 20}, {"sum_logits": -10.110759735107422, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -29.929584503173828, "logits_per_token": -2.5276899337768555, "logits_per_char": -0.30638665863961884, "bits_per_byte": 0.44202251301423534, "num_chars": 33}, {"sum_logits": -7.731805801391602, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -20.731550216674805, "logits_per_token": -1.9329514503479004, "logits_per_char": -0.3361654696257218, "bits_per_byte": 0.48498425594748673, "num_chars": 23}, {"sum_logits": -14.443939208984375, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -31.06076431274414, "logits_per_token": -2.407323201497396, "logits_per_char": -0.4126839773995536, "bits_per_byte": 0.5953771276490968, "num_chars": 35}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 673, "native_id": "MEA_2013_5_15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.078531265258789, "logits_per_token_corr": -1.4397901807512556, "logits_per_char_corr": -1.007853126525879, "bits_per_byte_corr": 1.4540247075843673}, "model_output": [{"sum_logits": -8.1580810546875, "num_tokens": 7, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -19.483535766601562, "logits_per_token": -1.1654401506696428, "logits_per_char": -0.81580810546875, "bits_per_byte": 1.1769623080776317, "num_chars": 10}, {"sum_logits": -7.031640529632568, "num_tokens": 7, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -18.771488189697266, "logits_per_token": -1.0045200756617956, "logits_per_char": -0.6392400481484153, "bits_per_byte": 0.9222284474020035, "num_chars": 11}, {"sum_logits": -10.078531265258789, "num_tokens": 7, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -19.115842819213867, "logits_per_token": -1.4397901807512556, "logits_per_char": -1.007853126525879, "bits_per_byte": 1.4540247075843673, "num_chars": 10}, {"sum_logits": -9.325767517089844, "num_tokens": 7, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -18.98377799987793, "logits_per_token": -1.3322525024414062, "logits_per_char": -0.9325767517089844, "bits_per_byte": 1.3454238549398565, "num_chars": 10}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 674, "native_id": "OHAT_2010_8_35", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.636137962341309, "logits_per_token_corr": -0.9393563270568848, "logits_per_char_corr": -0.18787126541137694, "bits_per_byte_corr": 0.2710409429347225}, "model_output": [{"sum_logits": -5.636137962341309, "num_tokens": 6, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -23.106691360473633, "logits_per_token": -0.9393563270568848, "logits_per_char": -0.18787126541137694, "bits_per_byte": 0.2710409429347225, "num_chars": 30}, {"sum_logits": -17.63335418701172, "num_tokens": 6, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -27.761306762695312, "logits_per_token": -2.938892364501953, "logits_per_char": -0.6297626495361328, "bits_per_byte": 0.9085554514235258, "num_chars": 28}, {"sum_logits": -7.331084728240967, "num_tokens": 4, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -25.15652847290039, "logits_per_token": -1.8327711820602417, "logits_per_char": -0.30546186367670697, "bits_per_byte": 0.4406883159074023, "num_chars": 24}, {"sum_logits": -16.29793930053711, "num_tokens": 4, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -27.115680694580078, "logits_per_token": -4.074484825134277, "logits_per_char": -0.7408154227516868, "bits_per_byte": 1.0687707366186874, "num_chars": 22}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 675, "native_id": "Mercury_SC_416174", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.347207546234131, "logits_per_token_corr": -6.347207546234131, "logits_per_char_corr": -1.0578679243723552, "bits_per_byte_corr": 1.5261808084085944}, "model_output": [{"sum_logits": -3.146564483642578, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -18.369638442993164, "logits_per_token": -1.573282241821289, "logits_per_char": -0.24204342181865984, "bits_per_byte": 0.34919484433782694, "num_chars": 13}, {"sum_logits": -7.164544582366943, "num_tokens": 1, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -14.7823486328125, "logits_per_token": -7.164544582366943, "logits_per_char": -1.0235063689095634, "bits_per_byte": 1.4766075627451578, "num_chars": 7}, {"sum_logits": -6.347207546234131, "num_tokens": 1, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -16.363441467285156, "logits_per_token": -6.347207546234131, "logits_per_char": -1.0578679243723552, "bits_per_byte": 1.5261808084085944, "num_chars": 6}, {"sum_logits": -8.869841575622559, "num_tokens": 1, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -14.91931438446045, "logits_per_token": -8.869841575622559, "logits_per_char": -1.4783069292704265, "bits_per_byte": 2.132746075771768, "num_chars": 6}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 676, "native_id": "TIMSS_1995_8_J6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.328948974609375, "logits_per_token_corr": -1.8143276638454862, "logits_per_char_corr": -0.3628655327690972, "bits_per_byte_corr": 0.5235043046358843}, "model_output": [{"sum_logits": -9.958538055419922, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -29.51067543029785, "logits_per_token": -1.9917076110839844, "logits_per_char": -0.4329799154530401, "bits_per_byte": 0.6246579768290724, "num_chars": 23}, {"sum_logits": -16.328948974609375, "num_tokens": 9, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -40.6453971862793, "logits_per_token": -1.8143276638454862, "logits_per_char": -0.3628655327690972, "bits_per_byte": 0.5235043046358843, "num_chars": 45}, {"sum_logits": -33.770790100097656, "num_tokens": 14, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -53.120452880859375, "logits_per_token": -2.4121992928641185, "logits_per_char": -0.46261356301503637, "bits_per_byte": 0.6674102932102465, "num_chars": 73}, {"sum_logits": -25.264156341552734, "num_tokens": 11, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -53.12800216674805, "logits_per_token": -2.296741485595703, "logits_per_char": -0.43558890244056436, "bits_per_byte": 0.6284219494177202, "num_chars": 58}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 677, "native_id": "Mercury_SC_401587", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.544102668762207, "logits_per_token_corr": -4.544102668762207, "logits_per_char_corr": -0.6491575241088867, "bits_per_byte_corr": 0.9365363407883214}, "model_output": [{"sum_logits": -4.544102668762207, "num_tokens": 1, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -13.212084770202637, "logits_per_token": -4.544102668762207, "logits_per_char": -0.6491575241088867, "bits_per_byte": 0.9365363407883214, "num_chars": 7}, {"sum_logits": -7.895073890686035, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -17.747591018676758, "logits_per_token": -3.9475369453430176, "logits_per_char": -0.8772304322984483, "bits_per_byte": 1.2655759943947622, "num_chars": 9}, {"sum_logits": -7.732438087463379, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -17.11282730102539, "logits_per_token": -1.9331095218658447, "logits_per_char": -0.6443698406219482, "bits_per_byte": 0.9296291735643644, "num_chars": 12}, {"sum_logits": -2.5458478927612305, "num_tokens": 3, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -17.223102569580078, "logits_per_token": -0.8486159642537435, "logits_per_char": -0.18184627805437362, "bits_per_byte": 0.2623487235533488, "num_chars": 14}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 678, "native_id": "MDSA_2011_5_23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -18.695270538330078, "logits_per_token_corr": -1.8695270538330078, "logits_per_char_corr": -0.4248925122347745, "bits_per_byte_corr": 0.6129903203124027}, "model_output": [{"sum_logits": -23.60970115661621, "num_tokens": 7, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -39.515140533447266, "logits_per_token": -3.372814450945173, "logits_per_char": -0.7869900385538737, "bits_per_byte": 1.1353866258515035, "num_chars": 30}, {"sum_logits": -24.75876235961914, "num_tokens": 10, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -41.749385833740234, "logits_per_token": -2.475876235961914, "logits_per_char": -0.5626991445367987, "bits_per_byte": 0.8118032653362848, "num_chars": 44}, {"sum_logits": -28.319114685058594, "num_tokens": 8, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -40.56797409057617, "logits_per_token": -3.539889335632324, "logits_per_char": -0.6436162428422407, "bits_per_byte": 0.9285419617847546, "num_chars": 44}, {"sum_logits": -18.695270538330078, "num_tokens": 10, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -39.749366760253906, "logits_per_token": -1.8695270538330078, "logits_per_char": -0.4248925122347745, "bits_per_byte": 0.6129903203124027, "num_chars": 44}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 679, "native_id": "AIMS_2008_8_11", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.032078742980957, "logits_per_token_corr": -2.147439820425851, "logits_per_char_corr": -0.39558101955213043, "bits_per_byte_corr": 0.5707027751780687}, "model_output": [{"sum_logits": -15.063504219055176, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -26.20901870727539, "logits_per_token": -2.510584036509196, "logits_per_char": -0.4564698248198538, "bits_per_byte": 0.6585467525835301, "num_chars": 33}, {"sum_logits": -16.97034454345703, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -27.815622329711914, "logits_per_token": -3.3940689086914064, "logits_per_char": -0.5474304691437752, "bits_per_byte": 0.7897752230658106, "num_chars": 31}, {"sum_logits": -15.032078742980957, "num_tokens": 7, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -29.005924224853516, "logits_per_token": -2.147439820425851, "logits_per_char": -0.39558101955213043, "bits_per_byte": 0.5707027751780687, "num_chars": 38}, {"sum_logits": -22.540584564208984, "num_tokens": 7, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -38.940330505371094, "logits_per_token": -3.2200835091727122, "logits_per_char": -0.5779637067745893, "bits_per_byte": 0.833825373578102, "num_chars": 39}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 680, "native_id": "Mercury_7159215", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.077133178710938, "logits_per_token_corr": -3.0154266357421875, "logits_per_char_corr": -0.486359134797127, "bits_per_byte_corr": 0.7016679118633661}, "model_output": [{"sum_logits": -18.44728660583496, "num_tokens": 8, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -40.73114013671875, "logits_per_token": -2.30591082572937, "logits_per_char": -0.4730073488675631, "bits_per_byte": 0.6824053565157594, "num_chars": 39}, {"sum_logits": -14.95427131652832, "num_tokens": 6, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -35.22591781616211, "logits_per_token": -2.49237855275472, "logits_per_char": -0.41539642545912003, "bits_per_byte": 0.599290363013305, "num_chars": 36}, {"sum_logits": -14.532608032226562, "num_tokens": 5, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -37.66448974609375, "logits_per_token": -2.9065216064453123, "logits_per_char": -0.4403820615826231, "bits_per_byte": 0.6353370163421649, "num_chars": 33}, {"sum_logits": -15.077133178710938, "num_tokens": 5, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -35.439842224121094, "logits_per_token": -3.0154266357421875, "logits_per_char": -0.486359134797127, "bits_per_byte": 0.7016679118633661, "num_chars": 31}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 681, "native_id": "MCAS_2006_9_30", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.764501094818115, "logits_per_token_corr": -1.9411252737045288, "logits_per_char_corr": -0.43136117193433976, "bits_per_byte_corr": 0.6223226235821706}, "model_output": [{"sum_logits": -10.560491561889648, "num_tokens": 3, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -20.26654815673828, "logits_per_token": -3.5201638539632163, "logits_per_char": -0.4800223437222568, "bits_per_byte": 0.6925258548044948, "num_chars": 22}, {"sum_logits": -7.764501094818115, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -22.333951950073242, "logits_per_token": -1.9411252737045288, "logits_per_char": -0.43136117193433976, "bits_per_byte": 0.6223226235821706, "num_chars": 18}, {"sum_logits": -12.270437240600586, "num_tokens": 3, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -19.26781463623047, "logits_per_token": -4.090145746866862, "logits_per_char": -0.6135218620300293, "bits_per_byte": 0.8851249478283221, "num_chars": 20}, {"sum_logits": -15.970894813537598, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -25.911582946777344, "logits_per_token": -3.9927237033843994, "logits_per_char": -0.9394644007963293, "bits_per_byte": 1.3553606321215597, "num_chars": 17}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 682, "native_id": "MCAS_1999_4_27", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.930541038513184, "logits_per_token_corr": -2.786108207702637, "logits_per_char_corr": -0.5804392099380493, "bits_per_byte_corr": 0.8373967697157333}, "model_output": [{"sum_logits": -13.930541038513184, "num_tokens": 5, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -28.274045944213867, "logits_per_token": -2.786108207702637, "logits_per_char": -0.5804392099380493, "bits_per_byte": 0.8373967697157333, "num_chars": 24}, {"sum_logits": -14.758769035339355, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -26.67344856262207, "logits_per_token": -2.459794839223226, "logits_per_char": -0.3599699764716916, "bits_per_byte": 0.5193268999249994, "num_chars": 41}, {"sum_logits": -23.36756134033203, "num_tokens": 7, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -35.452674865722656, "logits_per_token": -3.3382230486188615, "logits_per_char": -0.47688900694555164, "bits_per_byte": 0.6880054053753041, "num_chars": 49}, {"sum_logits": -21.528711318969727, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -31.117931365966797, "logits_per_token": -3.588118553161621, "logits_per_char": -0.6523851914839311, "bits_per_byte": 0.9411928805039405, "num_chars": 33}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 683, "native_id": "Mercury_7016538", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.272489070892334, "logits_per_token_corr": -1.636244535446167, "logits_per_char_corr": -0.3636098967658149, "bits_per_byte_corr": 0.524578194882566}, "model_output": [{"sum_logits": -6.138629913330078, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -16.582656860351562, "logits_per_token": -3.069314956665039, "logits_per_char": -1.023104985555013, "bits_per_byte": 1.4760284889700521, "num_chars": 6}, {"sum_logits": -4.382723808288574, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -16.772687911987305, "logits_per_token": -2.191361904144287, "logits_per_char": -0.626103401184082, "bits_per_byte": 0.9032762719726373, "num_chars": 7}, {"sum_logits": -3.272489070892334, "num_tokens": 2, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -17.292133331298828, "logits_per_token": -1.636244535446167, "logits_per_char": -0.3636098967658149, "bits_per_byte": 0.524578194882566, "num_chars": 9}, {"sum_logits": -4.782244682312012, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -18.027198791503906, "logits_per_token": -1.5940815607706706, "logits_per_char": -0.28130851072423596, "bits_per_byte": 0.40584239338200656, "num_chars": 17}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 684, "native_id": "Mercury_SC_409266", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.941787242889404, "logits_per_token_corr": -1.388357448577881, "logits_per_char_corr": -0.36535722330996867, "bits_per_byte_corr": 0.527099054222632}, "model_output": [{"sum_logits": -6.941787242889404, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -23.275787353515625, "logits_per_token": -1.388357448577881, "logits_per_char": -0.36535722330996867, "bits_per_byte": 0.527099054222632, "num_chars": 19}, {"sum_logits": -9.597350120544434, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -23.973567962646484, "logits_per_token": -1.9194700241088867, "logits_per_char": -0.41727609219758405, "bits_per_byte": 0.6020021488954129, "num_chars": 23}, {"sum_logits": -16.16109275817871, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -29.793622970581055, "logits_per_token": -3.232218551635742, "logits_per_char": -0.6464437103271484, "bits_per_byte": 0.9326211351035086, "num_chars": 25}, {"sum_logits": -16.944808959960938, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -28.43818473815918, "logits_per_token": -3.3889617919921875, "logits_per_char": -0.6275855170355903, "bits_per_byte": 0.9054145131616327, "num_chars": 27}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 685, "native_id": "OHAT_2007_5_15", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.74316120147705, "logits_per_token_corr": -3.6857903003692627, "logits_per_char_corr": -1.474316120147705, "bits_per_byte_corr": 2.1269885552412795}, "model_output": [{"sum_logits": -16.634418487548828, "num_tokens": 5, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -18.535306930541992, "logits_per_token": -3.3268836975097655, "logits_per_char": -1.1881727491106306, "bits_per_byte": 1.714170932862545, "num_chars": 14}, {"sum_logits": -14.74316120147705, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -15.715030670166016, "logits_per_token": -3.6857903003692627, "logits_per_char": -1.474316120147705, "bits_per_byte": 2.1269885552412795, "num_chars": 10}, {"sum_logits": -15.155174255371094, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -16.64655303955078, "logits_per_token": -3.7887935638427734, "logits_per_char": -1.5155174255371093, "bits_per_byte": 2.1864294742047674, "num_chars": 10}, {"sum_logits": -10.183247566223145, "num_tokens": 2, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -12.470272064208984, "logits_per_token": -5.091623783111572, "logits_per_char": -0.9257497787475586, "bits_per_byte": 1.3355746149041174, "num_chars": 11}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 686, "native_id": "Mercury_7230073", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.317102432250977, "logits_per_token_corr": -2.329275608062744, "logits_per_char_corr": -0.6655073165893555, "bits_per_byte_corr": 0.9601241053194743}, "model_output": [{"sum_logits": -13.954211235046387, "num_tokens": 3, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -15.836193084716797, "logits_per_token": -4.651403745015462, "logits_per_char": -1.0734008642343373, "bits_per_byte": 1.5485901037179186, "num_chars": 13}, {"sum_logits": -9.317102432250977, "num_tokens": 4, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -17.50924301147461, "logits_per_token": -2.329275608062744, "logits_per_char": -0.6655073165893555, "bits_per_byte": 0.9601241053194743, "num_chars": 14}, {"sum_logits": -7.509658336639404, "num_tokens": 4, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -19.564382553100586, "logits_per_token": -1.877414584159851, "logits_per_char": -0.5006438891092936, "bits_per_byte": 0.7222764560698609, "num_chars": 15}, {"sum_logits": -10.66141128540039, "num_tokens": 3, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -21.980567932128906, "logits_per_token": -3.5538037618001304, "logits_per_char": -0.710760752360026, "bits_per_byte": 1.0254110126890548, "num_chars": 15}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 687, "native_id": "Mercury_7245840", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.819772720336914, "logits_per_token_corr": -4.909886360168457, "logits_per_char_corr": -0.5455429289076064, "bits_per_byte_corr": 0.7870520781276095}, "model_output": [{"sum_logits": -3.206176519393921, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -16.653596878051758, "logits_per_token": -1.6030882596969604, "logits_per_char": -0.21374510129292806, "bits_per_byte": 0.308368997649838, "num_chars": 15}, {"sum_logits": -9.819772720336914, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -21.517580032348633, "logits_per_token": -4.909886360168457, "logits_per_char": -0.5455429289076064, "bits_per_byte": 0.7870520781276095, "num_chars": 18}, {"sum_logits": -9.458101272583008, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -16.74452781677246, "logits_per_token": -3.152700424194336, "logits_per_char": -0.591131329536438, "bits_per_byte": 0.8528222376369314, "num_chars": 16}, {"sum_logits": -15.031949043273926, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -25.397428512573242, "logits_per_token": -5.010649681091309, "logits_per_char": -0.653563001881475, "bits_per_byte": 0.9428921017235856, "num_chars": 23}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 688, "native_id": "Mercury_SC_401788", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.8353590965271, "logits_per_token_corr": -2.611786365509033, "logits_per_char_corr": -0.5223572731018067, "bits_per_byte_corr": 0.7536022474767998}, "model_output": [{"sum_logits": -16.18194007873535, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -23.881258010864258, "logits_per_token": -5.393980026245117, "logits_per_char": -0.7705685751778739, "bits_per_byte": 1.1116954620747919, "num_chars": 21}, {"sum_logits": -7.8353590965271, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -15.722477912902832, "logits_per_token": -2.611786365509033, "logits_per_char": -0.5223572731018067, "bits_per_byte": 0.7536022474767998, "num_chars": 15}, {"sum_logits": -8.602310180664062, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -21.552635192871094, "logits_per_token": -2.867436726888021, "logits_per_char": -0.3740134861158288, "bits_per_byte": 0.5395874016452871, "num_chars": 23}, {"sum_logits": -14.6907377243042, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -26.686241149902344, "logits_per_token": -3.67268443107605, "logits_per_char": -0.5065771629070414, "bits_per_byte": 0.7308363607541142, "num_chars": 29}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 689, "native_id": "ACTAAP_2014_7_5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.973692893981934, "logits_per_token_corr": -0.9956154823303223, "logits_per_char_corr": -0.22124788496229383, "bits_per_byte_corr": 0.3191932264425025}, "model_output": [{"sum_logits": -13.679479598999023, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -32.49067306518555, "logits_per_token": -2.2799132664998374, "logits_per_char": -0.5699783166249593, "bits_per_byte": 0.8223048908096591, "num_chars": 24}, {"sum_logits": -12.379486083984375, "num_tokens": 7, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -30.383834838867188, "logits_per_token": -1.7684980119977678, "logits_per_char": -0.495179443359375, "bits_per_byte": 0.714392927285241, "num_chars": 25}, {"sum_logits": -8.17622184753418, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -25.793087005615234, "logits_per_token": -1.635244369506836, "logits_per_char": -0.31447007105900693, "bits_per_byte": 0.4536844120251552, "num_chars": 26}, {"sum_logits": -5.973692893981934, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -25.427997589111328, "logits_per_token": -0.9956154823303223, "logits_per_char": -0.22124788496229383, "bits_per_byte": 0.3191932264425025, "num_chars": 27}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 690, "native_id": "MCAS_2004_5_11", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.12255096435547, "logits_per_token_corr": -2.3032215663364957, "logits_per_char_corr": -0.520082289172757, "bits_per_byte_corr": 0.7503201394442555}, "model_output": [{"sum_logits": -17.204872131347656, "num_tokens": 6, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -38.10575485229492, "logits_per_token": -2.867478688557943, "logits_per_char": -0.6617258512056791, "bits_per_byte": 0.9546686039631471, "num_chars": 26}, {"sum_logits": -20.24240493774414, "num_tokens": 6, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -36.27851867675781, "logits_per_token": -3.37373415629069, "logits_per_char": -0.5953648511101218, "bits_per_byte": 0.8589299182167859, "num_chars": 34}, {"sum_logits": -26.805328369140625, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -35.3369255065918, "logits_per_token": -3.8293326241629466, "logits_per_char": -0.9573331560407367, "bits_per_byte": 1.3811397966995431, "num_chars": 28}, {"sum_logits": -16.12255096435547, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -34.64683532714844, "logits_per_token": -2.3032215663364957, "logits_per_char": -0.520082289172757, "bits_per_byte": 0.7503201394442555, "num_chars": 31}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 691, "native_id": "NCEOGA_2013_8_7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.271673202514648, "logits_per_token_corr": -4.09055773417155, "logits_per_char_corr": -0.6817596223619249, "bits_per_byte_corr": 0.9835712262605881}, "model_output": [{"sum_logits": -12.271673202514648, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -25.58094596862793, "logits_per_token": -4.09055773417155, "logits_per_char": -0.6817596223619249, "bits_per_byte": 0.9835712262605881, "num_chars": 18}, {"sum_logits": -9.984766006469727, "num_tokens": 5, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -28.237668991088867, "logits_per_token": -1.9969532012939453, "logits_per_char": -0.3565987859453474, "bits_per_byte": 0.5144633000707473, "num_chars": 28}, {"sum_logits": -7.428909778594971, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -22.929763793945312, "logits_per_token": -2.4763032595316568, "logits_per_char": -0.2857272991767296, "bits_per_byte": 0.41221735756916117, "num_chars": 26}, {"sum_logits": -16.065898895263672, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -24.408594131469727, "logits_per_token": -5.355299631754558, "logits_per_char": -1.0710599263509115, "bits_per_byte": 1.5452128442424686, "num_chars": 15}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 692, "native_id": "LEAP__7_10339", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -56.35746765136719, "logits_per_token_corr": -2.3482278188069663, "logits_per_char_corr": -0.4657641954658445, "bits_per_byte_corr": 0.6719556950226945}, "model_output": [{"sum_logits": -56.35746765136719, "num_tokens": 24, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -70.67817687988281, "logits_per_token": -2.3482278188069663, "logits_per_char": -0.4657641954658445, "bits_per_byte": 0.6719556950226945, "num_chars": 121}, {"sum_logits": -67.62364196777344, "num_tokens": 24, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -79.72856140136719, "logits_per_token": -2.8176517486572266, "logits_per_char": -0.5453519513530116, "bits_per_byte": 0.7867765557566744, "num_chars": 124}, {"sum_logits": -57.31267547607422, "num_tokens": 24, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -74.0762939453125, "logits_per_token": -2.388028144836426, "logits_per_char": -0.47365847500887787, "bits_per_byte": 0.6833447329708281, "num_chars": 121}, {"sum_logits": -60.067230224609375, "num_tokens": 23, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -75.48916625976562, "logits_per_token": -2.611618705417799, "logits_per_char": -0.4964233902860279, "bits_per_byte": 0.7161875633474534, "num_chars": 121}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 693, "native_id": "Mercury_7018270", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.837532043457031, "logits_per_token_corr": -5.279177347819011, "logits_per_char_corr": -1.4397756403142756, "bits_per_byte_corr": 2.07715717627563}, "model_output": [{"sum_logits": -10.67058277130127, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -16.23455238342285, "logits_per_token": -5.335291385650635, "logits_per_char": -1.778430461883545, "bits_per_byte": 2.5657328079271022, "num_chars": 6}, {"sum_logits": -6.4117913246154785, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -15.397303581237793, "logits_per_token": -3.2058956623077393, "logits_per_char": -0.9159701892307827, "bits_per_byte": 1.3214656496063248, "num_chars": 7}, {"sum_logits": -15.837532043457031, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -21.7208251953125, "logits_per_token": -5.279177347819011, "logits_per_char": -1.4397756403142756, "bits_per_byte": 2.07715717627563, "num_chars": 11}, {"sum_logits": -11.439434051513672, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -25.511146545410156, "logits_per_token": -2.859858512878418, "logits_per_char": -0.6355241139729818, "bits_per_byte": 0.9168674875948319, "num_chars": 18}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 694, "native_id": "Mercury_7034808", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.354559898376465, "logits_per_token_corr": -2.870911979675293, "logits_per_char_corr": -0.5741823959350586, "bits_per_byte_corr": 0.8283700951818475}, "model_output": [{"sum_logits": -14.354559898376465, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -28.391101837158203, "logits_per_token": -2.870911979675293, "logits_per_char": -0.5741823959350586, "bits_per_byte": 0.8283700951818475, "num_chars": 25}, {"sum_logits": -25.222957611083984, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -33.34062957763672, "logits_per_token": -5.0445915222167965, "logits_per_char": -0.813643793905935, "bits_per_byte": 1.1738398665190175, "num_chars": 31}, {"sum_logits": -13.837504386901855, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -28.31439971923828, "logits_per_token": -1.9767863409859794, "logits_per_char": -0.4193183147546017, "bits_per_byte": 0.6049484532508159, "num_chars": 33}, {"sum_logits": -20.697879791259766, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -32.698509216308594, "logits_per_token": -3.4496466318766275, "logits_per_char": -0.6087611703311696, "bits_per_byte": 0.8782567215231709, "num_chars": 34}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 695, "native_id": "Mercury_7216300", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.7934513092041, "logits_per_token_corr": -5.448362827301025, "logits_per_char_corr": -0.8382096657386193, "bits_per_byte_corr": 1.2092809279871706}, "model_output": [{"sum_logits": -8.115988731384277, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -21.111854553222656, "logits_per_token": -2.7053295771280923, "logits_per_char": -0.5797134808131627, "bits_per_byte": 0.8363497639062298, "num_chars": 14}, {"sum_logits": -9.720256805419922, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -20.558706283569336, "logits_per_token": -3.2400856018066406, "logits_per_char": -0.5115924634431538, "bits_per_byte": 0.7380719099661365, "num_chars": 19}, {"sum_logits": -21.7934513092041, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -27.93888282775879, "logits_per_token": -5.448362827301025, "logits_per_char": -0.8382096657386193, "bits_per_byte": 1.2092809279871706, "num_chars": 26}, {"sum_logits": -17.881324768066406, "num_tokens": 5, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -25.547882080078125, "logits_per_token": -3.5762649536132813, "logits_per_char": -0.812787489457564, "bits_per_byte": 1.1726044803378606, "num_chars": 22}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 696, "native_id": "Mercury_SC_400985", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -36.569122314453125, "logits_per_token_corr": -3.324465664950284, "logits_per_char_corr": -0.7780664322224069, "bits_per_byte_corr": 1.1225125832502416}, "model_output": [{"sum_logits": -31.03973388671875, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -32.74454879760742, "logits_per_token": -3.8799667358398438, "logits_per_char": -0.8168351022820723, "bits_per_byte": 1.1784439512872216, "num_chars": 38}, {"sum_logits": -16.680683135986328, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -33.76278305053711, "logits_per_token": -1.8534092373318143, "logits_per_char": -0.39715912228538874, "bits_per_byte": 0.5729794961653554, "num_chars": 42}, {"sum_logits": -36.569122314453125, "num_tokens": 11, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -46.44032287597656, "logits_per_token": -3.324465664950284, "logits_per_char": -0.7780664322224069, "bits_per_byte": 1.1225125832502416, "num_chars": 47}, {"sum_logits": -32.2327766418457, "num_tokens": 12, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -36.558753967285156, "logits_per_token": -2.6860647201538086, "logits_per_char": -0.6320152282714844, "bits_per_byte": 0.9118052355942318, "num_chars": 51}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 697, "native_id": "Mercury_7188528", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.451513290405273, "logits_per_token_corr": -3.2257566452026367, "logits_per_char_corr": -0.3795007817885455, "bits_per_byte_corr": 0.5475038959002126}, "model_output": [{"sum_logits": -6.451513290405273, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -18.340065002441406, "logits_per_token": -3.2257566452026367, "logits_per_char": -0.3795007817885455, "bits_per_byte": 0.5475038959002126, "num_chars": 17}, {"sum_logits": -6.234134197235107, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -17.077747344970703, "logits_per_token": -3.1170670986175537, "logits_per_char": -0.3281123261702688, "bits_per_byte": 0.47336602582072895, "num_chars": 19}, {"sum_logits": -17.34168815612793, "num_tokens": 3, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -29.519920349121094, "logits_per_token": -5.78056271870931, "logits_per_char": -0.912720429269891, "bits_per_byte": 1.3167772370266637, "num_chars": 19}, {"sum_logits": -9.46666145324707, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -23.228256225585938, "logits_per_token": -4.733330726623535, "logits_per_char": -0.45079340253557476, "bits_per_byte": 0.6503574063040032, "num_chars": 21}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 698, "native_id": "TIMSS_1995_8_R2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -27.145450592041016, "logits_per_token_corr": -1.6965906620025635, "logits_per_char_corr": -0.3934123274208843, "bits_per_byte_corr": 0.5675740137951026}, "model_output": [{"sum_logits": -31.00164794921875, "num_tokens": 16, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -46.711647033691406, "logits_per_token": -1.9376029968261719, "logits_per_char": -0.44929924564085144, "bits_per_byte": 0.6482017935616743, "num_chars": 69}, {"sum_logits": -27.145450592041016, "num_tokens": 16, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -48.03924560546875, "logits_per_token": -1.6965906620025635, "logits_per_char": -0.3934123274208843, "bits_per_byte": 0.5675740137951026, "num_chars": 69}, {"sum_logits": -20.20400047302246, "num_tokens": 11, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -34.92149353027344, "logits_per_token": -1.8367273157293147, "logits_per_char": -0.4591818289323287, "bits_per_byte": 0.6624593474674708, "num_chars": 44}, {"sum_logits": -21.13847541809082, "num_tokens": 8, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -30.465286254882812, "logits_per_token": -2.6423094272613525, "logits_per_char": -0.6405598611542673, "bits_per_byte": 0.9241325350804483, "num_chars": 33}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 699, "native_id": "Mercury_SC_400032", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -1.0494804382324219, "logits_per_token_corr": -1.0494804382324219, "logits_per_char_corr": -0.08745670318603516, "bits_per_byte_corr": 0.12617335197908158}, "model_output": [{"sum_logits": -1.7902851104736328, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -16.48602294921875, "logits_per_token": -1.7902851104736328, "logits_per_char": -0.13771423926720253, "bits_per_byte": 0.198679650050732, "num_chars": 13}, {"sum_logits": -10.85583209991455, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -14.197172164916992, "logits_per_token": -10.85583209991455, "logits_per_char": -0.7237221399943033, "bits_per_byte": 1.0441103423520797, "num_chars": 15}, {"sum_logits": -1.0494804382324219, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": true, "sum_logits_uncond": -16.763668060302734, "logits_per_token": -1.0494804382324219, "logits_per_char": -0.08745670318603516, "bits_per_byte": 0.12617335197908158, "num_chars": 12}, {"sum_logits": -8.962042808532715, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -16.39728546142578, "logits_per_token": -8.962042808532715, "logits_per_char": -0.6893879083486704, "bits_per_byte": 0.9945765166241565, "num_chars": 13}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 700, "native_id": "Mercury_7252245", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.582993507385254, "logits_per_token_corr": -5.582993507385254, "logits_per_char_corr": -0.4294610390296349, "bits_per_byte_corr": 0.619581311263521}, "model_output": [{"sum_logits": -4.208374977111816, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -11.888246536254883, "logits_per_token": -4.208374977111816, "logits_per_char": -0.526046872138977, "bits_per_byte": 0.7589252137105981, "num_chars": 8}, {"sum_logits": -3.9596872329711914, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -16.037845611572266, "logits_per_token": -3.9596872329711914, "logits_per_char": -0.4949609041213989, "bits_per_byte": 0.7140776418103729, "num_chars": 8}, {"sum_logits": -4.403167724609375, "num_tokens": 2, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -16.045238494873047, "logits_per_token": -2.2015838623046875, "logits_per_char": -0.33870520958533656, "bits_per_byte": 0.48864832619237314, "num_chars": 13}, {"sum_logits": -5.582993507385254, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -15.550889015197754, "logits_per_token": -5.582993507385254, "logits_per_char": -0.4294610390296349, "bits_per_byte": 0.619581311263521, "num_chars": 13}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 701, "native_id": "MCAS_2002_8_17", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.9992618560791, "logits_per_token_corr": -2.1249077320098877, "logits_per_char_corr": -0.35415128866831463, "bits_per_byte_corr": 0.5109323078865803}, "model_output": [{"sum_logits": -26.123332977294922, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -38.316349029541016, "logits_per_token": -3.2654166221618652, "logits_per_char": -0.621984118507022, "bits_per_byte": 0.8973334032824186, "num_chars": 42}, {"sum_logits": -24.218013763427734, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -33.305973052978516, "logits_per_token": -2.690890418158637, "logits_per_char": -0.526478560074516, "bits_per_byte": 0.7595480077544121, "num_chars": 46}, {"sum_logits": -13.918539047241211, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -28.640073776245117, "logits_per_token": -2.7837078094482424, "logits_per_char": -0.5799391269683838, "bits_per_byte": 0.8366753024953633, "num_chars": 24}, {"sum_logits": -16.9992618560791, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -33.37685775756836, "logits_per_token": -2.1249077320098877, "logits_per_char": -0.35415128866831463, "bits_per_byte": 0.5109323078865803, "num_chars": 48}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 702, "native_id": "MDSA_2007_8_30", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.73350715637207, "logits_per_token_corr": -3.9333767890930176, "logits_per_char_corr": -0.5619109698704311, "bits_per_byte_corr": 0.810666169653761}, "model_output": [{"sum_logits": -13.787996292114258, "num_tokens": 3, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -22.048912048339844, "logits_per_token": -4.595998764038086, "logits_per_char": -0.7256840153744346, "bits_per_byte": 1.0469407302338392, "num_chars": 19}, {"sum_logits": -12.914101600646973, "num_tokens": 3, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -19.946535110473633, "logits_per_token": -4.304700533548991, "logits_per_char": -0.4612179143088205, "bits_per_byte": 0.6653967977429642, "num_chars": 28}, {"sum_logits": -15.73350715637207, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -25.849010467529297, "logits_per_token": -3.9333767890930176, "logits_per_char": -0.5619109698704311, "bits_per_byte": 0.810666169653761, "num_chars": 28}, {"sum_logits": -20.16429328918457, "num_tokens": 6, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -30.21764373779297, "logits_per_token": -3.3607155481974282, "logits_per_char": -0.6301341652870178, "bits_per_byte": 0.9090914353549401, "num_chars": 32}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 703, "native_id": "NCEOGA_2013_5_35", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.927149772644043, "logits_per_token_corr": -0.9908937215805054, "logits_per_char_corr": -0.25571450879496915, "bits_per_byte_corr": 0.3689180537221243}, "model_output": [{"sum_logits": -19.32474136352539, "num_tokens": 5, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -26.42220115661621, "logits_per_token": -3.864948272705078, "logits_per_char": -0.8783973347056996, "bits_per_byte": 1.2672594787109062, "num_chars": 22}, {"sum_logits": -19.201454162597656, "num_tokens": 6, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -27.3250732421875, "logits_per_token": -3.200242360432943, "logits_per_char": -0.9143549601236979, "bits_per_byte": 1.3191353665836325, "num_chars": 21}, {"sum_logits": -16.318073272705078, "num_tokens": 6, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -24.19476890563965, "logits_per_token": -2.7196788787841797, "logits_per_char": -0.6043730841742622, "bits_per_byte": 0.8719260513856025, "num_chars": 27}, {"sum_logits": -7.927149772644043, "num_tokens": 8, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -24.22813606262207, "logits_per_token": -0.9908937215805054, "logits_per_char": -0.25571450879496915, "bits_per_byte": 0.3689180537221243, "num_chars": 31}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 704, "native_id": "Mercury_7082758", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.226725578308105, "logits_per_token_corr": -1.7044542630513508, "logits_per_char_corr": -0.3099007751002456, "bits_per_byte_corr": 0.4470923114050915}, "model_output": [{"sum_logits": -20.18588638305664, "num_tokens": 5, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -29.99869155883789, "logits_per_token": -4.037177276611328, "logits_per_char": -0.6728628794352214, "bits_per_byte": 0.9707359393601598, "num_chars": 30}, {"sum_logits": -10.226725578308105, "num_tokens": 6, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -26.861072540283203, "logits_per_token": -1.7044542630513508, "logits_per_char": -0.3099007751002456, "bits_per_byte": 0.4470923114050915, "num_chars": 33}, {"sum_logits": -14.25044059753418, "num_tokens": 4, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -27.74382781982422, "logits_per_token": -3.562610149383545, "logits_per_char": -0.5277940962049696, "bits_per_byte": 0.7614459252059292, "num_chars": 27}, {"sum_logits": -14.723289489746094, "num_tokens": 5, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -28.719966888427734, "logits_per_token": -2.944657897949219, "logits_per_char": -0.4907763163248698, "bits_per_byte": 0.7080405577481516, "num_chars": 30}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 705, "native_id": "Mercury_7094308", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.70859146118164, "logits_per_token_corr": -2.3869416373116628, "logits_per_char_corr": -0.4284254220815805, "bits_per_byte_corr": 0.6180872318283013}, "model_output": [{"sum_logits": -20.13580894470215, "num_tokens": 10, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -27.69633674621582, "logits_per_token": -2.013580894470215, "logits_per_char": -0.6101760286273379, "bits_per_byte": 0.880297930570615, "num_chars": 33}, {"sum_logits": -18.974374771118164, "num_tokens": 8, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -25.22972869873047, "logits_per_token": -2.3717968463897705, "logits_per_char": -0.48652243002867085, "bits_per_byte": 0.7019034970841154, "num_chars": 39}, {"sum_logits": -16.70859146118164, "num_tokens": 7, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -30.711650848388672, "logits_per_token": -2.3869416373116628, "logits_per_char": -0.4284254220815805, "bits_per_byte": 0.6180872318283013, "num_chars": 39}, {"sum_logits": -18.421527862548828, "num_tokens": 11, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -28.497299194335938, "logits_per_token": -1.6746843511408025, "logits_per_char": -0.4093672858344184, "bits_per_byte": 0.5905921531759145, "num_chars": 45}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 706, "native_id": "Mercury_7136028", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.883111953735352, "logits_per_token_corr": -2.220777988433838, "logits_per_char_corr": -0.306314205301219, "bits_per_byte_corr": 0.44191798494222995}, "model_output": [{"sum_logits": -8.883111953735352, "num_tokens": 4, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -27.063684463500977, "logits_per_token": -2.220777988433838, "logits_per_char": -0.306314205301219, "bits_per_byte": 0.44191798494222995, "num_chars": 29}, {"sum_logits": -9.226435661315918, "num_tokens": 5, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -24.347312927246094, "logits_per_token": -1.8452871322631836, "logits_per_char": -0.32951555933271137, "bits_per_byte": 0.4753904633453972, "num_chars": 28}, {"sum_logits": -8.658222198486328, "num_tokens": 5, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -25.93906021118164, "logits_per_token": -1.7316444396972657, "logits_per_char": -0.36075925827026367, "bits_per_byte": 0.520465592861664, "num_chars": 24}, {"sum_logits": -6.372198104858398, "num_tokens": 3, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -24.725784301757812, "logits_per_token": -2.1240660349527993, "logits_per_char": -0.33537884762412623, "bits_per_byte": 0.48384940028672985, "num_chars": 19}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 707, "native_id": "Mercury_7159075", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.202522277832031, "logits_per_token_corr": -6.101261138916016, "logits_per_char_corr": -0.5810724894205729, "bits_per_byte_corr": 0.8383103988846674}, "model_output": [{"sum_logits": -11.038046836853027, "num_tokens": 2, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -20.071311950683594, "logits_per_token": -5.519023418426514, "logits_per_char": -0.6492968727560604, "bits_per_byte": 0.9367373783905536, "num_chars": 17}, {"sum_logits": -12.202522277832031, "num_tokens": 2, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -23.430259704589844, "logits_per_token": -6.101261138916016, "logits_per_char": -0.5810724894205729, "bits_per_byte": 0.8383103988846674, "num_chars": 21}, {"sum_logits": -7.106326580047607, "num_tokens": 3, "num_tokens_all": 230, "is_greedy": false, "sum_logits_uncond": -19.608457565307617, "logits_per_token": -2.3687755266825357, "logits_per_char": -0.3383965038117908, "bits_per_byte": 0.48820295790378454, "num_chars": 21}, {"sum_logits": -11.890373229980469, "num_tokens": 2, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -22.745426177978516, "logits_per_token": -5.945186614990234, "logits_per_char": -0.5404715104536577, "bits_per_byte": 0.7797355678738197, "num_chars": 22}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 708, "native_id": "MCAS_2015_5_19", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.954341888427734, "logits_per_token_corr": -4.977170944213867, "logits_per_char_corr": -0.9049401716752485, "bits_per_byte_corr": 1.3055526979780263}, "model_output": [{"sum_logits": -12.97945499420166, "num_tokens": 2, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -22.964542388916016, "logits_per_token": -6.48972749710083, "logits_per_char": -0.9984196149385892, "bits_per_byte": 1.440415027199206, "num_chars": 13}, {"sum_logits": -9.954341888427734, "num_tokens": 2, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -17.27259635925293, "logits_per_token": -4.977170944213867, "logits_per_char": -0.9049401716752485, "bits_per_byte": 1.3055526979780263, "num_chars": 11}, {"sum_logits": -8.082330703735352, "num_tokens": 2, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -16.668676376342773, "logits_per_token": -4.041165351867676, "logits_per_char": -0.8082330703735352, "bits_per_byte": 1.1660338425111976, "num_chars": 10}, {"sum_logits": -9.874457359313965, "num_tokens": 3, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -16.557912826538086, "logits_per_token": -3.2914857864379883, "logits_per_char": -0.897677941755815, "bits_per_byte": 1.2950755148874564, "num_chars": 11}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 709, "native_id": "MSA_2012_5_12", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.273096084594727, "logits_per_token_corr": -1.8788493474324544, "logits_per_char_corr": -0.4175220772072121, "bits_per_byte_corr": 0.6023570302489365}, "model_output": [{"sum_logits": -11.273096084594727, "num_tokens": 6, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -21.544422149658203, "logits_per_token": -1.8788493474324544, "logits_per_char": -0.4175220772072121, "bits_per_byte": 0.6023570302489365, "num_chars": 27}, {"sum_logits": -12.167293548583984, "num_tokens": 7, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -22.23828125, "logits_per_token": -1.7381847926548548, "logits_per_char": -0.39249334027690275, "bits_per_byte": 0.5662481955998389, "num_chars": 31}, {"sum_logits": -14.777609825134277, "num_tokens": 7, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -29.082462310791016, "logits_per_token": -2.1110871178763255, "logits_per_char": -0.41048916180928546, "bits_per_byte": 0.5922106780813489, "num_chars": 36}, {"sum_logits": -23.889507293701172, "num_tokens": 9, "num_tokens_all": 230, "is_greedy": false, "sum_logits_uncond": -36.023860931396484, "logits_per_token": -2.6543896993001304, "logits_per_char": -0.5429433475841176, "bits_per_byte": 0.7833016750438219, "num_chars": 44}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 710, "native_id": "MCAS_2014_5_13", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -23.3919677734375, "logits_per_token_corr": -2.5991075303819446, "logits_per_char_corr": -0.48733266194661456, "bits_per_byte_corr": 0.7030724146541036}, "model_output": [{"sum_logits": -21.02545166015625, "num_tokens": 9, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -37.73096466064453, "logits_per_token": -2.3361612955729165, "logits_per_char": -0.5256362915039062, "bits_per_byte": 0.7583328710644959, "num_chars": 40}, {"sum_logits": -25.316120147705078, "num_tokens": 10, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -43.401771545410156, "logits_per_token": -2.531612014770508, "logits_per_char": -0.5625804477267795, "bits_per_byte": 0.8116320220371006, "num_chars": 45}, {"sum_logits": -32.86259460449219, "num_tokens": 9, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -49.292724609375, "logits_per_token": -3.651399400499132, "logits_per_char": -0.7468771501020952, "bits_per_byte": 1.077515960606349, "num_chars": 44}, {"sum_logits": -23.3919677734375, "num_tokens": 9, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -44.55714416503906, "logits_per_token": -2.5991075303819446, "logits_per_char": -0.48733266194661456, "bits_per_byte": 0.7030724146541036, "num_chars": 48}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 711, "native_id": "Mercury_SC_400392", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -36.403350830078125, "logits_per_token_corr": -4.550418853759766, "logits_per_char_corr": -1.0112041897243924, "bits_per_byte_corr": 1.4588592698425715}, "model_output": [{"sum_logits": -29.30520248413086, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -32.49764633178711, "logits_per_token": -4.18645749773298, "logits_per_char": -0.9157875776290894, "bits_per_byte": 1.321202196754153, "num_chars": 32}, {"sum_logits": -20.952495574951172, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -31.636274337768555, "logits_per_token": -2.993213653564453, "logits_per_char": -0.6162498698515051, "bits_per_byte": 0.8890606311838742, "num_chars": 34}, {"sum_logits": -36.403350830078125, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -43.79884719848633, "logits_per_token": -4.550418853759766, "logits_per_char": -1.0112041897243924, "bits_per_byte": 1.4588592698425715, "num_chars": 36}, {"sum_logits": -30.008914947509766, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -37.564327239990234, "logits_per_token": -3.7511143684387207, "logits_per_char": -0.7897082880923623, "bits_per_byte": 1.1393082309805824, "num_chars": 38}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 712, "native_id": "Mercury_7159320", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.086663246154785, "logits_per_token_corr": -3.362221082051595, "logits_per_char_corr": -0.6304164528846741, "bits_per_byte_corr": 0.9094986902721836}, "model_output": [{"sum_logits": -10.086663246154785, "num_tokens": 3, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -24.74582290649414, "logits_per_token": -3.362221082051595, "logits_per_char": -0.6304164528846741, "bits_per_byte": 0.9094986902721836, "num_chars": 16}, {"sum_logits": -6.306865692138672, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -19.092941284179688, "logits_per_token": -3.153432846069336, "logits_per_char": -0.394179105758667, "bits_per_byte": 0.5686802411004837, "num_chars": 16}, {"sum_logits": -10.695040702819824, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -21.282928466796875, "logits_per_token": -5.347520351409912, "logits_per_char": -0.668440043926239, "bits_per_byte": 0.9643551365046787, "num_chars": 16}, {"sum_logits": -13.383108139038086, "num_tokens": 3, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -22.724273681640625, "logits_per_token": -4.461036046346028, "logits_per_char": -0.7435060077243381, "bits_per_byte": 1.0726524302158245, "num_chars": 18}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 713, "native_id": "Mercury_7218365", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.691433906555176, "logits_per_token_corr": -2.691433906555176, "logits_per_char_corr": -0.3844905580793108, "bits_per_byte_corr": 0.5547026214100502}, "model_output": [{"sum_logits": -3.102126121520996, "num_tokens": 1, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -13.443840026855469, "logits_per_token": -3.102126121520996, "logits_per_char": -0.6204252243041992, "bits_per_byte": 0.895084394346734, "num_chars": 5}, {"sum_logits": -2.691433906555176, "num_tokens": 1, "num_tokens_all": 215, "is_greedy": true, "sum_logits_uncond": -15.50045108795166, "logits_per_token": -2.691433906555176, "logits_per_char": -0.3844905580793108, "bits_per_byte": 0.5547026214100502, "num_chars": 7}, {"sum_logits": -2.7580652236938477, "num_tokens": 1, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -13.72641372680664, "logits_per_token": -2.7580652236938477, "logits_per_char": -0.4596775372823079, "bits_per_byte": 0.6631745034457137, "num_chars": 6}, {"sum_logits": -3.5520200729370117, "num_tokens": 1, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -15.813887596130371, "logits_per_token": -3.5520200729370117, "logits_per_char": -0.3229109157215465, "bits_per_byte": 0.46586197676072383, "num_chars": 11}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 714, "native_id": "MCAS_2004_9_10-v1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.1328959465026855, "logits_per_token_corr": -7.1328959465026855, "logits_per_char_corr": -2.377631982167562, "bits_per_byte_corr": 3.4301978697346023}, "model_output": [{"sum_logits": -5.64354944229126, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "sum_logits_uncond": -9.175007820129395, "logits_per_token": -5.64354944229126, "logits_per_char": -1.88118314743042, "bits_per_byte": 2.7139735978037085, "num_chars": 3}, {"sum_logits": -6.3457465171813965, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "sum_logits_uncond": -10.129685401916504, "logits_per_token": -6.3457465171813965, "logits_per_char": -2.1152488390604653, "bits_per_byte": 3.0516590103608627, "num_chars": 3}, {"sum_logits": -7.1328959465026855, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "sum_logits_uncond": -10.514604568481445, "logits_per_token": -7.1328959465026855, "logits_per_char": -2.377631982167562, "bits_per_byte": 3.4301978697346023, "num_chars": 3}, {"sum_logits": -21.763147354125977, "num_tokens": 2, "num_tokens_all": 261, "is_greedy": false, "sum_logits_uncond": -24.171228408813477, "logits_per_token": -10.881573677062988, "logits_per_char": -3.1090210505894254, "bits_per_byte": 4.485369251707981, "num_chars": 7}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 715, "native_id": "AIMS_2009_4_12", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.615009307861328, "logits_per_token_corr": -2.328637209805575, "logits_per_char_corr": -0.4493861282080935, "bits_per_byte_corr": 0.6483271386105741}, "model_output": [{"sum_logits": -14.515459060668945, "num_tokens": 8, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -29.663747787475586, "logits_per_token": -1.8144323825836182, "logits_per_char": -0.4398623957778468, "bits_per_byte": 0.634587297062694, "num_chars": 33}, {"sum_logits": -18.132356643676758, "num_tokens": 9, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -30.19290542602539, "logits_per_token": -2.014706293741862, "logits_per_char": -0.4533089160919189, "bits_per_byte": 0.6539865252370325, "num_chars": 40}, {"sum_logits": -31.990556716918945, "num_tokens": 10, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -40.896888732910156, "logits_per_token": -3.1990556716918945, "logits_per_char": -0.666469931602478, "bits_per_byte": 0.9615128652251924, "num_chars": 48}, {"sum_logits": -25.615009307861328, "num_tokens": 11, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -39.299232482910156, "logits_per_token": -2.328637209805575, "logits_per_char": -0.4493861282080935, "bits_per_byte": 0.6483271386105741, "num_chars": 57}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 716, "native_id": "Mercury_SC_414274", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.626544952392578, "logits_per_token_corr": -1.6251716613769531, "logits_per_char_corr": -0.37503961416391224, "bits_per_byte_corr": 0.5410677914915751}, "model_output": [{"sum_logits": -16.321304321289062, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -29.50570297241211, "logits_per_token": -2.040163040161133, "logits_per_char": -0.5264936877835181, "bits_per_byte": 0.7595698324251695, "num_chars": 31}, {"sum_logits": -14.626544952392578, "num_tokens": 9, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -28.487316131591797, "logits_per_token": -1.6251716613769531, "logits_per_char": -0.37503961416391224, "bits_per_byte": 0.5410677914915751, "num_chars": 39}, {"sum_logits": -23.012054443359375, "num_tokens": 9, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -35.22417449951172, "logits_per_token": -2.5568949381510415, "logits_per_char": -0.5479060581752232, "bits_per_byte": 0.7904613530029823, "num_chars": 42}, {"sum_logits": -23.210968017578125, "num_tokens": 12, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -34.15648651123047, "logits_per_token": -1.934247334798177, "logits_per_char": -0.4463647695688101, "bits_per_byte": 0.6439682394849299, "num_chars": 52}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 717, "native_id": "MCAS_2005_9_6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.981866359710693, "logits_per_token_corr": -3.4909331798553467, "logits_per_char_corr": -0.9974094799586705, "bits_per_byte_corr": 1.4389577104730475}, "model_output": [{"sum_logits": -6.576478958129883, "num_tokens": 2, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -19.342445373535156, "logits_per_token": -3.2882394790649414, "logits_per_char": -1.0960798263549805, "bits_per_byte": 1.5813089299019025, "num_chars": 6}, {"sum_logits": -7.300103664398193, "num_tokens": 2, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -18.967803955078125, "logits_per_token": -3.6500518321990967, "logits_per_char": -1.0428719520568848, "bits_per_byte": 1.5045461935157414, "num_chars": 7}, {"sum_logits": -6.981866359710693, "num_tokens": 2, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -20.341148376464844, "logits_per_token": -3.4909331798553467, "logits_per_char": -0.9974094799586705, "bits_per_byte": 1.4389577104730475, "num_chars": 7}, {"sum_logits": -7.7009077072143555, "num_tokens": 3, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -19.513307571411133, "logits_per_token": -2.5669692357381186, "logits_per_char": -1.1001296724591936, "bits_per_byte": 1.5871516227928184, "num_chars": 7}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 718, "native_id": "MCAS_1998_4_23", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -31.713743209838867, "logits_per_token_corr": -3.523749245537652, "logits_per_char_corr": -0.8131729028163812, "bits_per_byte_corr": 1.173160514279319}, "model_output": [{"sum_logits": -31.713743209838867, "num_tokens": 9, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -42.63471984863281, "logits_per_token": -3.523749245537652, "logits_per_char": -0.8131729028163812, "bits_per_byte": 1.173160514279319, "num_chars": 39}, {"sum_logits": -24.51427459716797, "num_tokens": 11, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -32.8895378112793, "logits_per_token": -2.228570417924361, "logits_per_char": -0.49028549194335935, "bits_per_byte": 0.7073324478469986, "num_chars": 50}, {"sum_logits": -13.906465530395508, "num_tokens": 7, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -32.81251907348633, "logits_per_token": -1.9866379329136439, "logits_per_char": -0.4635488510131836, "bits_per_byte": 0.6687596285669773, "num_chars": 30}, {"sum_logits": -17.906326293945312, "num_tokens": 7, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -25.41756820678711, "logits_per_token": -2.558046613420759, "logits_per_char": -0.497397952609592, "bits_per_byte": 0.7175935595786976, "num_chars": 36}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 719, "native_id": "Mercury_7075023", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -27.61239242553711, "logits_per_token_corr": -3.944627489362444, "logits_per_char_corr": -0.6275543733076616, "bits_per_byte_corr": 0.9053695822597951}, "model_output": [{"sum_logits": -26.05647850036621, "num_tokens": 7, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -30.94470977783203, "logits_per_token": -3.722354071480887, "logits_per_char": -0.6355238658625905, "bits_per_byte": 0.9168671296472007, "num_chars": 41}, {"sum_logits": -16.28753089904785, "num_tokens": 6, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -25.310256958007812, "logits_per_token": -2.714588483174642, "logits_per_char": -0.47904502644258384, "bits_per_byte": 0.6911158840117346, "num_chars": 34}, {"sum_logits": -26.099956512451172, "num_tokens": 8, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -32.77958679199219, "logits_per_token": -3.2624945640563965, "logits_per_char": -0.5931808298284357, "bits_per_byte": 0.855779041544499, "num_chars": 44}, {"sum_logits": -27.61239242553711, "num_tokens": 7, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -35.972599029541016, "logits_per_token": -3.944627489362444, "logits_per_char": -0.6275543733076616, "bits_per_byte": 0.9053695822597951, "num_chars": 44}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 720, "native_id": "Mercury_SC_400182", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.956109046936035, "logits_per_token_corr": -6.956109046936035, "logits_per_char_corr": -1.1593515078226726, "bits_per_byte_corr": 1.6725906709841136}, "model_output": [{"sum_logits": -3.157244920730591, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -13.47834587097168, "logits_per_token": -3.157244920730591, "logits_per_char": -0.39465561509132385, "bits_per_byte": 0.5693676987516455, "num_chars": 8}, {"sum_logits": -6.956109046936035, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -13.4844331741333, "logits_per_token": -6.956109046936035, "logits_per_char": -1.1593515078226726, "bits_per_byte": 1.6725906709841136, "num_chars": 6}, {"sum_logits": -5.341931343078613, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -13.95900821685791, "logits_per_token": -5.341931343078613, "logits_per_char": -0.4856301220980557, "bits_per_byte": 0.7006161688576702, "num_chars": 11}, {"sum_logits": -5.558035850524902, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -12.887895584106445, "logits_per_token": -5.558035850524902, "logits_per_char": -0.6175595389472114, "bits_per_byte": 0.8909500842934566, "num_chars": 9}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 721, "native_id": "Mercury_SC_400133", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.233057022094727, "logits_per_token_corr": -2.0582642555236816, "logits_per_char_corr": -0.5488704681396485, "bits_per_byte_corr": 0.7918527024760436}, "model_output": [{"sum_logits": -4.894418239593506, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": true, "sum_logits_uncond": -17.91010284423828, "logits_per_token": -1.6314727465311687, "logits_per_char": -0.32629454930623375, "bits_per_byte": 0.470743528153541, "num_chars": 15}, {"sum_logits": -8.233057022094727, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -21.605239868164062, "logits_per_token": -2.0582642555236816, "logits_per_char": -0.5488704681396485, "bits_per_byte": 0.7918527024760436, "num_chars": 15}, {"sum_logits": -13.17519474029541, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -22.346920013427734, "logits_per_token": -3.2937986850738525, "logits_per_char": -0.878346316019694, "bits_per_byte": 1.2671858743056132, "num_chars": 15}, {"sum_logits": -8.80634880065918, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -22.809886932373047, "logits_per_token": -2.9354496002197266, "logits_per_char": -0.440317440032959, "bits_per_byte": 0.6352437871529298, "num_chars": 20}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 722, "native_id": "MSA_2013_5_11", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.962855339050293, "logits_per_token_corr": -2.9876184463500977, "logits_per_char_corr": -0.5975236892700195, "bits_per_byte_corr": 0.8620444633241544}, "model_output": [{"sum_logits": -3.6972248554229736, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -15.506977081298828, "logits_per_token": -1.8486124277114868, "logits_per_char": -0.41080276171366376, "bits_per_byte": 0.592663107108219, "num_chars": 9}, {"sum_logits": -2.277768135070801, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -17.05547523498535, "logits_per_token": -1.1388840675354004, "logits_per_char": -0.22777681350708007, "bits_per_byte": 0.32861247927639076, "num_chars": 10}, {"sum_logits": -11.031780242919922, "num_tokens": 3, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -27.269527435302734, "logits_per_token": -3.677260080973307, "logits_per_char": -0.6894862651824951, "bits_per_byte": 0.9947184155405531, "num_chars": 16}, {"sum_logits": -8.962855339050293, "num_tokens": 3, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -22.209787368774414, "logits_per_token": -2.9876184463500977, "logits_per_char": -0.5975236892700195, "bits_per_byte": 0.8620444633241544, "num_chars": 15}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 723, "native_id": "Mercury_SC_408706", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -2.7522804737091064, "logits_per_token_corr": -2.7522804737091064, "logits_per_char_corr": -0.5504560947418213, "bits_per_byte_corr": 0.7941402781117016}, "model_output": [{"sum_logits": -3.1474010944366455, "num_tokens": 1, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -9.91943073272705, "logits_per_token": -3.1474010944366455, "logits_per_char": -0.5245668490727743, "bits_per_byte": 0.7567899917725845, "num_chars": 6}, {"sum_logits": -2.7522804737091064, "num_tokens": 1, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -9.832322120666504, "logits_per_token": -2.7522804737091064, "logits_per_char": -0.5504560947418213, "bits_per_byte": 0.7941402781117016, "num_chars": 5}, {"sum_logits": -3.4091408252716064, "num_tokens": 1, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -9.882824897766113, "logits_per_token": -3.4091408252716064, "logits_per_char": -0.34091408252716066, "bits_per_byte": 0.4918350562314988, "num_chars": 10}, {"sum_logits": -2.297595262527466, "num_tokens": 1, "num_tokens_all": 190, "is_greedy": true, "sum_logits_uncond": -10.24388599395752, "logits_per_token": -2.297595262527466, "logits_per_char": -0.25528836250305176, "bits_per_byte": 0.3683032545800814, "num_chars": 9}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 724, "native_id": "Mercury_7213325", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.079811096191406, "logits_per_token_corr": -1.6159622192382812, "logits_per_char_corr": -0.2992522628219039, "bits_per_byte_corr": 0.4317297555482717}, "model_output": [{"sum_logits": -11.09063720703125, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -25.614564895629883, "logits_per_token": -2.7726593017578125, "logits_per_char": -0.4265629695012019, "bits_per_byte": 0.6154002807266963, "num_chars": 26}, {"sum_logits": -14.198813438415527, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -28.944988250732422, "logits_per_token": -3.549703359603882, "logits_per_char": -0.5679525375366211, "bits_per_byte": 0.8193823093649748, "num_chars": 25}, {"sum_logits": -8.891571044921875, "num_tokens": 5, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -26.83746337890625, "logits_per_token": -1.778314208984375, "logits_per_char": -0.3419835017277644, "bits_per_byte": 0.49337790200884246, "num_chars": 26}, {"sum_logits": -8.079811096191406, "num_tokens": 5, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -26.737319946289062, "logits_per_token": -1.6159622192382812, "logits_per_char": -0.2992522628219039, "bits_per_byte": 0.4317297555482717, "num_chars": 27}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 725, "native_id": "Mercury_SC_LBS10932", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.451889991760254, "logits_per_token_corr": -2.2419816652933755, "logits_per_char_corr": -0.407633030053341, "bits_per_byte_corr": 0.5880901509609193}, "model_output": [{"sum_logits": -17.082462310791016, "num_tokens": 7, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -36.04648208618164, "logits_per_token": -2.440351758684431, "logits_per_char": -0.5338269472122192, "bits_per_byte": 0.7701494894365164, "num_chars": 32}, {"sum_logits": -13.451889991760254, "num_tokens": 6, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -34.40824508666992, "logits_per_token": -2.2419816652933755, "logits_per_char": -0.407633030053341, "bits_per_byte": 0.5880901509609193, "num_chars": 33}, {"sum_logits": -26.233434677124023, "num_tokens": 9, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -40.22075653076172, "logits_per_token": -2.9148260752360025, "logits_per_char": -0.6558358669281006, "bits_per_byte": 0.9461711528549646, "num_chars": 40}, {"sum_logits": -14.464845657348633, "num_tokens": 8, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -31.28508186340332, "logits_per_token": -1.808105707168579, "logits_per_char": -0.3214410146077474, "bits_per_byte": 0.46374135771324715, "num_chars": 45}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 726, "native_id": "Mercury_192220", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -1.6374337673187256, "logits_per_token_corr": -1.6374337673187256, "logits_per_char_corr": -0.2046792209148407, "bits_per_byte_corr": 0.2952896969870694}, "model_output": [{"sum_logits": -3.221360921859741, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -12.696199417114258, "logits_per_token": -3.221360921859741, "logits_per_char": -0.46019441740853445, "bits_per_byte": 0.6639202038405553, "num_chars": 7}, {"sum_logits": -3.1761040687561035, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -14.251230239868164, "logits_per_token": -1.5880520343780518, "logits_per_char": -0.2117402712504069, "bits_per_byte": 0.30547663928966545, "num_chars": 15}, {"sum_logits": -18.83047103881836, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -20.503652572631836, "logits_per_token": -6.27682367960612, "logits_per_char": -1.4484977722167969, "bits_per_byte": 2.0897405527173856, "num_chars": 13}, {"sum_logits": -1.6374337673187256, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": true, "sum_logits_uncond": -14.469803810119629, "logits_per_token": -1.6374337673187256, "logits_per_char": -0.2046792209148407, "bits_per_byte": 0.2952896969870694, "num_chars": 8}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 727, "native_id": "Mercury_SC_407247", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.484317779541016, "logits_per_token_corr": -2.371079444885254, "logits_per_char_corr": -0.3270454406738281, "bits_per_byte_corr": 0.4718268354058165}, "model_output": [{"sum_logits": -13.191640853881836, "num_tokens": 5, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -24.926193237304688, "logits_per_token": -2.6383281707763673, "logits_per_char": -0.45488416737523574, "bits_per_byte": 0.6562591324516293, "num_chars": 29}, {"sum_logits": -9.249717712402344, "num_tokens": 5, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -21.7436580657959, "logits_per_token": -1.8499435424804687, "logits_per_char": -0.29837799072265625, "bits_per_byte": 0.4304684475262986, "num_chars": 31}, {"sum_logits": -9.484317779541016, "num_tokens": 4, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -22.282154083251953, "logits_per_token": -2.371079444885254, "logits_per_char": -0.3270454406738281, "bits_per_byte": 0.4718268354058165, "num_chars": 29}, {"sum_logits": -13.812017440795898, "num_tokens": 5, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -22.089134216308594, "logits_per_token": -2.7624034881591797, "logits_per_char": -0.5115562015109592, "bits_per_byte": 0.7380195950563863, "num_chars": 27}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 728, "native_id": "Mercury_7024798", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.346733570098877, "logits_per_token_corr": -2.6733667850494385, "logits_per_char_corr": -0.5346733570098877, "bits_per_byte_corr": 0.7713706006541735}, "model_output": [{"sum_logits": -5.236856460571289, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -12.440506935119629, "logits_per_token": -5.236856460571289, "logits_per_char": -0.7481223515101841, "bits_per_byte": 1.079312406502708, "num_chars": 7}, {"sum_logits": -5.777704238891602, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -11.144539833068848, "logits_per_token": -5.777704238891602, "logits_per_char": -0.8253863198416573, "bits_per_byte": 1.1907807504540064, "num_chars": 7}, {"sum_logits": -4.949335098266602, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -17.152629852294922, "logits_per_token": -2.474667549133301, "logits_per_char": -0.44993955438787286, "bits_per_byte": 0.6491255638156406, "num_chars": 11}, {"sum_logits": -5.346733570098877, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -18.212127685546875, "logits_per_token": -2.6733667850494385, "logits_per_char": -0.5346733570098877, "bits_per_byte": 0.7713706006541735, "num_chars": 10}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 729, "native_id": "Mercury_7180810", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.923473358154297, "logits_per_token_corr": -2.8205788930257163, "logits_per_char_corr": -0.44535456205669205, "bits_per_byte_corr": 0.6425108181169273}, "model_output": [{"sum_logits": -13.113248825073242, "num_tokens": 8, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -39.00775909423828, "logits_per_token": -1.6391561031341553, "logits_per_char": -0.2571225259818283, "bits_per_byte": 0.3709493931350938, "num_chars": 51}, {"sum_logits": -15.197860717773438, "num_tokens": 8, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -38.62451171875, "logits_per_token": -1.8997325897216797, "logits_per_char": -0.3534386213435683, "bits_per_byte": 0.5099041462713645, "num_chars": 43}, {"sum_logits": -16.923473358154297, "num_tokens": 6, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -39.09886169433594, "logits_per_token": -2.8205788930257163, "logits_per_char": -0.44535456205669205, "bits_per_byte": 0.6425108181169273, "num_chars": 38}, {"sum_logits": -18.996299743652344, "num_tokens": 7, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -37.101375579833984, "logits_per_token": -2.713757106236049, "logits_per_char": -0.5134135065851985, "bits_per_byte": 0.7406991198764111, "num_chars": 37}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 730, "native_id": "Mercury_412780", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -28.881853103637695, "logits_per_token_corr": -2.2216810079721303, "logits_per_char_corr": -0.43760383490360144, "bits_per_byte_corr": 0.6313288824898721}, "model_output": [{"sum_logits": -26.940811157226562, "num_tokens": 13, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -41.423851013183594, "logits_per_token": -2.072370089017428, "logits_per_char": -0.42763192313058035, "bits_per_byte": 0.6169424548267419, "num_chars": 63}, {"sum_logits": -28.881853103637695, "num_tokens": 13, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -41.27812194824219, "logits_per_token": -2.2216810079721303, "logits_per_char": -0.43760383490360144, "bits_per_byte": 0.6313288824898721, "num_chars": 66}, {"sum_logits": -38.21443176269531, "num_tokens": 15, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -51.8491096496582, "logits_per_token": -2.5476287841796874, "logits_per_char": -0.5234853666122645, "bits_per_byte": 0.7552297423899975, "num_chars": 73}, {"sum_logits": -39.39403533935547, "num_tokens": 15, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -51.71831512451172, "logits_per_token": -2.6262690226236978, "logits_per_char": -0.5183425702546772, "bits_per_byte": 0.7478102555885991, "num_chars": 76}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 731, "native_id": "LEAP_2011_8_10434", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -44.320899963378906, "logits_per_token_corr": -3.6934083302815757, "logits_per_char_corr": -0.703506348625062, "bits_per_byte_corr": 1.0149451203960083}, "model_output": [{"sum_logits": -36.988372802734375, "num_tokens": 13, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -48.42353057861328, "logits_per_token": -2.8452594463641825, "logits_per_char": -0.596586658108619, "bits_per_byte": 0.8606926131144425, "num_chars": 62}, {"sum_logits": -44.320899963378906, "num_tokens": 12, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -53.01556396484375, "logits_per_token": -3.6934083302815757, "logits_per_char": -0.703506348625062, "bits_per_byte": 1.0149451203960083, "num_chars": 63}, {"sum_logits": -33.77516174316406, "num_tokens": 12, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -45.27119445800781, "logits_per_token": -2.8145968119303384, "logits_per_char": -0.5629193623860677, "bits_per_byte": 0.8121209725353405, "num_chars": 60}, {"sum_logits": -25.443571090698242, "num_tokens": 12, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -38.456546783447266, "logits_per_token": -2.12029759089152, "logits_per_char": -0.43868226018445244, "bits_per_byte": 0.6328847212945262, "num_chars": 58}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 732, "native_id": "Mercury_7200340", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.122125625610352, "logits_per_token_corr": -1.4580139584011502, "logits_per_char_corr": -0.3280531406402588, "bits_per_byte_corr": 0.47328063915009105}, "model_output": [{"sum_logits": -5.660334587097168, "num_tokens": 5, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -22.23424530029297, "logits_per_token": -1.1320669174194335, "logits_per_char": -0.2461015037868334, "bits_per_byte": 0.3550494190688361, "num_chars": 23}, {"sum_logits": -18.460031509399414, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -37.20179748535156, "logits_per_token": -2.6371473584856306, "logits_per_char": -0.5768759846687317, "bits_per_byte": 0.8322561222901148, "num_chars": 32}, {"sum_logits": -14.058432579040527, "num_tokens": 9, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -37.413185119628906, "logits_per_token": -1.5620480643378363, "logits_per_char": -0.3799576372713656, "bits_per_byte": 0.5481629990396806, "num_chars": 37}, {"sum_logits": -13.122125625610352, "num_tokens": 9, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -32.159061431884766, "logits_per_token": -1.4580139584011502, "logits_per_char": -0.3280531406402588, "bits_per_byte": 0.47328063915009105, "num_chars": 40}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 733, "native_id": "Mercury_7056525", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -18.1614990234375, "logits_per_token_corr": -3.02691650390625, "logits_per_char_corr": -0.60538330078125, "bits_per_byte_corr": 0.8733834858747286}, "model_output": [{"sum_logits": -34.0849494934082, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -49.22795486450195, "logits_per_token": -4.260618686676025, "logits_per_char": -0.6956112141511879, "bits_per_byte": 1.0035548490433905, "num_chars": 49}, {"sum_logits": -21.66741180419922, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -33.7957763671875, "logits_per_token": -3.0953445434570312, "logits_per_char": -0.5555746616461338, "bits_per_byte": 0.8015248092010169, "num_chars": 39}, {"sum_logits": -18.1614990234375, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -34.47161865234375, "logits_per_token": -3.02691650390625, "logits_per_char": -0.60538330078125, "bits_per_byte": 0.8733834858747286, "num_chars": 30}, {"sum_logits": -22.599191665649414, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -38.304420471191406, "logits_per_token": -3.2284559522356306, "logits_per_char": -0.5649797916412354, "bits_per_byte": 0.8150935436038756, "num_chars": 40}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 734, "native_id": "Mercury_7085278", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.871541976928711, "logits_per_token_corr": -2.1245059967041016, "logits_per_char_corr": -0.5507978509973597, "bits_per_byte_corr": 0.7946333281667599}, "model_output": [{"sum_logits": -19.741859436035156, "num_tokens": 5, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -30.029224395751953, "logits_per_token": -3.9483718872070312, "logits_per_char": -0.8225774765014648, "bits_per_byte": 1.1867284460964738, "num_chars": 24}, {"sum_logits": -17.267616271972656, "num_tokens": 5, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -36.81297302246094, "logits_per_token": -3.4535232543945313, "logits_per_char": -0.6907046508789062, "bits_per_byte": 0.9964761745426567, "num_chars": 25}, {"sum_logits": -15.208375930786133, "num_tokens": 6, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -28.510644912719727, "logits_per_token": -2.534729321797689, "logits_per_char": -0.5431562832423619, "bits_per_byte": 0.7836088762619996, "num_chars": 28}, {"sum_logits": -14.871541976928711, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -37.879180908203125, "logits_per_token": -2.1245059967041016, "logits_per_char": -0.5507978509973597, "bits_per_byte": 0.7946333281667599, "num_chars": 27}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 735, "native_id": "AKDE&ED_2008_4_35", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.57079315185547, "logits_per_token_corr": -2.7958275931222096, "logits_per_char_corr": -0.5150208724172491, "bits_per_byte_corr": 0.7430180585912066}, "model_output": [{"sum_logits": -13.162551879882812, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -29.966135025024414, "logits_per_token": -1.8803645542689733, "logits_per_char": -0.37607291085379463, "bits_per_byte": 0.5425585235018365, "num_chars": 35}, {"sum_logits": -12.148971557617188, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -29.591815948486328, "logits_per_token": -1.735567365373884, "logits_per_char": -0.3471134730747768, "bits_per_byte": 0.500778886231085, "num_chars": 35}, {"sum_logits": -16.419591903686523, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -30.23639678955078, "logits_per_token": -2.345655986240932, "logits_per_char": -0.4320945237812243, "bits_per_byte": 0.6233806266548985, "num_chars": 38}, {"sum_logits": -19.57079315185547, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -32.196189880371094, "logits_per_token": -2.7958275931222096, "logits_per_char": -0.5150208724172491, "bits_per_byte": 0.7430180585912066, "num_chars": 38}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 736, "native_id": "MCAS_1999_8_16", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -28.11330795288086, "logits_per_token_corr": -3.5141634941101074, "logits_per_char_corr": -0.5856939156850179, "bits_per_byte_corr": 0.8449777076382211}, "model_output": [{"sum_logits": -21.529014587402344, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -38.04429626464844, "logits_per_token": -3.075573512486049, "logits_per_char": -0.7423798133587015, "bits_per_byte": 1.0710276751894423, "num_chars": 29}, {"sum_logits": -30.76902961730957, "num_tokens": 9, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -51.52973937988281, "logits_per_token": -3.4187810685899525, "logits_per_char": -0.7692257404327393, "bits_per_byte": 1.1097581610472513, "num_chars": 40}, {"sum_logits": -29.086509704589844, "num_tokens": 8, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -50.111419677734375, "logits_per_token": -3.6358137130737305, "logits_per_char": -0.6764304582462755, "bits_per_byte": 0.9758828676188517, "num_chars": 43}, {"sum_logits": -28.11330795288086, "num_tokens": 8, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -49.528560638427734, "logits_per_token": -3.5141634941101074, "logits_per_char": -0.5856939156850179, "bits_per_byte": 0.8449777076382211, "num_chars": 48}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 737, "native_id": "Mercury_SC_400063", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.59368896484375, "logits_per_token_corr": -4.296844482421875, "logits_per_char_corr": -0.7812444513494318, "bits_per_byte_corr": 1.127097495684654}, "model_output": [{"sum_logits": -4.270380020141602, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -18.942916870117188, "logits_per_token": -1.4234600067138672, "logits_per_char": -0.42703800201416015, "bits_per_byte": 0.6160856077774026, "num_chars": 10}, {"sum_logits": -5.870591163635254, "num_tokens": 2, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -17.50050926208496, "logits_per_token": -2.935295581817627, "logits_per_char": -0.5336901057850231, "bits_per_byte": 0.7699520689881122, "num_chars": 11}, {"sum_logits": -11.972883224487305, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -16.138832092285156, "logits_per_token": -3.9909610748291016, "logits_per_char": -0.9209910172682542, "bits_per_byte": 1.3287091733171466, "num_chars": 13}, {"sum_logits": -8.59368896484375, "num_tokens": 2, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -16.284713745117188, "logits_per_token": -4.296844482421875, "logits_per_char": -0.7812444513494318, "bits_per_byte": 1.127097495684654, "num_chars": 11}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 738, "native_id": "Mercury_SC_401666", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.92163848876953, "logits_per_token_corr": -2.9869397481282554, "logits_per_char_corr": -0.4843686078045819, "bits_per_byte_corr": 0.6987961884424636}, "model_output": [{"sum_logits": -17.2984676361084, "num_tokens": 7, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -30.82461929321289, "logits_per_token": -2.4712096623011996, "logits_per_char": -0.6178024155752999, "bits_per_byte": 0.891300481200348, "num_chars": 28}, {"sum_logits": -18.904626846313477, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -27.476606369018555, "logits_per_token": -3.150771141052246, "logits_per_char": -0.6751652445111956, "bits_per_byte": 0.9740575500375862, "num_chars": 28}, {"sum_logits": -24.378746032714844, "num_tokens": 7, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -33.48518371582031, "logits_per_token": -3.482678004673549, "logits_per_char": -0.7387498797792377, "bits_per_byte": 1.06579078801559, "num_chars": 33}, {"sum_logits": -17.92163848876953, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -33.863460540771484, "logits_per_token": -2.9869397481282554, "logits_per_char": -0.4843686078045819, "bits_per_byte": 0.6987961884424636, "num_chars": 37}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 739, "native_id": "TIMSS_2011_8_pg31", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.576239585876465, "logits_per_token_corr": -2.2627065976460776, "logits_per_char_corr": -0.5028236883657949, "bits_per_byte_corr": 0.7254212416473511}, "model_output": [{"sum_logits": -11.128666877746582, "num_tokens": 5, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -27.004119873046875, "logits_per_token": -2.2257333755493165, "logits_per_char": -0.5058484944430265, "bits_per_byte": 0.729785114374627, "num_chars": 22}, {"sum_logits": -13.235809326171875, "num_tokens": 5, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -29.00918960571289, "logits_per_token": -2.647161865234375, "logits_per_char": -0.529432373046875, "bits_per_byte": 0.7638094590813509, "num_chars": 25}, {"sum_logits": -13.576239585876465, "num_tokens": 6, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -25.873031616210938, "logits_per_token": -2.2627065976460776, "logits_per_char": -0.5028236883657949, "bits_per_byte": 0.7254212416473511, "num_chars": 27}, {"sum_logits": -20.220129013061523, "num_tokens": 8, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -33.2381591796875, "logits_per_token": -2.5275161266326904, "logits_per_char": -0.4595483866604892, "bits_per_byte": 0.662988178484088, "num_chars": 44}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 740, "native_id": "Mercury_412673", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.5374369621276855, "logits_per_token_corr": -2.179145654042562, "logits_per_char_corr": -0.6537436962127685, "bits_per_byte_corr": 0.9431527885392598}, "model_output": [{"sum_logits": -8.064724922180176, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -14.927358627319336, "logits_per_token": -4.032362461090088, "logits_per_char": -0.8064724922180175, "bits_per_byte": 1.163493865137133, "num_chars": 10}, {"sum_logits": -6.5374369621276855, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -15.160510063171387, "logits_per_token": -2.179145654042562, "logits_per_char": -0.6537436962127685, "bits_per_byte": 0.9431527885392598, "num_chars": 10}, {"sum_logits": -13.392449378967285, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -33.519989013671875, "logits_per_token": -2.2320748964945474, "logits_per_char": -0.9566035270690918, "bits_per_byte": 1.2880813536270974, "num_chars": 14}, {"sum_logits": -23.006023406982422, "num_tokens": 10, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -32.59749984741211, "logits_per_token": -2.300602340698242, "logits_per_char": -1.0002618872601052, "bits_per_byte": 1.443072864341426, "num_chars": 23}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 741, "native_id": "Mercury_7130655", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.388103485107422, "logits_per_token_corr": -2.6388103485107424, "logits_per_char_corr": -0.41231411695480347, "bits_per_byte_corr": 0.5948435318196343}, "model_output": [{"sum_logits": -21.837831497192383, "num_tokens": 10, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -39.366214752197266, "logits_per_token": -2.1837831497192384, "logits_per_char": -0.4963143522089178, "bits_per_byte": 0.7160302546543384, "num_chars": 44}, {"sum_logits": -19.792158126831055, "num_tokens": 11, "num_tokens_all": 234, "is_greedy": false, "sum_logits_uncond": -43.785858154296875, "logits_per_token": -1.7992871024391868, "logits_per_char": -0.31922835688437184, "bits_per_byte": 0.46054916738854634, "num_chars": 62}, {"sum_logits": -38.06365966796875, "num_tokens": 11, "num_tokens_all": 234, "is_greedy": false, "sum_logits_uncond": -49.94662094116211, "logits_per_token": -3.4603326970880683, "logits_per_char": -0.5361078826474471, "bits_per_byte": 0.77344018367751, "num_chars": 71}, {"sum_logits": -26.388103485107422, "num_tokens": 10, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -36.00413131713867, "logits_per_token": -2.6388103485107424, "logits_per_char": -0.41231411695480347, "bits_per_byte": 0.5948435318196343, "num_chars": 64}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 742, "native_id": "MCAS_2004_5_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.714897155761719, "logits_per_token_corr": -1.102128165108817, "logits_per_char_corr": -0.296726813683143, "bits_per_byte_corr": 0.4280863025997614}, "model_output": [{"sum_logits": -9.925402641296387, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -25.294891357421875, "logits_per_token": -1.417914663042341, "logits_per_char": -0.38174625543447643, "bits_per_byte": 0.5507434295936463, "num_chars": 26}, {"sum_logits": -7.714897155761719, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -28.674942016601562, "logits_per_token": -1.102128165108817, "logits_per_char": -0.296726813683143, "bits_per_byte": 0.4280863025997614, "num_chars": 26}, {"sum_logits": -7.736444473266602, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -28.9139347076416, "logits_per_token": -1.1052063533238001, "logits_per_char": -0.29755555666410005, "bits_per_byte": 0.4292819259885605, "num_chars": 26}, {"sum_logits": -11.055742263793945, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -26.40203094482422, "logits_per_token": -1.5793917519705636, "logits_per_char": -0.42522085629976714, "bits_per_byte": 0.6134640206666733, "num_chars": 26}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 743, "native_id": "Mercury_7187373", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -30.918123245239258, "logits_per_token_corr": -3.8647654056549072, "logits_per_char_corr": -0.7927723909035708, "bits_per_byte_corr": 1.14372879691109}, "model_output": [{"sum_logits": -14.42340087890625, "num_tokens": 6, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -27.521068572998047, "logits_per_token": -2.403900146484375, "logits_per_char": -0.46527099609375, "bits_per_byte": 0.6712441587344037, "num_chars": 31}, {"sum_logits": -17.833616256713867, "num_tokens": 6, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -33.211463928222656, "logits_per_token": -2.972269376118978, "logits_per_char": -0.5245181251974667, "bits_per_byte": 0.7567196980793053, "num_chars": 34}, {"sum_logits": -23.322301864624023, "num_tokens": 5, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -37.25519561767578, "logits_per_token": -4.664460372924805, "logits_per_char": -0.6859500548418831, "bits_per_byte": 0.9896167424186082, "num_chars": 34}, {"sum_logits": -30.918123245239258, "num_tokens": 8, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -47.16178894042969, "logits_per_token": -3.8647654056549072, "logits_per_char": -0.7927723909035708, "bits_per_byte": 1.14372879691109, "num_chars": 39}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 744, "native_id": "Mercury_SC_401361", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.00337028503418, "logits_per_token_corr": -5.00337028503418, "logits_per_char_corr": -0.6254212856292725, "bits_per_byte_corr": 0.9022921872443995}, "model_output": [{"sum_logits": -5.731470108032227, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -14.503323554992676, "logits_per_token": -5.731470108032227, "logits_per_char": -0.8187814440046038, "bits_per_byte": 1.181251928838195, "num_chars": 7}, {"sum_logits": -5.555961608886719, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -15.221264839172363, "logits_per_token": -5.555961608886719, "logits_per_char": -0.6944952011108398, "bits_per_byte": 1.0019447825645118, "num_chars": 8}, {"sum_logits": -5.00337028503418, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -14.015191078186035, "logits_per_token": -5.00337028503418, "logits_per_char": -0.6254212856292725, "bits_per_byte": 0.9022921872443995, "num_chars": 8}, {"sum_logits": -6.447236061096191, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -16.186803817749023, "logits_per_token": -6.447236061096191, "logits_per_char": -0.8059045076370239, "bits_per_byte": 1.1626744365988315, "num_chars": 8}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 745, "native_id": "MCAS_2006_8_12", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.908159255981445, "logits_per_token_corr": -2.2120176951090493, "logits_per_char_corr": -0.4147533178329468, "bits_per_byte_corr": 0.5983625548302662}, "model_output": [{"sum_logits": -25.962139129638672, "num_tokens": 8, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -32.74846649169922, "logits_per_token": -3.245267391204834, "logits_per_char": -0.6656958751189404, "bits_per_byte": 0.9603961377750239, "num_chars": 39}, {"sum_logits": -14.960103034973145, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -29.53680419921875, "logits_per_token": -2.137157576424735, "logits_per_char": -0.38359238551213193, "bits_per_byte": 0.5534068323015178, "num_chars": 39}, {"sum_logits": -14.220919609069824, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -28.048519134521484, "logits_per_token": -2.031559944152832, "logits_per_char": -0.37423472655446904, "bits_per_byte": 0.5399065841289576, "num_chars": 38}, {"sum_logits": -19.908159255981445, "num_tokens": 9, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -38.46786880493164, "logits_per_token": -2.2120176951090493, "logits_per_char": -0.4147533178329468, "bits_per_byte": 0.5983625548302662, "num_chars": 48}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 746, "native_id": "Mercury_7233765", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.671431064605713, "logits_per_token_corr": -2.3357155323028564, "logits_per_char_corr": -0.4671431064605713, "bits_per_byte_corr": 0.6739450430766154}, "model_output": [{"sum_logits": -3.803722381591797, "num_tokens": 1, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -15.700824737548828, "logits_per_token": -3.803722381591797, "logits_per_char": -0.34579294378107245, "bits_per_byte": 0.49887376516770776, "num_chars": 11}, {"sum_logits": -4.671431064605713, "num_tokens": 2, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -17.041057586669922, "logits_per_token": -2.3357155323028564, "logits_per_char": -0.4671431064605713, "bits_per_byte": 0.6739450430766154, "num_chars": 10}, {"sum_logits": -4.376529693603516, "num_tokens": 2, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -20.851985931396484, "logits_per_token": -2.188264846801758, "logits_per_char": -0.36471080780029297, "bits_per_byte": 0.5261664737724685, "num_chars": 12}, {"sum_logits": -6.810583114624023, "num_tokens": 2, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -19.50847816467285, "logits_per_token": -3.4052915573120117, "logits_per_char": -0.5675485928853353, "bits_per_byte": 0.8187995404197708, "num_chars": 12}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 747, "native_id": "Mercury_SC_407613", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.953895568847656, "logits_per_token_corr": -3.238473892211914, "logits_per_char_corr": -0.6476947784423828, "bits_per_byte_corr": 0.9344260448691728}, "model_output": [{"sum_logits": -18.800172805786133, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -33.07736587524414, "logits_per_token": -3.7600345611572266, "logits_per_char": -0.6064571872834237, "bits_per_byte": 0.8749327766058933, "num_chars": 31}, {"sum_logits": -14.85995864868164, "num_tokens": 4, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -25.845388412475586, "logits_per_token": -3.71498966217041, "logits_per_char": -0.6191649436950684, "bits_per_byte": 0.8932661937618112, "num_chars": 24}, {"sum_logits": -12.953895568847656, "num_tokens": 4, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -23.584260940551758, "logits_per_token": -3.238473892211914, "logits_per_char": -0.6476947784423828, "bits_per_byte": 0.9344260448691728, "num_chars": 20}, {"sum_logits": -17.353038787841797, "num_tokens": 4, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -26.189414978027344, "logits_per_token": -4.338259696960449, "logits_per_char": -0.8263351803734189, "bits_per_byte": 1.192149666837675, "num_chars": 21}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 748, "native_id": "MCAS_2005_5_24", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.461844444274902, "logits_per_token_corr": -1.1154611110687256, "logits_per_char_corr": -0.2974562962849935, "bits_per_byte_corr": 0.4291387235318666}, "model_output": [{"sum_logits": -4.461844444274902, "num_tokens": 4, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -18.457468032836914, "logits_per_token": -1.1154611110687256, "logits_per_char": -0.2974562962849935, "bits_per_byte": 0.4291387235318666, "num_chars": 15}, {"sum_logits": -16.122529983520508, "num_tokens": 4, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -21.935718536376953, "logits_per_token": -4.030632495880127, "logits_per_char": -0.8485542096589741, "bits_per_byte": 1.2242049502013352, "num_chars": 19}, {"sum_logits": -13.36099624633789, "num_tokens": 5, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -24.006132125854492, "logits_per_token": -2.672199249267578, "logits_per_char": -0.6362379164922805, "bits_per_byte": 0.9178972869495989, "num_chars": 21}, {"sum_logits": -11.616592407226562, "num_tokens": 5, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -22.487751007080078, "logits_per_token": -2.3233184814453125, "logits_per_char": -0.505069235096807, "bits_per_byte": 0.7286608807802689, "num_chars": 23}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 749, "native_id": "Mercury_405778", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.275463104248047, "logits_per_token_corr": -5.137731552124023, "logits_per_char_corr": -0.6850308736165365, "bits_per_byte_corr": 0.9882906442231214}, "model_output": [{"sum_logits": -3.8083229064941406, "num_tokens": 1, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -12.599719047546387, "logits_per_token": -3.8083229064941406, "logits_per_char": -0.7616645812988281, "bits_per_byte": 1.0988497142613773, "num_chars": 5}, {"sum_logits": -3.7728538513183594, "num_tokens": 1, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -13.372587203979492, "logits_per_token": -3.7728538513183594, "logits_per_char": -0.4716067314147949, "bits_per_byte": 0.6803846926624667, "num_chars": 8}, {"sum_logits": -10.275463104248047, "num_tokens": 2, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -20.179704666137695, "logits_per_token": -5.137731552124023, "logits_per_char": -0.6850308736165365, "bits_per_byte": 0.9882906442231214, "num_chars": 15}, {"sum_logits": -4.159963607788086, "num_tokens": 2, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -18.389385223388672, "logits_per_token": -2.079981803894043, "logits_per_char": -0.297140257699149, "bits_per_byte": 0.4286827762313388, "num_chars": 14}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 750, "native_id": "Mercury_7263060", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.563873291015625, "logits_per_token_corr": -4.312774658203125, "logits_per_char_corr": -0.9375597083050272, "bits_per_byte_corr": 1.3526127417099376}, "model_output": [{"sum_logits": -8.374039649963379, "num_tokens": 4, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -23.891433715820312, "logits_per_token": -2.0935099124908447, "logits_per_char": -0.3489183187484741, "bits_per_byte": 0.5033827281340999, "num_chars": 24}, {"sum_logits": -16.936330795288086, "num_tokens": 4, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -23.321197509765625, "logits_per_token": -4.2340826988220215, "logits_per_char": -0.8913858313309518, "bits_per_byte": 1.2859979183807742, "num_chars": 19}, {"sum_logits": -14.111373901367188, "num_tokens": 4, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -24.31084632873535, "logits_per_token": -3.527843475341797, "logits_per_char": -0.7427038895456415, "bits_per_byte": 1.0714952182972113, "num_chars": 19}, {"sum_logits": -21.563873291015625, "num_tokens": 5, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -33.10722351074219, "logits_per_token": -4.312774658203125, "logits_per_char": -0.9375597083050272, "bits_per_byte": 1.3526127417099376, "num_chars": 23}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 751, "native_id": "Mercury_SC_401668", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -23.281665802001953, "logits_per_token_corr": -2.5868517557779946, "logits_per_char_corr": -0.5543253762381417, "bits_per_byte_corr": 0.7997224713382505}, "model_output": [{"sum_logits": -15.694449424743652, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -25.014625549316406, "logits_per_token": -3.1388898849487306, "logits_per_char": -0.5231483141581218, "bits_per_byte": 0.7547434784858861, "num_chars": 30}, {"sum_logits": -22.473915100097656, "num_tokens": 7, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -31.937637329101562, "logits_per_token": -3.210559300013951, "logits_per_char": -0.7491305033365886, "bits_per_byte": 1.080766862143126, "num_chars": 30}, {"sum_logits": -28.427417755126953, "num_tokens": 7, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -30.954620361328125, "logits_per_token": -4.061059679303851, "logits_per_char": -0.6611027384913245, "bits_per_byte": 0.9537696423402323, "num_chars": 43}, {"sum_logits": -23.281665802001953, "num_tokens": 9, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -44.29486083984375, "logits_per_token": -2.5868517557779946, "logits_per_char": -0.5543253762381417, "bits_per_byte": 0.7997224713382505, "num_chars": 42}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 752, "native_id": "Mercury_7230388", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -28.173797607421875, "logits_per_token_corr": -3.5217247009277344, "logits_per_char_corr": -0.4942771510074013, "bits_per_byte_corr": 0.7130911945836155}, "model_output": [{"sum_logits": -19.523408889770508, "num_tokens": 4, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -32.25223922729492, "logits_per_token": -4.880852222442627, "logits_per_char": -0.6101065278053284, "bits_per_byte": 0.8801976620793641, "num_chars": 32}, {"sum_logits": -25.106155395507812, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -34.05959701538086, "logits_per_token": -3.5865936279296875, "logits_per_char": -0.5341735190533577, "bits_per_byte": 0.7706494869130391, "num_chars": 47}, {"sum_logits": -34.8082160949707, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -37.39451217651367, "logits_per_token": -4.972602299281529, "logits_per_char": -0.7567003498906675, "bits_per_byte": 1.0916878422269938, "num_chars": 46}, {"sum_logits": -28.173797607421875, "num_tokens": 8, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -35.66963195800781, "logits_per_token": -3.5217247009277344, "logits_per_char": -0.4942771510074013, "bits_per_byte": 0.7130911945836155, "num_chars": 57}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 753, "native_id": "Mercury_7041650", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.858153343200684, "logits_per_token_corr": -1.4858153343200684, "logits_per_char_corr": -0.30322761924899355, "bits_per_byte_corr": 0.4374649825514041}, "model_output": [{"sum_logits": -20.667509078979492, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -27.713502883911133, "logits_per_token": -5.166877269744873, "logits_per_char": -0.6889169692993165, "bits_per_byte": 0.9938970951930921, "num_chars": 30}, {"sum_logits": -22.64617347717285, "num_tokens": 7, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -36.24985885620117, "logits_per_token": -3.2351676395961215, "logits_per_char": -0.6470335279192243, "bits_per_byte": 0.9334720620186262, "num_chars": 35}, {"sum_logits": -3.9990150928497314, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -20.04414939880371, "logits_per_token": -0.9997537732124329, "logits_per_char": -0.1666256288687388, "bits_per_byte": 0.24038996845410707, "num_chars": 24}, {"sum_logits": -14.858153343200684, "num_tokens": 10, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -36.983882904052734, "logits_per_token": -1.4858153343200684, "logits_per_char": -0.30322761924899355, "bits_per_byte": 0.4374649825514041, "num_chars": 49}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 754, "native_id": "Mercury_SC_409009", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.75704574584961, "logits_per_token_corr": -2.3938636779785156, "logits_per_char_corr": -0.4928542866426356, "bits_per_byte_corr": 0.7110384352207089}, "model_output": [{"sum_logits": -16.75704574584961, "num_tokens": 7, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -27.830873489379883, "logits_per_token": -2.3938636779785156, "logits_per_char": -0.4928542866426356, "bits_per_byte": 0.7110384352207089, "num_chars": 34}, {"sum_logits": -16.13167953491211, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -31.54566192626953, "logits_per_token": -2.688613255818685, "logits_per_char": -0.46090512956891744, "bits_per_byte": 0.66494554474984, "num_chars": 35}, {"sum_logits": -17.11334228515625, "num_tokens": 8, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -29.89238166809082, "logits_per_token": -2.1391677856445312, "logits_per_char": -0.42783355712890625, "bits_per_byte": 0.6172333511962015, "num_chars": 40}, {"sum_logits": -20.275054931640625, "num_tokens": 9, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -38.00328826904297, "logits_per_token": -2.2527838812934027, "logits_per_char": -0.422396977742513, "bits_per_byte": 0.6093900250760471, "num_chars": 48}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 755, "native_id": "Mercury_7223143", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -25.894372940063477, "logits_per_token_corr": -2.3540339036421343, "logits_per_char_corr": -0.49796871038583607, "bits_per_byte_corr": 0.7184169889920343}, "model_output": [{"sum_logits": -10.688796043395996, "num_tokens": 4, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -19.643007278442383, "logits_per_token": -2.672199010848999, "logits_per_char": -0.5625682128103155, "bits_per_byte": 0.8116143707837924, "num_chars": 19}, {"sum_logits": -21.800031661987305, "num_tokens": 6, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -33.32495880126953, "logits_per_token": -3.6333386103312173, "logits_per_char": -0.838462756230281, "bits_per_byte": 1.2096460603843873, "num_chars": 26}, {"sum_logits": -9.86448860168457, "num_tokens": 8, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -23.823490142822266, "logits_per_token": -1.2330610752105713, "logits_per_char": -0.234868776230585, "bits_per_byte": 0.33884401872776804, "num_chars": 42}, {"sum_logits": -25.894372940063477, "num_tokens": 11, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -40.56902313232422, "logits_per_token": -2.3540339036421343, "logits_per_char": -0.49796871038583607, "bits_per_byte": 0.7184169889920343, "num_chars": 52}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 756, "native_id": "ACTAAP_2007_7_3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -23.909709930419922, "logits_per_token_corr": -3.415672847202846, "logits_per_char_corr": -0.5831636568395103, "bits_per_byte_corr": 0.8413273157496393}, "model_output": [{"sum_logits": -14.189151763916016, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -21.49151611328125, "logits_per_token": -2.364858627319336, "logits_per_char": -0.5675660705566407, "bits_per_byte": 0.8188247553694893, "num_chars": 25}, {"sum_logits": -16.84300422668457, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -23.578983306884766, "logits_per_token": -2.406143460954939, "logits_per_char": -0.6015358652387347, "bits_per_byte": 0.8678328096973978, "num_chars": 28}, {"sum_logits": -22.979475021362305, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -34.048858642578125, "logits_per_token": -3.2827821459089006, "logits_per_char": -0.6383187505933974, "bits_per_byte": 0.9208992959881951, "num_chars": 36}, {"sum_logits": -23.909709930419922, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -36.535614013671875, "logits_per_token": -3.415672847202846, "logits_per_char": -0.5831636568395103, "bits_per_byte": 0.8413273157496393, "num_chars": 41}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 757, "native_id": "Mercury_7215670", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.949432373046875, "logits_per_token_corr": -2.564204624720982, "logits_per_char_corr": -0.44873580932617185, "bits_per_byte_corr": 0.6473889267846287}, "model_output": [{"sum_logits": -22.303974151611328, "num_tokens": 8, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -33.58903121948242, "logits_per_token": -2.787996768951416, "logits_per_char": -0.4745526415236453, "bits_per_byte": 0.6846347425674129, "num_chars": 47}, {"sum_logits": -19.7911376953125, "num_tokens": 8, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -34.208274841308594, "logits_per_token": -2.4738922119140625, "logits_per_char": -0.42108803607047873, "bits_per_byte": 0.607501621416989, "num_chars": 47}, {"sum_logits": -17.949432373046875, "num_tokens": 7, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -33.1575927734375, "logits_per_token": -2.564204624720982, "logits_per_char": -0.44873580932617185, "bits_per_byte": 0.6473889267846287, "num_chars": 40}, {"sum_logits": -26.371267318725586, "num_tokens": 11, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -34.97409439086914, "logits_per_token": -2.3973879380659624, "logits_per_char": -0.5381891289535834, "bits_per_byte": 0.7764427874022434, "num_chars": 49}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 758, "native_id": "MEA_2010_8_15", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.30735969543457, "logits_per_token_corr": -3.4724799564906528, "logits_per_char_corr": -0.623265633216271, "bits_per_byte_corr": 0.8991822381982798}, "model_output": [{"sum_logits": -24.30735969543457, "num_tokens": 7, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -43.813228607177734, "logits_per_token": -3.4724799564906528, "logits_per_char": -0.623265633216271, "bits_per_byte": 0.8991822381982798, "num_chars": 39}, {"sum_logits": -19.217594146728516, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -33.917415618896484, "logits_per_token": -2.4021992683410645, "logits_per_char": -0.519394436398068, "bits_per_byte": 0.7493277776573493, "num_chars": 37}, {"sum_logits": -23.1883487701416, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -47.128543853759766, "logits_per_token": -2.5764831966824002, "logits_per_char": -0.6102197044774106, "bits_per_byte": 0.8803609415029215, "num_chars": 38}, {"sum_logits": -14.627017974853516, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -28.548410415649414, "logits_per_token": -4.875672658284505, "logits_per_char": -0.6648644534024325, "bits_per_byte": 0.9591966497877298, "num_chars": 22}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 759, "native_id": "Mercury_7270515", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -35.30287551879883, "logits_per_token_corr": -3.5302875518798826, "logits_per_char_corr": -0.6537569540518301, "bits_per_byte_corr": 0.9431719155579268}, "model_output": [{"sum_logits": -27.29887580871582, "num_tokens": 11, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -35.575584411621094, "logits_per_token": -2.4817159826105293, "logits_per_char": -0.49634319652210585, "bits_per_byte": 0.7160718682019327, "num_chars": 55}, {"sum_logits": -35.30287551879883, "num_tokens": 10, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -45.894500732421875, "logits_per_token": -3.5302875518798826, "logits_per_char": -0.6537569540518301, "bits_per_byte": 0.9431719155579268, "num_chars": 54}, {"sum_logits": -37.38916015625, "num_tokens": 12, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -43.958290100097656, "logits_per_token": -3.1157633463541665, "logits_per_char": -0.6030509702620968, "bits_per_byte": 0.8700186442010298, "num_chars": 62}, {"sum_logits": -32.663665771484375, "num_tokens": 10, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -40.86981964111328, "logits_per_token": -3.2663665771484376, "logits_per_char": -0.5832797459193638, "bits_per_byte": 0.8414947968894453, "num_chars": 56}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 760, "native_id": "Mercury_7006160", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -21.254087448120117, "logits_per_token_corr": -2.6567609310150146, "logits_per_char_corr": -0.5183923767834175, "bits_per_byte_corr": 0.7478821112206168}, "model_output": [{"sum_logits": -17.975139617919922, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -22.9210205078125, "logits_per_token": -2.995856602986654, "logits_per_char": -0.49930943383110893, "bits_per_byte": 0.7203512440577344, "num_chars": 36}, {"sum_logits": -18.97789764404297, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -35.638404846191406, "logits_per_token": -2.7111282348632812, "logits_per_char": -0.4518547058105469, "bits_per_byte": 0.6518885432756858, "num_chars": 42}, {"sum_logits": -21.254087448120117, "num_tokens": 8, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -43.624420166015625, "logits_per_token": -2.6567609310150146, "logits_per_char": -0.5183923767834175, "bits_per_byte": 0.7478821112206168, "num_chars": 41}, {"sum_logits": -19.483959197998047, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -40.77909851074219, "logits_per_token": -2.7834227425711497, "logits_per_char": -0.43297687106662325, "bits_per_byte": 0.6246535847078862, "num_chars": 45}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 761, "native_id": "Mercury_SC_410630", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.941450119018555, "logits_per_token_corr": -3.7353625297546387, "logits_per_char_corr": -0.6225604216257731, "bits_per_byte_corr": 0.8981648329338903}, "model_output": [{"sum_logits": -11.622648239135742, "num_tokens": 5, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -24.07829475402832, "logits_per_token": -2.3245296478271484, "logits_per_char": -0.3874216079711914, "bits_per_byte": 0.5589312325536675, "num_chars": 30}, {"sum_logits": -14.03502368927002, "num_tokens": 4, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -25.53060531616211, "logits_per_token": -3.508755922317505, "logits_per_char": -0.5614009475708008, "bits_per_byte": 0.8099303630113412, "num_chars": 25}, {"sum_logits": -14.941450119018555, "num_tokens": 4, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -30.592041015625, "logits_per_token": -3.7353625297546387, "logits_per_char": -0.6225604216257731, "bits_per_byte": 0.8981648329338903, "num_chars": 24}, {"sum_logits": -9.409482955932617, "num_tokens": 3, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -19.59398651123047, "logits_per_token": -3.1364943186442056, "logits_per_char": -0.5534989974078011, "bits_per_byte": 0.7985302586978215, "num_chars": 17}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 762, "native_id": "Mercury_7082320", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.294837951660156, "logits_per_token_corr": -2.1618547439575195, "logits_per_char_corr": -0.5764945983886719, "bits_per_byte_corr": 0.831705898195209}, "model_output": [{"sum_logits": -17.294837951660156, "num_tokens": 8, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -37.90594482421875, "logits_per_token": -2.1618547439575195, "logits_per_char": -0.5764945983886719, "bits_per_byte": 0.831705898195209, "num_chars": 30}, {"sum_logits": -11.873117446899414, "num_tokens": 7, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -38.15798568725586, "logits_per_token": -1.6961596352713448, "logits_per_char": -0.3830037886096585, "bits_per_byte": 0.5525576664692361, "num_chars": 31}, {"sum_logits": -24.106346130371094, "num_tokens": 6, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -35.39351272583008, "logits_per_token": -4.017724355061849, "logits_per_char": -0.8035448710123698, "bits_per_byte": 1.1592702005421405, "num_chars": 30}, {"sum_logits": -21.4454402923584, "num_tokens": 9, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -44.397300720214844, "logits_per_token": -2.3828266991509333, "logits_per_char": -0.5498830844194461, "bits_per_byte": 0.7933135989612321, "num_chars": 39}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 763, "native_id": "MEA_2013_8_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.452507019042969, "logits_per_token_corr": -2.4087511698404946, "logits_per_char_corr": -0.5352780377423322, "bits_per_byte_corr": 0.7722429705481928}, "model_output": [{"sum_logits": -14.452507019042969, "num_tokens": 6, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -27.33860969543457, "logits_per_token": -2.4087511698404946, "logits_per_char": -0.5352780377423322, "bits_per_byte": 0.7722429705481928, "num_chars": 27}, {"sum_logits": -21.127750396728516, "num_tokens": 8, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -33.01219177246094, "logits_per_token": -2.6409687995910645, "logits_per_char": -0.5281937599182129, "bits_per_byte": 0.762022518063049, "num_chars": 40}, {"sum_logits": -15.253660202026367, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -28.765056610107422, "logits_per_token": -2.1790943145751953, "logits_per_char": -0.6101464080810547, "bits_per_byte": 0.8802551971553838, "num_chars": 25}, {"sum_logits": -30.405954360961914, "num_tokens": 9, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -42.57556915283203, "logits_per_token": -3.3784393734402127, "logits_per_char": -0.6756878746880426, "bits_per_byte": 0.9748115460019428, "num_chars": 45}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 764, "native_id": "Mercury_7033845", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.038154602050781, "logits_per_token_corr": -1.7197363717215401, "logits_per_char_corr": -0.2561309489798039, "bits_per_byte_corr": 0.3695188499116126}, "model_output": [{"sum_logits": -23.13243293762207, "num_tokens": 8, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -41.92164611816406, "logits_per_token": -2.891554117202759, "logits_per_char": -0.47209046811473615, "bits_per_byte": 0.6810825772005684, "num_chars": 49}, {"sum_logits": -12.038154602050781, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -35.84431457519531, "logits_per_token": -1.7197363717215401, "logits_per_char": -0.2561309489798039, "bits_per_byte": 0.3695188499116126, "num_chars": 47}, {"sum_logits": -23.78604507446289, "num_tokens": 8, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -36.55078125, "logits_per_token": -2.9732556343078613, "logits_per_char": -0.5946511268615723, "bits_per_byte": 0.8579002317828406, "num_chars": 40}, {"sum_logits": -10.141061782836914, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -32.09952163696289, "logits_per_token": -1.4487231118338448, "logits_per_char": -0.24734297031309546, "bits_per_byte": 0.3568404766697053, "num_chars": 41}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 765, "native_id": "Mercury_7221620", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.975189208984375, "logits_per_token_corr": -1.4958648681640625, "logits_per_char_corr": -0.2493108113606771, "bits_per_byte_corr": 0.3596794711903111}, "model_output": [{"sum_logits": -8.975189208984375, "num_tokens": 6, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -33.88418960571289, "logits_per_token": -1.4958648681640625, "logits_per_char": -0.2493108113606771, "bits_per_byte": 0.3596794711903111, "num_chars": 36}, {"sum_logits": -21.19023895263672, "num_tokens": 4, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -28.87546730041504, "logits_per_token": -5.29755973815918, "logits_per_char": -0.6232423221363741, "bits_per_byte": 0.8991486074189147, "num_chars": 34}, {"sum_logits": -14.618515014648438, "num_tokens": 4, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -27.28002166748047, "logits_per_token": -3.6546287536621094, "logits_per_char": -0.5622505774864783, "bits_per_byte": 0.8111561198772809, "num_chars": 26}, {"sum_logits": -11.394242286682129, "num_tokens": 4, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -24.561805725097656, "logits_per_token": -2.8485605716705322, "logits_per_char": -0.47476009527842206, "bits_per_byte": 0.6849340350706433, "num_chars": 24}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 766, "native_id": "LEAP__7_10352", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -33.462154388427734, "logits_per_token_corr": -2.0913846492767334, "logits_per_char_corr": -0.3677159822904147, "bits_per_byte_corr": 0.5305020241063763}, "model_output": [{"sum_logits": -33.63532638549805, "num_tokens": 12, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -58.60203552246094, "logits_per_token": -2.8029438654581704, "logits_per_char": -0.5338940696110801, "bits_per_byte": 0.7702463265884857, "num_chars": 63}, {"sum_logits": -28.358110427856445, "num_tokens": 14, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -55.57072448730469, "logits_per_token": -2.0255793162754605, "logits_per_char": -0.37810813903808593, "bits_per_byte": 0.5454947371103932, "num_chars": 75}, {"sum_logits": -41.145530700683594, "num_tokens": 16, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -68.560546875, "logits_per_token": -2.5715956687927246, "logits_per_char": -0.5275068038549179, "bits_per_byte": 0.7610314499572239, "num_chars": 78}, {"sum_logits": -33.462154388427734, "num_tokens": 16, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -68.2588882446289, "logits_per_token": -2.0913846492767334, "logits_per_char": -0.3677159822904147, "bits_per_byte": 0.5305020241063763, "num_chars": 91}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 767, "native_id": "Mercury_412605", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.257339477539062, "logits_per_token_corr": -1.9071674346923828, "logits_per_char_corr": -0.34675771539861505, "bits_per_byte_corr": 0.5002656363959279}, "model_output": [{"sum_logits": -15.628355026245117, "num_tokens": 6, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -30.930448532104492, "logits_per_token": -2.6047258377075195, "logits_per_char": -0.4735865159468217, "bits_per_byte": 0.6832409179888526, "num_chars": 33}, {"sum_logits": -23.6750431060791, "num_tokens": 8, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -37.882442474365234, "logits_per_token": -2.9593803882598877, "logits_per_char": -0.5636915025256929, "bits_per_byte": 0.8132349352856499, "num_chars": 42}, {"sum_logits": -15.257339477539062, "num_tokens": 8, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -33.940399169921875, "logits_per_token": -1.9071674346923828, "logits_per_char": -0.34675771539861505, "bits_per_byte": 0.5002656363959279, "num_chars": 44}, {"sum_logits": -25.67351531982422, "num_tokens": 9, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -42.686492919921875, "logits_per_token": -2.8526128133138022, "logits_per_char": -0.44264681585903826, "bits_per_byte": 0.6386043661055835, "num_chars": 58}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 768, "native_id": "Mercury_416638", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.814090728759766, "logits_per_token_corr": -4.203522682189941, "logits_per_char_corr": -0.7005871136983236, "bits_per_byte_corr": 1.01073355464401}, "model_output": [{"sum_logits": -16.814090728759766, "num_tokens": 4, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -27.056644439697266, "logits_per_token": -4.203522682189941, "logits_per_char": -0.7005871136983236, "bits_per_byte": 1.01073355464401, "num_chars": 24}, {"sum_logits": -11.62010383605957, "num_tokens": 5, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -27.304550170898438, "logits_per_token": -2.324020767211914, "logits_per_char": -0.5281865380027078, "bits_per_byte": 0.7620120990413639, "num_chars": 22}, {"sum_logits": -18.463394165039062, "num_tokens": 3, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -22.625732421875, "logits_per_token": -6.1544647216796875, "logits_per_char": -1.1539621353149414, "bits_per_byte": 1.664815449993701, "num_chars": 16}, {"sum_logits": -13.797664642333984, "num_tokens": 4, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -24.33254623413086, "logits_per_token": -3.449416160583496, "logits_per_char": -0.7665369245741103, "bits_per_byte": 1.1058790197421409, "num_chars": 18}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 769, "native_id": "MCAS_2011_8_17694", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.775930404663086, "logits_per_token_corr": -2.9551860809326174, "logits_per_char_corr": -0.36939826011657717, "bits_per_byte_corr": 0.5329290379835802}, "model_output": [{"sum_logits": -14.775930404663086, "num_tokens": 5, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -31.805749893188477, "logits_per_token": -2.9551860809326174, "logits_per_char": -0.36939826011657717, "bits_per_byte": 0.5329290379835802, "num_chars": 40}, {"sum_logits": -21.804288864135742, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -32.26747131347656, "logits_per_token": -5.4510722160339355, "logits_per_char": -0.5318119235155059, "bits_per_byte": 0.7672424247419922, "num_chars": 41}, {"sum_logits": -17.260202407836914, "num_tokens": 5, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -34.954593658447266, "logits_per_token": -3.4520404815673826, "logits_per_char": -0.43150506019592283, "bits_per_byte": 0.6225302104635988, "num_chars": 40}, {"sum_logits": -20.387306213378906, "num_tokens": 6, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -31.813720703125, "logits_per_token": -3.3978843688964844, "logits_per_char": -0.45305124918619794, "bits_per_byte": 0.6536147904699474, "num_chars": 45}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 770, "native_id": "Mercury_SC_400012", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.09054183959961, "logits_per_token_corr": -2.0100602043999567, "logits_per_char_corr": -0.4412327277951124, "bits_per_byte_corr": 0.6365642682683759}, "model_output": [{"sum_logits": -22.09901237487793, "num_tokens": 7, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -40.620208740234375, "logits_per_token": -3.1570017678397044, "logits_per_char": -0.6905941367149353, "bits_per_byte": 0.9963167363063478, "num_chars": 32}, {"sum_logits": -18.971250534057617, "num_tokens": 7, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -39.37574005126953, "logits_per_token": -2.710178647722517, "logits_per_char": -0.5420357295445033, "bits_per_byte": 0.7819922589990481, "num_chars": 35}, {"sum_logits": -22.444660186767578, "num_tokens": 9, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -49.433197021484375, "logits_per_token": -2.4938511318630643, "logits_per_char": -0.6066124374802048, "bits_per_byte": 0.8751567552948866, "num_chars": 37}, {"sum_logits": -18.09054183959961, "num_tokens": 9, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -44.689720153808594, "logits_per_token": -2.0100602043999567, "logits_per_char": -0.4412327277951124, "bits_per_byte": 0.6365642682683759, "num_chars": 41}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 771, "native_id": "Mercury_SC_413458", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.912345886230469, "logits_per_token_corr": -1.6374486287434895, "logits_per_char_corr": -0.2585445203279194, "bits_per_byte_corr": 0.3730008973263731}, "model_output": [{"sum_logits": -8.228677749633789, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -19.08292007446289, "logits_per_token": -2.742892583211263, "logits_per_char": -0.5877626964024135, "bits_per_byte": 0.8479623273198965, "num_chars": 14}, {"sum_logits": -4.912345886230469, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -18.627723693847656, "logits_per_token": -1.6374486287434895, "logits_per_char": -0.2585445203279194, "bits_per_byte": 0.3730008973263731, "num_chars": 19}, {"sum_logits": -5.625068664550781, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -15.53705883026123, "logits_per_token": -2.8125343322753906, "logits_per_char": -0.5625068664550781, "bits_per_byte": 0.8115258667013145, "num_chars": 10}, {"sum_logits": -7.836265563964844, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -18.215225219726562, "logits_per_token": -1.959066390991211, "logits_per_char": -0.6530221303304037, "bits_per_byte": 0.9421117890190965, "num_chars": 12}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 772, "native_id": "Mercury_7139545", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.048173904418945, "logits_per_token_corr": -3.5240869522094727, "logits_per_char_corr": -0.3709565212852077, "bits_per_byte_corr": 0.5351771336439748}, "model_output": [{"sum_logits": -11.060956001281738, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -17.26000213623047, "logits_per_token": -3.6869853337605796, "logits_per_char": -0.6913097500801086, "bits_per_byte": 0.997349148159478, "num_chars": 16}, {"sum_logits": -16.98108673095703, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -20.221927642822266, "logits_per_token": -8.490543365478516, "logits_per_char": -0.9988874547621783, "bits_per_byte": 1.4410899773926287, "num_chars": 17}, {"sum_logits": -7.048173904418945, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -22.214725494384766, "logits_per_token": -3.5240869522094727, "logits_per_char": -0.3709565212852077, "bits_per_byte": 0.5351771336439748, "num_chars": 19}, {"sum_logits": -7.142346382141113, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -23.335933685302734, "logits_per_token": -3.5711731910705566, "logits_per_char": -0.34011173248291016, "bits_per_byte": 0.4906775098016008, "num_chars": 21}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 773, "native_id": "NYSEDREGENTS_2015_4_5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.7762842178344727, "logits_per_token_corr": -3.7762842178344727, "logits_per_char_corr": -0.3432985652576793, "bits_per_byte_corr": 0.49527513764190595}, "model_output": [{"sum_logits": -7.083625793457031, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -16.48602294921875, "logits_per_token": -7.083625793457031, "logits_per_char": -0.544894291804387, "bits_per_byte": 0.7861162925954576, "num_chars": 13}, {"sum_logits": -3.7762842178344727, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -15.784173965454102, "logits_per_token": -3.7762842178344727, "logits_per_char": -0.3432985652576793, "bits_per_byte": 0.49527513764190595, "num_chars": 11}, {"sum_logits": -10.389568328857422, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -15.00040054321289, "logits_per_token": -10.389568328857422, "logits_per_char": -0.6926378885904948, "bits_per_byte": 0.9992652470020271, "num_chars": 15}, {"sum_logits": -9.785386085510254, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -15.348828315734863, "logits_per_token": -9.785386085510254, "logits_per_char": -1.0872651206122503, "bits_per_byte": 1.5685919976399612, "num_chars": 9}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 774, "native_id": "TIMSS_2003_8_pg16", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.183996200561523, "logits_per_token_corr": -1.4548566000802177, "logits_per_char_corr": -0.2424761000133696, "bits_per_byte_corr": 0.34981906702363597}, "model_output": [{"sum_logits": -13.377721786499023, "num_tokens": 4, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -23.971256256103516, "logits_per_token": -3.344430446624756, "logits_per_char": -0.7432067659166124, "bits_per_byte": 1.0722207155437917, "num_chars": 18}, {"sum_logits": -12.238336563110352, "num_tokens": 4, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -26.057205200195312, "logits_per_token": -3.059584140777588, "logits_per_char": -0.48953346252441404, "bits_per_byte": 0.7062474987336828, "num_chars": 25}, {"sum_logits": -10.183996200561523, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -37.31769943237305, "logits_per_token": -1.4548566000802177, "logits_per_char": -0.2424761000133696, "bits_per_byte": 0.34981906702363597, "num_chars": 42}, {"sum_logits": -14.167060852050781, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -36.62581253051758, "logits_per_token": -2.0238658360072543, "logits_per_char": -0.31482357449001735, "bits_per_byte": 0.4541944096720115, "num_chars": 45}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 775, "native_id": "Mercury_SC_415073", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.149749279022217, "logits_per_token_corr": -7.149749279022217, "logits_per_char_corr": -1.1916248798370361, "bits_per_byte_corr": 1.7191513047420341}, "model_output": [{"sum_logits": -7.149749279022217, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -14.175207138061523, "logits_per_token": -7.149749279022217, "logits_per_char": -1.1916248798370361, "bits_per_byte": 1.7191513047420341, "num_chars": 6}, {"sum_logits": -3.748569965362549, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -3.748569965362549, "logits_per_char": -0.6247616608937582, "bits_per_byte": 0.9013405499096246, "num_chars": 6}, {"sum_logits": -9.440597534179688, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -14.751949310302734, "logits_per_token": -4.720298767089844, "logits_per_char": -1.5734329223632812, "bits_per_byte": 2.269983874266566, "num_chars": 6}, {"sum_logits": -8.058528900146484, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -14.156341552734375, "logits_per_token": -8.058528900146484, "logits_per_char": -1.6117057800292969, "bits_per_byte": 2.3251999362220155, "num_chars": 5}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 776, "native_id": "Mercury_7012880", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.9381184577941895, "logits_per_token_corr": -2.312706152598063, "logits_per_char_corr": -0.3854510254330105, "bits_per_byte_corr": 0.5560882828981695}, "model_output": [{"sum_logits": -6.9381184577941895, "num_tokens": 3, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -21.32512855529785, "logits_per_token": -2.312706152598063, "logits_per_char": -0.3854510254330105, "bits_per_byte": 0.5560882828981695, "num_chars": 18}, {"sum_logits": -8.74593734741211, "num_tokens": 3, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -26.002342224121094, "logits_per_token": -2.9153124491373696, "logits_per_char": -0.4858854081895616, "bits_per_byte": 0.7009844688358938, "num_chars": 18}, {"sum_logits": -8.513362884521484, "num_tokens": 6, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -28.35771942138672, "logits_per_token": -1.418893814086914, "logits_per_char": -0.3274370340200571, "bits_per_byte": 0.47239178518446656, "num_chars": 26}, {"sum_logits": -16.202951431274414, "num_tokens": 7, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -28.390228271484375, "logits_per_token": -2.3147073473249162, "logits_per_char": -0.5226758526217553, "bits_per_byte": 0.7540618605703588, "num_chars": 31}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 777, "native_id": "Mercury_191625", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.468875885009766, "logits_per_token_corr": -4.293775177001953, "logits_per_char_corr": -0.6709023714065552, "bits_per_byte_corr": 0.967907524149578}, "model_output": [{"sum_logits": -12.733468055725098, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -29.86857795715332, "logits_per_token": -2.5466936111450194, "logits_per_char": -0.38586266835530597, "bits_per_byte": 0.5566821581007826, "num_chars": 33}, {"sum_logits": -13.988346099853516, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -31.556442260742188, "logits_per_token": -2.797669219970703, "logits_per_char": -0.4114219441133387, "bits_per_byte": 0.5935563984856365, "num_chars": 34}, {"sum_logits": -16.76936912536621, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -24.392974853515625, "logits_per_token": -5.589789708455403, "logits_per_char": -0.6987237135569254, "bits_per_byte": 1.008045236500821, "num_chars": 24}, {"sum_logits": -21.468875885009766, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -32.29670333862305, "logits_per_token": -4.293775177001953, "logits_per_char": -0.6709023714065552, "bits_per_byte": 0.967907524149578, "num_chars": 32}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 778, "native_id": "Mercury_SC_402985", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.449759483337402, "logits_per_token_corr": -4.224879741668701, "logits_per_char_corr": -0.4447241833335475, "bits_per_byte_corr": 0.6416013738591642}, "model_output": [{"sum_logits": -19.535507202148438, "num_tokens": 3, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -32.89643096923828, "logits_per_token": -6.5118357340494795, "logits_per_char": -1.0281845895867598, "bits_per_byte": 1.4833568085163382, "num_chars": 19}, {"sum_logits": -8.449759483337402, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -21.841793060302734, "logits_per_token": -4.224879741668701, "logits_per_char": -0.4447241833335475, "bits_per_byte": 0.6416013738591642, "num_chars": 19}, {"sum_logits": -12.55783462524414, "num_tokens": 3, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -24.8477840423584, "logits_per_token": -4.18594487508138, "logits_per_char": -0.7386961544261259, "bits_per_byte": 1.0657132787150854, "num_chars": 17}, {"sum_logits": -6.393791675567627, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -16.698421478271484, "logits_per_token": -3.1968958377838135, "logits_per_char": -0.5328159729639689, "bits_per_byte": 0.7686909619020982, "num_chars": 12}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 779, "native_id": "Mercury_7005425", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.499290466308594, "logits_per_token_corr": -2.416548411051432, "logits_per_char_corr": -0.6590586575594816, "bits_per_byte_corr": 0.9508206569166847}, "model_output": [{"sum_logits": -14.499290466308594, "num_tokens": 6, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -29.175220489501953, "logits_per_token": -2.416548411051432, "logits_per_char": -0.6590586575594816, "bits_per_byte": 0.9508206569166847, "num_chars": 22}, {"sum_logits": -17.904766082763672, "num_tokens": 7, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -38.7044792175293, "logits_per_token": -2.557823726109096, "logits_per_char": -0.6174057269918507, "bits_per_byte": 0.8907281805482281, "num_chars": 29}, {"sum_logits": -16.89472007751465, "num_tokens": 6, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -32.35844421386719, "logits_per_token": -2.815786679585775, "logits_per_char": -0.5279600024223328, "bits_per_byte": 0.7616852772829719, "num_chars": 32}, {"sum_logits": -16.869626998901367, "num_tokens": 6, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -29.944908142089844, "logits_per_token": -2.8116044998168945, "logits_per_char": -0.6488318076500525, "bits_per_byte": 0.9360664312684251, "num_chars": 26}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 780, "native_id": "MDSA_2013_8_40", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -27.805110931396484, "logits_per_token_corr": -3.0894567701551647, "logits_per_char_corr": -0.5561022186279296, "bits_per_byte_corr": 0.8022859130424406}, "model_output": [{"sum_logits": -20.95746421813965, "num_tokens": 9, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -34.77167892456055, "logits_per_token": -2.3286071353488498, "logits_per_char": -0.4763060049577193, "bits_per_byte": 0.6871643112986293, "num_chars": 44}, {"sum_logits": -30.7904109954834, "num_tokens": 9, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -42.341487884521484, "logits_per_token": -3.4211567772759333, "logits_per_char": -0.6414668957392374, "bits_per_byte": 0.9254411093781004, "num_chars": 48}, {"sum_logits": -27.805110931396484, "num_tokens": 9, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -42.91740036010742, "logits_per_token": -3.0894567701551647, "logits_per_char": -0.5561022186279296, "bits_per_byte": 0.8022859130424406, "num_chars": 50}, {"sum_logits": -32.02567672729492, "num_tokens": 9, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -49.23954391479492, "logits_per_token": -3.5584085252549915, "logits_per_char": -0.49270271888146033, "bits_per_byte": 0.7108197691633026, "num_chars": 65}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 781, "native_id": "Mercury_401684", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.636609077453613, "logits_per_token_corr": -3.212203025817871, "logits_per_char_corr": -0.5668593574972713, "bits_per_byte_corr": 0.817805183943405}, "model_output": [{"sum_logits": -13.861002922058105, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -20.232810974121094, "logits_per_token": -4.620334307352702, "logits_per_char": -0.9900716372898647, "bits_per_byte": 1.4283714412439306, "num_chars": 14}, {"sum_logits": -9.636609077453613, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -21.462078094482422, "logits_per_token": -3.212203025817871, "logits_per_char": -0.5668593574972713, "bits_per_byte": 0.817805183943405, "num_chars": 17}, {"sum_logits": -12.569852828979492, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -24.56130599975586, "logits_per_token": -3.142463207244873, "logits_per_char": -0.6284926414489747, "bits_per_byte": 0.9067232170542925, "num_chars": 20}, {"sum_logits": -12.630966186523438, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -20.32205581665039, "logits_per_token": -4.2103220621744795, "logits_per_char": -0.7017203436957465, "bits_per_byte": 1.0123684599414797, "num_chars": 18}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 782, "native_id": "NCEOGA_2013_5_17", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.832301139831543, "logits_per_token_corr": -4.832301139831543, "logits_per_char_corr": -0.5369223488701714, "bits_per_byte_corr": 0.7746152100580067}, "model_output": [{"sum_logits": -5.591407775878906, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -5.591407775878906, "logits_per_char": -0.9319012959798177, "bits_per_byte": 1.344449378309047, "num_chars": 6}, {"sum_logits": -4.832301139831543, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -12.133164405822754, "logits_per_token": -4.832301139831543, "logits_per_char": -0.5369223488701714, "bits_per_byte": 0.7746152100580067, "num_chars": 9}, {"sum_logits": -6.66987419128418, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -11.618191719055176, "logits_per_token": -6.66987419128418, "logits_per_char": -1.333974838256836, "bits_per_byte": 1.924518883825177, "num_chars": 5}, {"sum_logits": -6.180627822875977, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -14.832910537719727, "logits_per_token": -6.180627822875977, "logits_per_char": -0.7725784778594971, "bits_per_byte": 1.114595138706241, "num_chars": 8}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 783, "native_id": "Mercury_7116183", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.20358657836914, "logits_per_token_corr": -2.0407173156738283, "logits_per_char_corr": -0.3001054875990924, "bits_per_byte_corr": 0.43296069870308596}, "model_output": [{"sum_logits": -14.67416000366211, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -27.244873046875, "logits_per_token": -2.934832000732422, "logits_per_char": -0.4315929412841797, "bits_per_byte": 0.622656996073815, "num_chars": 34}, {"sum_logits": -10.20358657836914, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -25.489166259765625, "logits_per_token": -2.0407173156738283, "logits_per_char": -0.3001054875990924, "bits_per_byte": 0.43296069870308596, "num_chars": 34}, {"sum_logits": -20.720191955566406, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -30.87533950805664, "logits_per_token": -3.4533653259277344, "logits_per_char": -0.5600051879882812, "bits_per_byte": 0.8079167075833655, "num_chars": 37}, {"sum_logits": -16.624526977539062, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -30.628440856933594, "logits_per_token": -2.7707544962565103, "logits_per_char": -0.43748755204050166, "bits_per_byte": 0.6311611217799376, "num_chars": 38}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 784, "native_id": "Mercury_7106628", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.950763702392578, "logits_per_token_corr": -3.5901527404785156, "logits_per_char_corr": -0.664843100088614, "bits_per_byte_corr": 0.9591658434677773}, "model_output": [{"sum_logits": -19.669158935546875, "num_tokens": 4, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -29.148162841796875, "logits_per_token": -4.917289733886719, "logits_per_char": -0.6146612167358398, "bits_per_byte": 0.8867686892122095, "num_chars": 32}, {"sum_logits": -12.91513729095459, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -24.90650749206543, "logits_per_token": -2.583027458190918, "logits_per_char": -0.5381307204564413, "bits_per_byte": 0.7763585217530706, "num_chars": 24}, {"sum_logits": -23.39786148071289, "num_tokens": 3, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -29.33633041381836, "logits_per_token": -7.79928716023763, "logits_per_char": -0.779928716023763, "bits_per_byte": 1.125199290855188, "num_chars": 30}, {"sum_logits": -17.950763702392578, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -28.590816497802734, "logits_per_token": -3.5901527404785156, "logits_per_char": -0.664843100088614, "bits_per_byte": 0.9591658434677773, "num_chars": 27}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 785, "native_id": "Mercury_7203473", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.23147201538086, "logits_per_token_corr": -2.11195200139826, "logits_per_char_corr": -0.38084380353083375, "bits_per_byte_corr": 0.5494414667076193}, "model_output": [{"sum_logits": -20.256851196289062, "num_tokens": 8, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -39.41929244995117, "logits_per_token": -2.532106399536133, "logits_per_char": -0.494069541372904, "bits_per_byte": 0.7127916771934852, "num_chars": 41}, {"sum_logits": -29.11029052734375, "num_tokens": 9, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -36.540382385253906, "logits_per_token": -3.2344767252604165, "logits_per_char": -0.6064643859863281, "bits_per_byte": 0.8749431621388744, "num_chars": 48}, {"sum_logits": -23.82175064086914, "num_tokens": 9, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -43.80217742919922, "logits_per_token": -2.6468611823187933, "logits_per_char": -0.4581105892474835, "bits_per_byte": 0.6609138752865401, "num_chars": 52}, {"sum_logits": -23.23147201538086, "num_tokens": 11, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -39.44472885131836, "logits_per_token": -2.11195200139826, "logits_per_char": -0.38084380353083375, "bits_per_byte": 0.5494414667076193, "num_chars": 61}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 786, "native_id": "Mercury_SC_416108", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.187944412231445, "logits_per_token_corr": -3.062648137410482, "logits_per_char_corr": -0.48357602169639186, "bits_per_byte_corr": 0.6976527283946995}, "model_output": [{"sum_logits": -7.771424293518066, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -19.919597625732422, "logits_per_token": -2.590474764506022, "logits_per_char": -0.40902233123779297, "bits_per_byte": 0.5900944888900308, "num_chars": 19}, {"sum_logits": -5.128913879394531, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -20.744873046875, "logits_per_token": -1.709637959798177, "logits_per_char": -0.30170081643497243, "bits_per_byte": 0.4352622717031989, "num_chars": 17}, {"sum_logits": -9.187944412231445, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -21.928550720214844, "logits_per_token": -3.062648137410482, "logits_per_char": -0.48357602169639186, "bits_per_byte": 0.6976527283946995, "num_chars": 19}, {"sum_logits": -12.732847213745117, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -22.844398498535156, "logits_per_token": -4.244282404581706, "logits_per_char": -0.6063260577973866, "bits_per_byte": 0.874743596746673, "num_chars": 21}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 787, "native_id": "LEAP_2007_8_10418", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.64368438720703, "logits_per_token_corr": -2.520526341029576, "logits_per_char_corr": -0.4524021637745393, "bits_per_byte_corr": 0.6526783581654334}, "model_output": [{"sum_logits": -12.068923950195312, "num_tokens": 7, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -30.75870704650879, "logits_per_token": -1.7241319928850447, "logits_per_char": -0.2681983100043403, "bits_per_byte": 0.3869283718183406, "num_chars": 45}, {"sum_logits": -22.13524627685547, "num_tokens": 6, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -34.51160430908203, "logits_per_token": -3.6892077128092446, "logits_per_char": -0.6324356079101563, "bits_per_byte": 0.9124117152142349, "num_chars": 35}, {"sum_logits": -17.64368438720703, "num_tokens": 7, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -29.663755416870117, "logits_per_token": -2.520526341029576, "logits_per_char": -0.4524021637745393, "bits_per_byte": 0.6526783581654334, "num_chars": 39}, {"sum_logits": -21.725191116333008, "num_tokens": 8, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -33.741451263427734, "logits_per_token": -2.715648889541626, "logits_per_char": -0.4526081482569377, "bits_per_byte": 0.6529755309566898, "num_chars": 48}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 788, "native_id": "Mercury_7111178", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -34.375144958496094, "logits_per_token_corr": -2.4553674970354353, "logits_per_char_corr": -0.5635269665327228, "bits_per_byte_corr": 0.8129975600245442}, "model_output": [{"sum_logits": -35.164859771728516, "num_tokens": 10, "num_tokens_all": 253, "is_greedy": false, "sum_logits_uncond": -49.0665283203125, "logits_per_token": -3.5164859771728514, "logits_per_char": -0.6634879202212928, "bits_per_byte": 0.9572107321936789, "num_chars": 53}, {"sum_logits": -27.81851577758789, "num_tokens": 14, "num_tokens_all": 257, "is_greedy": false, "sum_logits_uncond": -50.2562141418457, "logits_per_token": -1.987036841256278, "logits_per_char": -0.479629582372205, "bits_per_byte": 0.6919592199525219, "num_chars": 58}, {"sum_logits": -34.375144958496094, "num_tokens": 14, "num_tokens_all": 257, "is_greedy": false, "sum_logits_uncond": -53.663597106933594, "logits_per_token": -2.4553674970354353, "logits_per_char": -0.5635269665327228, "bits_per_byte": 0.8129975600245442, "num_chars": 61}, {"sum_logits": -37.429832458496094, "num_tokens": 13, "num_tokens_all": 256, "is_greedy": false, "sum_logits_uncond": -56.64679718017578, "logits_per_token": -2.8792178814227762, "logits_per_char": -0.5941243247380332, "bits_per_byte": 0.8571402169716804, "num_chars": 63}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 789, "native_id": "Mercury_7203560", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -26.590774536132812, "logits_per_token_corr": -2.215897878011068, "logits_per_char_corr": -0.5657611603432513, "bits_per_byte_corr": 0.8162208203553807}, "model_output": [{"sum_logits": -26.590774536132812, "num_tokens": 12, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -50.2406005859375, "logits_per_token": -2.215897878011068, "logits_per_char": -0.5657611603432513, "bits_per_byte": 0.8162208203553807, "num_chars": 47}, {"sum_logits": -22.96529197692871, "num_tokens": 8, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -40.91669845581055, "logits_per_token": -2.870661497116089, "logits_per_char": -0.5219384540211071, "bits_per_byte": 0.7529980192660444, "num_chars": 44}, {"sum_logits": -28.13724136352539, "num_tokens": 11, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -50.63929748535156, "logits_per_token": -2.5579310330477627, "logits_per_char": -0.5861925284067789, "bits_per_byte": 0.8456970537392303, "num_chars": 48}, {"sum_logits": -26.550395965576172, "num_tokens": 9, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -42.22435760498047, "logits_per_token": -2.9500439961751304, "logits_per_char": -0.5531332492828369, "bits_per_byte": 0.7980025956917209, "num_chars": 48}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 790, "native_id": "ACTAAP_2013_7_2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.994075775146484, "logits_per_token_corr": -1.8456981365497296, "logits_per_char_corr": -0.43625592318448153, "bits_per_byte_corr": 0.6293842569371403}, "model_output": [{"sum_logits": -23.994075775146484, "num_tokens": 13, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -35.41367721557617, "logits_per_token": -1.8456981365497296, "logits_per_char": -0.43625592318448153, "bits_per_byte": 0.6293842569371403, "num_chars": 55}, {"sum_logits": -30.042638778686523, "num_tokens": 11, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -49.37651062011719, "logits_per_token": -2.7311489798805932, "logits_per_char": -0.536475692476545, "bits_per_byte": 0.7739708210939401, "num_chars": 56}, {"sum_logits": -28.519420623779297, "num_tokens": 13, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -44.595855712890625, "logits_per_token": -2.1938015864445615, "logits_per_char": -0.5092753682817731, "bits_per_byte": 0.7347290482675424, "num_chars": 56}, {"sum_logits": -27.092897415161133, "num_tokens": 11, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -40.552268981933594, "logits_per_token": -2.4629906741055576, "logits_per_char": -0.475313989739669, "bits_per_byte": 0.6857331358630607, "num_chars": 57}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 791, "native_id": "MCAS_2012_8_23640", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.227489948272705, "logits_per_token_corr": -4.227489948272705, "logits_per_char_corr": -0.4227489948272705, "bits_per_byte_corr": 0.6098978783785354}, "model_output": [{"sum_logits": -2.558708667755127, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -15.479575157165527, "logits_per_token": -2.558708667755127, "logits_per_char": -0.2326098788868297, "bits_per_byte": 0.3355851187320527, "num_chars": 11}, {"sum_logits": -1.5236362218856812, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": true, "sum_logits_uncond": -15.965056419372559, "logits_per_token": -1.5236362218856812, "logits_per_char": -0.13851238380778919, "bits_per_byte": 0.19983112922134977, "num_chars": 11}, {"sum_logits": -2.849888324737549, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -16.763668060302734, "logits_per_token": -2.849888324737549, "logits_per_char": -0.23749069372812906, "bits_per_byte": 0.3426266460990976, "num_chars": 12}, {"sum_logits": -4.227489948272705, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -13.859761238098145, "logits_per_token": -4.227489948272705, "logits_per_char": -0.4227489948272705, "bits_per_byte": 0.6098978783785354, "num_chars": 10}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 792, "native_id": "Mercury_404272", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -28.879493713378906, "logits_per_token_corr": -2.4066244761149087, "logits_per_char_corr": -0.44429990328275243, "bits_per_byte_corr": 0.6409892671339336}, "model_output": [{"sum_logits": -17.18013572692871, "num_tokens": 14, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -29.504505157470703, "logits_per_token": -1.2271525519234794, "logits_per_char": -0.24898747430331464, "bits_per_byte": 0.3592129944211183, "num_chars": 69}, {"sum_logits": -28.879493713378906, "num_tokens": 12, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -45.642940521240234, "logits_per_token": -2.4066244761149087, "logits_per_char": -0.44429990328275243, "bits_per_byte": 0.6409892671339336, "num_chars": 65}, {"sum_logits": -26.003028869628906, "num_tokens": 12, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -38.94684600830078, "logits_per_token": -2.1669190724690757, "logits_per_char": -0.44072930287506623, "bits_per_byte": 0.6358379796327648, "num_chars": 59}, {"sum_logits": -16.26780128479004, "num_tokens": 10, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -28.86692237854004, "logits_per_token": -1.626780128479004, "logits_per_char": -0.3189764957801968, "bits_per_byte": 0.4601858086225599, "num_chars": 51}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 793, "native_id": "MCAS_2009_8_17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.577730655670166, "logits_per_token_corr": -2.525910218556722, "logits_per_char_corr": -0.44574886209824505, "bits_per_byte_corr": 0.6430796728314986}, "model_output": [{"sum_logits": -13.856180191040039, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -22.165912628173828, "logits_per_token": -3.4640450477600098, "logits_per_char": -0.7697877883911133, "bits_per_byte": 1.1105690248495397, "num_chars": 18}, {"sum_logits": -7.577730655670166, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -21.252477645874023, "logits_per_token": -2.525910218556722, "logits_per_char": -0.44574886209824505, "bits_per_byte": 0.6430796728314986, "num_chars": 17}, {"sum_logits": -12.649868965148926, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -19.583988189697266, "logits_per_token": -3.1624672412872314, "logits_per_char": -0.505994758605957, "bits_per_byte": 0.7299961289571467, "num_chars": 25}, {"sum_logits": -4.357631683349609, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -18.61882209777832, "logits_per_token": -1.0894079208374023, "logits_per_char": -0.17430526733398438, "bits_per_byte": 0.25146934478374494, "num_chars": 25}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 794, "native_id": "AIMS_2008_4_5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.43520164489746, "logits_per_token_corr": -3.9058669408162436, "logits_per_char_corr": -0.498621311593563, "bits_per_byte_corr": 0.7193584935181008}, "model_output": [{"sum_logits": -12.812786102294922, "num_tokens": 7, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -28.795333862304688, "logits_per_token": -1.8303980146135603, "logits_per_char": -0.2911996841430664, "bits_per_byte": 0.4201123402219362, "num_chars": 44}, {"sum_logits": -14.393430709838867, "num_tokens": 8, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -32.087398529052734, "logits_per_token": -1.7991788387298584, "logits_per_char": -0.31985401577419703, "bits_per_byte": 0.4614518023661859, "num_chars": 45}, {"sum_logits": -23.43520164489746, "num_tokens": 6, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -35.485595703125, "logits_per_token": -3.9058669408162436, "logits_per_char": -0.498621311593563, "bits_per_byte": 0.7193584935181008, "num_chars": 47}, {"sum_logits": -12.049878120422363, "num_tokens": 9, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -31.804977416992188, "logits_per_token": -1.3388753467135959, "logits_per_char": -0.24591588000861966, "bits_per_byte": 0.3547816205645358, "num_chars": 49}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 795, "native_id": "Mercury_7236513", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.917205333709717, "logits_per_token_corr": -5.917205333709717, "logits_per_char_corr": -0.6574672593010796, "bits_per_byte_corr": 0.9485247545412072}, "model_output": [{"sum_logits": -8.746686935424805, "num_tokens": 2, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -22.12740707397461, "logits_per_token": -4.373343467712402, "logits_per_char": -0.4373343467712402, "bits_per_byte": 0.6309400932977358, "num_chars": 20}, {"sum_logits": -9.612211227416992, "num_tokens": 2, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -21.082500457763672, "logits_per_token": -4.806105613708496, "logits_per_char": -0.600763201713562, "bits_per_byte": 0.8667180918613546, "num_chars": 16}, {"sum_logits": -4.889413356781006, "num_tokens": 1, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -11.17255687713623, "logits_per_token": -4.889413356781006, "logits_per_char": -0.9778826713562012, "bits_per_byte": 1.410786480537857, "num_chars": 5}, {"sum_logits": -5.917205333709717, "num_tokens": 1, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -12.746956825256348, "logits_per_token": -5.917205333709717, "logits_per_char": -0.6574672593010796, "bits_per_byte": 0.9485247545412072, "num_chars": 9}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 796, "native_id": "Mercury_SC_LBS10027", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.79268741607666, "logits_per_token_corr": -1.279268741607666, "logits_per_char_corr": -0.2781019003494926, "bits_per_byte_corr": 0.4012162324962979}, "model_output": [{"sum_logits": -14.149086952209473, "num_tokens": 10, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -34.83010482788086, "logits_per_token": -1.4149086952209473, "logits_per_char": -0.32157015800476074, "bits_per_byte": 0.463927672251682, "num_chars": 44}, {"sum_logits": -12.79268741607666, "num_tokens": 10, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -36.13410568237305, "logits_per_token": -1.279268741607666, "logits_per_char": -0.2781019003494926, "bits_per_byte": 0.4012162324962979, "num_chars": 46}, {"sum_logits": -18.88847541809082, "num_tokens": 10, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -33.33897399902344, "logits_per_token": -1.888847541809082, "logits_per_char": -0.3854790901651188, "bits_per_byte": 0.556128771748006, "num_chars": 49}, {"sum_logits": -19.583995819091797, "num_tokens": 10, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -32.5799446105957, "logits_per_token": -1.9583995819091797, "logits_per_char": -0.34971421105521067, "bits_per_byte": 0.5045309580181112, "num_chars": 56}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 797, "native_id": "Mercury_189053", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.18807029724121, "logits_per_token_corr": -2.015672524770101, "logits_per_char_corr": -0.49363408769880024, "bits_per_byte_corr": 0.7121634503373184}, "model_output": [{"sum_logits": -13.92536735534668, "num_tokens": 6, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -19.383119583129883, "logits_per_token": -2.3208945592244468, "logits_per_char": -0.5157543464943215, "bits_per_byte": 0.7440762380048204, "num_chars": 27}, {"sum_logits": -14.584015846252441, "num_tokens": 7, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -21.70053482055664, "logits_per_token": -2.08343083517892, "logits_per_char": -0.42894164253683653, "bits_per_byte": 0.6188319805191052, "num_chars": 34}, {"sum_logits": -31.077396392822266, "num_tokens": 11, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -39.51203155517578, "logits_per_token": -2.825217853892933, "logits_per_char": -0.6342325794453524, "bits_per_byte": 0.9150041971366828, "num_chars": 49}, {"sum_logits": -24.18807029724121, "num_tokens": 12, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -29.79852294921875, "logits_per_token": -2.015672524770101, "logits_per_char": -0.49363408769880024, "bits_per_byte": 0.7121634503373184, "num_chars": 49}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 798, "native_id": "Mercury_SC_414271", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.71843719482422, "logits_per_token_corr": -2.071843719482422, "logits_per_char_corr": -0.5053277364591273, "bits_per_byte_corr": 0.7290338194137518}, "model_output": [{"sum_logits": -17.280391693115234, "num_tokens": 8, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -38.71271514892578, "logits_per_token": -2.1600489616394043, "logits_per_char": -0.43200979232788084, "bits_per_byte": 0.6232583850073524, "num_chars": 40}, {"sum_logits": -20.694786071777344, "num_tokens": 9, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -42.91309356689453, "logits_per_token": -2.2994206746419272, "logits_per_char": -0.5047508797994474, "bits_per_byte": 0.7282015911715273, "num_chars": 41}, {"sum_logits": -20.71843719482422, "num_tokens": 10, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -41.583316802978516, "logits_per_token": -2.071843719482422, "logits_per_char": -0.5053277364591273, "bits_per_byte": 0.7290338194137518, "num_chars": 41}, {"sum_logits": -22.07723617553711, "num_tokens": 9, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -40.12480163574219, "logits_per_token": -2.4530262417263455, "logits_per_char": -0.5134240971055142, "bits_per_byte": 0.7407143987675511, "num_chars": 43}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 799, "native_id": "Mercury_408922", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -32.39537048339844, "logits_per_token_corr": -2.9450336803089487, "logits_per_char_corr": -0.5399228413899739, "bits_per_byte_corr": 0.7789440057365533}, "model_output": [{"sum_logits": -30.48932647705078, "num_tokens": 8, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -46.00678253173828, "logits_per_token": -3.8111658096313477, "logits_per_char": -0.5863332014817458, "bits_per_byte": 0.8459000020868719, "num_chars": 52}, {"sum_logits": -33.10386657714844, "num_tokens": 9, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -45.2127685546875, "logits_per_token": -3.6782073974609375, "logits_per_char": -0.6018884832208807, "bits_per_byte": 0.8683415299115685, "num_chars": 55}, {"sum_logits": -32.39537048339844, "num_tokens": 11, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -45.818023681640625, "logits_per_token": -2.9450336803089487, "logits_per_char": -0.5399228413899739, "bits_per_byte": 0.7789440057365533, "num_chars": 60}, {"sum_logits": -30.079986572265625, "num_tokens": 12, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -45.88119125366211, "logits_per_token": -2.5066655476888022, "logits_per_char": -0.42366178270796656, "bits_per_byte": 0.6112147529274, "num_chars": 71}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 800, "native_id": "Mercury_7264093", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.604116916656494, "logits_per_token_corr": -7.604116916656494, "logits_per_char_corr": -1.0863024166652135, "bits_per_byte_corr": 1.5672031094297258}, "model_output": [{"sum_logits": -2.4843945503234863, "num_tokens": 1, "num_tokens_all": 194, "is_greedy": true, "sum_logits_uncond": -12.585593223571777, "logits_per_token": -2.4843945503234863, "logits_per_char": -0.3549135071890695, "bits_per_byte": 0.5120319567665479, "num_chars": 7}, {"sum_logits": -2.9053597450256348, "num_tokens": 1, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -13.386828422546387, "logits_per_token": -2.9053597450256348, "logits_per_char": -0.32281774944729275, "bits_per_byte": 0.46572756623887973, "num_chars": 9}, {"sum_logits": -7.604116916656494, "num_tokens": 1, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -13.623672485351562, "logits_per_token": -7.604116916656494, "logits_per_char": -1.0863024166652135, "bits_per_byte": 1.5672031094297258, "num_chars": 7}, {"sum_logits": -3.722057819366455, "num_tokens": 1, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -14.791035652160645, "logits_per_token": -3.722057819366455, "logits_per_char": -0.5317225456237793, "bits_per_byte": 0.7671134797008331, "num_chars": 7}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 801, "native_id": "Mercury_SC_LBS11009", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.306196212768555, "logits_per_token_corr": -3.8612392425537108, "logits_per_char_corr": -0.8393998353377633, "bits_per_byte_corr": 1.2109979797656736}, "model_output": [{"sum_logits": -14.164535522460938, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -26.773679733276367, "logits_per_token": -3.5411338806152344, "logits_per_char": -0.5447898277869592, "bits_per_byte": 0.7859655828755631, "num_chars": 26}, {"sum_logits": -8.201189041137695, "num_tokens": 4, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -22.874645233154297, "logits_per_token": -2.050297260284424, "logits_per_char": -0.37278132005171344, "bits_per_byte": 0.5378097617750348, "num_chars": 22}, {"sum_logits": -19.306196212768555, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -27.140514373779297, "logits_per_token": -3.8612392425537108, "logits_per_char": -0.8393998353377633, "bits_per_byte": 1.2109979797656736, "num_chars": 23}, {"sum_logits": -9.477922439575195, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -21.017919540405273, "logits_per_token": -1.895584487915039, "logits_per_char": -0.4120835843293563, "bits_per_byte": 0.5945109435441385, "num_chars": 23}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 802, "native_id": "Mercury_7191433", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -18.47998046875, "logits_per_token_corr": -2.6399972098214284, "logits_per_char_corr": -0.5279994419642857, "bits_per_byte_corr": 0.7617421765145623}, "model_output": [{"sum_logits": -9.825884819030762, "num_tokens": 4, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -21.953248977661133, "logits_per_token": -2.4564712047576904, "logits_per_char": -0.42721238343612006, "bits_per_byte": 0.6163371869900875, "num_chars": 23}, {"sum_logits": -7.938161373138428, "num_tokens": 6, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -24.64531707763672, "logits_per_token": -1.3230268955230713, "logits_per_char": -0.3053138989668626, "bits_per_byte": 0.44047484795428316, "num_chars": 26}, {"sum_logits": -18.47998046875, "num_tokens": 7, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -37.41898727416992, "logits_per_token": -2.6399972098214284, "logits_per_char": -0.5279994419642857, "bits_per_byte": 0.7617421765145623, "num_chars": 35}, {"sum_logits": -23.13967514038086, "num_tokens": 6, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -32.52082824707031, "logits_per_token": -3.85661252339681, "logits_per_char": -0.6611335754394532, "bits_per_byte": 0.9538141306523737, "num_chars": 35}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 803, "native_id": "MEAP_2005_5_14", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -33.0623664855957, "logits_per_token_corr": -4.132795810699463, "logits_per_char_corr": -0.8935774725836676, "bits_per_byte_corr": 1.289159788347477}, "model_output": [{"sum_logits": -28.645282745361328, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -47.61833190917969, "logits_per_token": -3.580660343170166, "logits_per_char": -0.9548427581787109, "bits_per_byte": 1.3775469120541557, "num_chars": 30}, {"sum_logits": -29.459346771240234, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -44.939090728759766, "logits_per_token": -3.6824183464050293, "logits_per_char": -0.950301508749685, "bits_per_byte": 1.3709952740234554, "num_chars": 31}, {"sum_logits": -22.622161865234375, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -43.74177932739258, "logits_per_token": -3.2317374093191966, "logits_per_char": -0.6463474818638393, "bits_per_byte": 0.9324823067767001, "num_chars": 35}, {"sum_logits": -33.0623664855957, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -51.63865661621094, "logits_per_token": -4.132795810699463, "logits_per_char": -0.8935774725836676, "bits_per_byte": 1.289159788347477, "num_chars": 37}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 804, "native_id": "Mercury_416683", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -30.282291412353516, "logits_per_token_corr": -2.523524284362793, "logits_per_char_corr": -0.540755203792027, "bits_per_byte_corr": 0.7801448508462187}, "model_output": [{"sum_logits": -29.524011611938477, "num_tokens": 10, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -44.89853286743164, "logits_per_token": -2.9524011611938477, "logits_per_char": -0.5570568228667637, "bits_per_byte": 0.8036631158438191, "num_chars": 53}, {"sum_logits": -37.78936767578125, "num_tokens": 10, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -51.1103515625, "logits_per_token": -3.778936767578125, "logits_per_char": -0.6870794122869318, "bits_per_byte": 0.9912460608039722, "num_chars": 55}, {"sum_logits": -30.282291412353516, "num_tokens": 12, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -47.440330505371094, "logits_per_token": -2.523524284362793, "logits_per_char": -0.540755203792027, "bits_per_byte": 0.7801448508462187, "num_chars": 56}, {"sum_logits": -27.965835571289062, "num_tokens": 10, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -43.83893966674805, "logits_per_token": -2.796583557128906, "logits_per_char": -0.4821695788153287, "bits_per_byte": 0.6956236602248946, "num_chars": 58}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 805, "native_id": "Mercury_7040775", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.017398834228516, "logits_per_token_corr": -1.6724662780761719, "logits_per_char_corr": -0.29514110789579506, "bits_per_byte_corr": 0.4257986127240439}, "model_output": [{"sum_logits": -19.065217971801758, "num_tokens": 3, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -25.98949432373047, "logits_per_token": -6.355072657267253, "logits_per_char": -1.0034325248316716, "bits_per_byte": 1.4476471274423843, "num_chars": 19}, {"sum_logits": -5.017398834228516, "num_tokens": 3, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -21.252477645874023, "logits_per_token": -1.6724662780761719, "logits_per_char": -0.29514110789579506, "bits_per_byte": 0.4257986127240439, "num_chars": 17}, {"sum_logits": -13.67793083190918, "num_tokens": 3, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -24.14225196838379, "logits_per_token": -4.55931027730306, "logits_per_char": -0.7198910964162726, "bits_per_byte": 1.0385833147806212, "num_chars": 19}, {"sum_logits": -10.074703216552734, "num_tokens": 4, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -24.16815185546875, "logits_per_token": -2.5186758041381836, "logits_per_char": -0.40298812866210937, "bits_per_byte": 0.5813889747583664, "num_chars": 25}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 806, "native_id": "Mercury_7222600", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.673480987548828, "logits_per_token_corr": -4.278913497924805, "logits_per_char_corr": -0.5834882042624734, "bits_per_byte_corr": 0.8417955387072816}, "model_output": [{"sum_logits": -7.137319564819336, "num_tokens": 4, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -20.840530395507812, "logits_per_token": -1.784329891204834, "logits_per_char": -0.28549278259277344, "bits_per_byte": 0.4118790216564811, "num_chars": 25}, {"sum_logits": -32.22673034667969, "num_tokens": 5, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -36.18729782104492, "logits_per_token": -6.445346069335938, "logits_per_char": -1.193582605432581, "bits_per_byte": 1.7219757057501501, "num_chars": 27}, {"sum_logits": -25.673480987548828, "num_tokens": 6, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -37.3028564453125, "logits_per_token": -4.278913497924805, "logits_per_char": -0.5834882042624734, "bits_per_byte": 0.8417955387072816, "num_chars": 44}, {"sum_logits": -11.038098335266113, "num_tokens": 6, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -28.612262725830078, "logits_per_token": -1.8396830558776855, "logits_per_char": -0.28302816244272083, "bits_per_byte": 0.40832332638832264, "num_chars": 39}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 807, "native_id": "MCAS_2001_5_3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.725104808807373, "logits_per_token_corr": -7.725104808807373, "logits_per_char_corr": -0.7725104808807373, "bits_per_byte_corr": 1.1144970398021887}, "model_output": [{"sum_logits": -6.886255741119385, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -14.220553398132324, "logits_per_token": -6.886255741119385, "logits_per_char": -0.7651395267910428, "bits_per_byte": 1.1038630008903587, "num_chars": 9}, {"sum_logits": -9.942998886108398, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -16.75171661376953, "logits_per_token": -4.971499443054199, "logits_per_char": -0.7648460681621845, "bits_per_byte": 1.1034396295817983, "num_chars": 13}, {"sum_logits": -4.5464959144592285, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -13.488286018371582, "logits_per_token": -4.5464959144592285, "logits_per_char": -0.9092991828918457, "bits_per_byte": 1.311841421843395, "num_chars": 5}, {"sum_logits": -7.725104808807373, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -15.346150398254395, "logits_per_token": -7.725104808807373, "logits_per_char": -0.7725104808807373, "bits_per_byte": 1.1144970398021887, "num_chars": 10}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 808, "native_id": "MCAS_2004_8_7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -51.616214752197266, "logits_per_token_corr": -3.4410809834798175, "logits_per_char_corr": -0.7269889401717925, "bits_per_byte_corr": 1.048823338767722}, "model_output": [{"sum_logits": -42.837158203125, "num_tokens": 10, "num_tokens_all": 236, "is_greedy": false, "sum_logits_uncond": -60.44501876831055, "logits_per_token": -4.2837158203125, "logits_per_char": -0.82379150390625, "bits_per_byte": 1.1884799174128617, "num_chars": 52}, {"sum_logits": -47.56757736206055, "num_tokens": 13, "num_tokens_all": 239, "is_greedy": false, "sum_logits_uncond": -61.719757080078125, "logits_per_token": -3.659044412466196, "logits_per_char": -0.6258891758165861, "bits_per_byte": 0.9029672100973181, "num_chars": 76}, {"sum_logits": -44.415924072265625, "num_tokens": 12, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -60.94255828857422, "logits_per_token": -3.7013270060221353, "logits_per_char": -0.7528122724112818, "bits_per_byte": 1.086078532128888, "num_chars": 59}, {"sum_logits": -51.616214752197266, "num_tokens": 15, "num_tokens_all": 241, "is_greedy": false, "sum_logits_uncond": -71.18649291992188, "logits_per_token": -3.4410809834798175, "logits_per_char": -0.7269889401717925, "bits_per_byte": 1.048823338767722, "num_chars": 71}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 809, "native_id": "Mercury_415268", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.079627990722656, "logits_per_token_corr": -2.134953498840332, "logits_per_char_corr": -0.46161156731682856, "bits_per_byte_corr": 0.665964718985449}, "model_output": [{"sum_logits": -11.759220123291016, "num_tokens": 6, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -25.055078506469727, "logits_per_token": -1.9598700205485027, "logits_per_char": -0.4199721472603934, "bits_per_byte": 0.6058917341644943, "num_chars": 28}, {"sum_logits": -15.208221435546875, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -23.716503143310547, "logits_per_token": -3.041644287109375, "logits_per_char": -0.5244214288119612, "bits_per_byte": 0.7565801946834645, "num_chars": 29}, {"sum_logits": -17.7005558013916, "num_tokens": 7, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -29.257741928100586, "logits_per_token": -2.5286508287702287, "logits_per_char": -0.5363804788300486, "bits_per_byte": 0.7738334568383146, "num_chars": 33}, {"sum_logits": -17.079627990722656, "num_tokens": 8, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -30.63159942626953, "logits_per_token": -2.134953498840332, "logits_per_char": -0.46161156731682856, "bits_per_byte": 0.665964718985449, "num_chars": 37}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 810, "native_id": "Mercury_7017710", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.387792587280273, "logits_per_token_corr": -1.2984740734100342, "logits_per_char_corr": -0.25969481468200684, "bits_per_byte_corr": 0.3746604212865788}, "model_output": [{"sum_logits": -7.409749984741211, "num_tokens": 4, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -28.683246612548828, "logits_per_token": -1.8524374961853027, "logits_per_char": -0.3087395826975505, "bits_per_byte": 0.4454170648842041, "num_chars": 24}, {"sum_logits": -8.44107723236084, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -25.85313606262207, "logits_per_token": -1.688215446472168, "logits_per_char": -0.3376430892944336, "bits_per_byte": 0.4871160105158588, "num_chars": 25}, {"sum_logits": -10.387792587280273, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -32.48263931274414, "logits_per_token": -1.2984740734100342, "logits_per_char": -0.25969481468200684, "bits_per_byte": 0.3746604212865788, "num_chars": 40}, {"sum_logits": -20.0640926361084, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -40.40981674194336, "logits_per_token": -2.2293436262342663, "logits_per_char": -0.47771649133591426, "bits_per_byte": 0.6891992130016941, "num_chars": 42}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 811, "native_id": "Mercury_7210123", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.253050804138184, "logits_per_token_corr": -2.626525402069092, "logits_per_char_corr": -0.656631350517273, "bits_per_byte_corr": 0.947318793084173}, "model_output": [{"sum_logits": -5.253050804138184, "num_tokens": 2, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -13.936994552612305, "logits_per_token": -2.626525402069092, "logits_per_char": -0.656631350517273, "bits_per_byte": 0.947318793084173, "num_chars": 8}, {"sum_logits": -4.378840923309326, "num_tokens": 3, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -17.095348358154297, "logits_per_token": -1.4596136411031086, "logits_per_char": -0.36490341027577716, "bits_per_byte": 0.5264443404087127, "num_chars": 12}, {"sum_logits": -11.801403999328613, "num_tokens": 2, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -14.88875961303711, "logits_per_token": -5.900701999664307, "logits_per_char": -0.9078003076406626, "bits_per_byte": 1.3096790019516, "num_chars": 13}, {"sum_logits": -16.570716857910156, "num_tokens": 3, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -21.162639617919922, "logits_per_token": -5.523572285970052, "logits_per_char": -0.8285358428955079, "bits_per_byte": 1.1953245517449653, "num_chars": 20}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 812, "native_id": "MCAS_2009_5_6519", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.5887069702148438, "logits_per_token_corr": -0.7177413940429688, "logits_per_char_corr": -0.17089080810546875, "bits_per_byte_corr": 0.24654332138744436}, "model_output": [{"sum_logits": -8.139997482299805, "num_tokens": 2, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -18.9022216796875, "logits_per_token": -4.069998741149902, "logits_per_char": -0.7399997711181641, "bits_per_byte": 1.0675940000519102, "num_chars": 11}, {"sum_logits": -11.208895683288574, "num_tokens": 3, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -23.29722785949707, "logits_per_token": -3.7362985610961914, "logits_per_char": -0.6227164268493652, "bits_per_byte": 0.8983899008963196, "num_chars": 18}, {"sum_logits": -3.5887069702148438, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -26.954734802246094, "logits_per_token": -0.7177413940429688, "logits_per_char": -0.17089080810546875, "bits_per_byte": 0.24654332138744436, "num_chars": 21}, {"sum_logits": -13.038074493408203, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -23.622289657592773, "logits_per_token": -2.6076148986816405, "logits_per_char": -0.5215229797363281, "bits_per_byte": 0.7523986165757764, "num_chars": 25}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 813, "native_id": "Mercury_401502", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -27.048532485961914, "logits_per_token_corr": -5.409706497192383, "logits_per_char_corr": -3.3810665607452393, "bits_per_byte_corr": 4.877847960106164}, "model_output": [{"sum_logits": -21.79345703125, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -21.261497497558594, "logits_per_token": -4.35869140625, "logits_per_char": -3.1133510044642856, "bits_per_byte": 4.491616054690525, "num_chars": 7}, {"sum_logits": -20.623798370361328, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -22.138545989990234, "logits_per_token": -4.124759674072266, "logits_per_char": -2.9462569100516185, "bits_per_byte": 4.250550233319364, "num_chars": 7}, {"sum_logits": -27.006561279296875, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -26.643646240234375, "logits_per_token": -5.401312255859375, "logits_per_char": -3.3758201599121094, "bits_per_byte": 4.870279003641687, "num_chars": 8}, {"sum_logits": -27.048532485961914, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -25.88971710205078, "logits_per_token": -5.409706497192383, "logits_per_char": -3.3810665607452393, "bits_per_byte": 4.877847960106164, "num_chars": 8}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 814, "native_id": "Mercury_7109498", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -22.594131469726562, "logits_per_token_corr": -3.2277330671037947, "logits_per_char_corr": -0.4807262014835439, "bits_per_byte_corr": 0.6935413069061956}, "model_output": [{"sum_logits": -41.90105056762695, "num_tokens": 8, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -42.62413787841797, "logits_per_token": -5.237631320953369, "logits_per_char": -0.9976440611339751, "bits_per_byte": 1.4392961395713457, "num_chars": 42}, {"sum_logits": -22.594131469726562, "num_tokens": 7, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -41.972557067871094, "logits_per_token": -3.2277330671037947, "logits_per_char": -0.4807262014835439, "bits_per_byte": 0.6935413069061956, "num_chars": 47}, {"sum_logits": -34.244110107421875, "num_tokens": 10, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -51.5762939453125, "logits_per_token": -3.4244110107421877, "logits_per_char": -0.6714531393612132, "bits_per_byte": 0.9687021143464443, "num_chars": 51}, {"sum_logits": -34.5264892578125, "num_tokens": 13, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -50.18081283569336, "logits_per_token": -2.6558837890625, "logits_per_char": -0.6165444510323661, "bits_per_byte": 0.8894856219926419, "num_chars": 56}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 815, "native_id": "VASoL_2008_5_10", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.39730453491211, "logits_per_token_corr": -4.132434844970703, "logits_per_char_corr": -0.9536388103778546, "bits_per_byte_corr": 1.3758099825323697}, "model_output": [{"sum_logits": -12.39730453491211, "num_tokens": 3, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -14.191450119018555, "logits_per_token": -4.132434844970703, "logits_per_char": -0.9536388103778546, "bits_per_byte": 1.3758099825323697, "num_chars": 13}, {"sum_logits": -11.685213088989258, "num_tokens": 4, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -18.36844253540039, "logits_per_token": -2.9213032722473145, "logits_per_char": -0.7303258180618286, "bits_per_byte": 1.0536374359517324, "num_chars": 16}, {"sum_logits": -7.762219429016113, "num_tokens": 2, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -15.70174503326416, "logits_per_token": -3.8811097145080566, "logits_per_char": -0.5970938022320087, "bits_per_byte": 0.8614242674262733, "num_chars": 13}, {"sum_logits": -11.068681716918945, "num_tokens": 4, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -15.25369930267334, "logits_per_token": -2.7671704292297363, "logits_per_char": -0.5031218962235884, "bits_per_byte": 0.7258514646449441, "num_chars": 22}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 816, "native_id": "MCAS_2006_9_4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.762531280517578, "logits_per_token_corr": -2.8453164100646973, "logits_per_char_corr": -0.599013981066252, "bits_per_byte_corr": 0.8641944999080581}, "model_output": [{"sum_logits": -22.762531280517578, "num_tokens": 8, "num_tokens_all": 262, "is_greedy": false, "sum_logits_uncond": -45.30516815185547, "logits_per_token": -2.8453164100646973, "logits_per_char": -0.599013981066252, "bits_per_byte": 0.8641944999080581, "num_chars": 38}, {"sum_logits": -13.665404319763184, "num_tokens": 11, "num_tokens_all": 265, "is_greedy": false, "sum_logits_uncond": -34.19749450683594, "logits_per_token": -1.242309483614835, "logits_per_char": -0.30367565155029297, "bits_per_byte": 0.43811135653064726, "num_chars": 45}, {"sum_logits": -14.226067543029785, "num_tokens": 12, "num_tokens_all": 266, "is_greedy": false, "sum_logits_uncond": -40.03036880493164, "logits_per_token": -1.1855056285858154, "logits_per_char": -0.29637640714645386, "bits_per_byte": 0.4275807728269845, "num_chars": 48}, {"sum_logits": -33.889469146728516, "num_tokens": 20, "num_tokens_all": 274, "is_greedy": false, "sum_logits_uncond": -63.63440704345703, "logits_per_token": -1.6944734573364257, "logits_per_char": -0.42898062211048754, "bits_per_byte": 0.6188882161567074, "num_chars": 79}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 817, "native_id": "Mercury_402341", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.41059398651123, "logits_per_token_corr": -1.682118797302246, "logits_per_char_corr": -0.5607062657674153, "bits_per_byte_corr": 0.8089281490186004}, "model_output": [{"sum_logits": -7.775211334228516, "num_tokens": 6, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -20.528701782226562, "logits_per_token": -1.2958685557047527, "logits_per_char": -0.518347422281901, "bits_per_byte": 0.7478172555842132, "num_chars": 15}, {"sum_logits": -9.860603332519531, "num_tokens": 6, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -24.07942771911621, "logits_per_token": -1.6434338887532551, "logits_per_char": -0.657373555501302, "bits_per_byte": 0.9483895685339556, "num_chars": 15}, {"sum_logits": -9.791293144226074, "num_tokens": 5, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -26.370277404785156, "logits_per_token": -1.9582586288452148, "logits_per_char": -0.6527528762817383, "bits_per_byte": 0.9417233375383475, "num_chars": 15}, {"sum_logits": -8.41059398651123, "num_tokens": 5, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -23.860300064086914, "logits_per_token": -1.682118797302246, "logits_per_char": -0.5607062657674153, "bits_per_byte": 0.8089281490186004, "num_chars": 15}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 818, "native_id": "MCAS_2006_9_34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.698830604553223, "logits_per_token_corr": -3.8494153022766113, "logits_per_char_corr": -1.5397661209106446, "bits_per_byte_corr": 2.221412946768219}, "model_output": [{"sum_logits": -9.319801330566406, "num_tokens": 4, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -20.69024085998535, "logits_per_token": -2.3299503326416016, "logits_per_char": -1.3314001900809151, "bits_per_byte": 1.9208044516697396, "num_chars": 7}, {"sum_logits": -6.713293075561523, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -15.318441390991211, "logits_per_token": -3.3566465377807617, "logits_per_char": -1.6783232688903809, "bits_per_byte": 2.421308657038446, "num_chars": 4}, {"sum_logits": -6.915741920471191, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -16.691875457763672, "logits_per_token": -3.4578709602355957, "logits_per_char": -1.3831483840942382, "bits_per_byte": 1.9954613145477742, "num_chars": 5}, {"sum_logits": -7.698830604553223, "num_tokens": 2, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -17.97145652770996, "logits_per_token": -3.8494153022766113, "logits_per_char": -1.5397661209106446, "bits_per_byte": 2.221412946768219, "num_chars": 5}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 819, "native_id": "Mercury_7267715", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -30.186208724975586, "logits_per_token_corr": -4.31231553213937, "logits_per_char_corr": -0.7362489932920875, "bits_per_byte_corr": 1.0621827714827494}, "model_output": [{"sum_logits": -17.967283248901367, "num_tokens": 5, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -29.095802307128906, "logits_per_token": -3.5934566497802733, "logits_per_char": -0.748636802037557, "bits_per_byte": 1.080054601727332, "num_chars": 24}, {"sum_logits": -15.43290901184082, "num_tokens": 7, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -29.573200225830078, "logits_per_token": -2.2047012874058316, "logits_per_char": -0.45390908858355355, "bits_per_byte": 0.6548523911143924, "num_chars": 34}, {"sum_logits": -30.186208724975586, "num_tokens": 7, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -44.79627227783203, "logits_per_token": -4.31231553213937, "logits_per_char": -0.7362489932920875, "bits_per_byte": 1.0621827714827494, "num_chars": 41}, {"sum_logits": -17.794769287109375, "num_tokens": 9, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -36.49136734008789, "logits_per_token": -1.9771965874565973, "logits_per_char": -0.38684281058933423, "bits_per_byte": 0.558096204441182, "num_chars": 46}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 820, "native_id": "Mercury_SC_413089", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.423442840576172, "logits_per_token_corr": -2.0470492045084634, "logits_per_char_corr": -0.5582861466841265, "bits_per_byte_corr": 0.8054366552187763}, "model_output": [{"sum_logits": -25.064258575439453, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -37.29254150390625, "logits_per_token": -2.7849176194932728, "logits_per_char": -0.6113233798887672, "bits_per_byte": 0.8819532085456379, "num_chars": 41}, {"sum_logits": -17.16303253173828, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -28.25715446472168, "logits_per_token": -1.9070036146375868, "logits_per_char": -0.4767509036593967, "bits_per_byte": 0.6878061644492377, "num_chars": 36}, {"sum_logits": -24.367359161376953, "num_tokens": 10, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -36.096717834472656, "logits_per_token": -2.436735916137695, "logits_per_char": -0.6248040810609475, "bits_per_byte": 0.9014017492744625, "num_chars": 39}, {"sum_logits": -18.423442840576172, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -29.727397918701172, "logits_per_token": -2.0470492045084634, "logits_per_char": -0.5582861466841265, "bits_per_byte": 0.8054366552187763, "num_chars": 33}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 821, "native_id": "Mercury_SC_401656", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.302616119384766, "logits_per_token_corr": -3.7171026865641275, "logits_per_char_corr": -0.6758368521025686, "bits_per_byte_corr": 0.975026474979084}, "model_output": [{"sum_logits": -22.302616119384766, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -33.6739616394043, "logits_per_token": -3.7171026865641275, "logits_per_char": -0.6758368521025686, "bits_per_byte": 0.975026474979084, "num_chars": 33}, {"sum_logits": -19.45900535583496, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -27.22823715209961, "logits_per_token": -2.779857907976423, "logits_per_char": -0.5559715815952846, "bits_per_byte": 0.8020974436432871, "num_chars": 35}, {"sum_logits": -21.45479965209961, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -33.40212631225586, "logits_per_token": -3.5757999420166016, "logits_per_char": -0.5798594500567462, "bits_per_byte": 0.8365603530100704, "num_chars": 37}, {"sum_logits": -24.735231399536133, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -36.835121154785156, "logits_per_token": -3.533604485648019, "logits_per_char": -0.852939013777108, "bits_per_byte": 1.230530885357841, "num_chars": 29}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 822, "native_id": "Mercury_407019", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.504253387451172, "logits_per_token_corr": -1.3130316734313965, "logits_per_char_corr": -0.22835333450980808, "bits_per_byte_corr": 0.3294442232679954}, "model_output": [{"sum_logits": -6.869935989379883, "num_tokens": 6, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -25.13689422607422, "logits_per_token": -1.1449893315633137, "logits_per_char": -0.1962838854108538, "bits_per_byte": 0.28317778808885974, "num_chars": 35}, {"sum_logits": -28.008777618408203, "num_tokens": 7, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -44.171722412109375, "logits_per_token": -4.001253945486886, "logits_per_char": -0.7181737850873898, "bits_per_byte": 1.036105758242778, "num_chars": 39}, {"sum_logits": -24.363548278808594, "num_tokens": 8, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -40.447044372558594, "logits_per_token": -3.045443534851074, "logits_per_char": -0.580084482828776, "bits_per_byte": 0.8368850066743154, "num_chars": 42}, {"sum_logits": -10.504253387451172, "num_tokens": 8, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -29.94497299194336, "logits_per_token": -1.3130316734313965, "logits_per_char": -0.22835333450980808, "bits_per_byte": 0.3294442232679954, "num_chars": 46}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 823, "native_id": "Mercury_417128", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.2584285736084, "logits_per_token_corr": -3.28230357170105, "logits_per_char_corr": -0.596782467582009, "bits_per_byte_corr": 0.8609751064706616}, "model_output": [{"sum_logits": -27.19171714782715, "num_tokens": 8, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -40.855918884277344, "logits_per_token": -3.3989646434783936, "logits_per_char": -0.6797929286956788, "bits_per_byte": 0.980733887061345, "num_chars": 40}, {"sum_logits": -26.2584285736084, "num_tokens": 8, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -41.65824890136719, "logits_per_token": -3.28230357170105, "logits_per_char": -0.596782467582009, "bits_per_byte": 0.8609751064706616, "num_chars": 44}, {"sum_logits": -19.67538833618164, "num_tokens": 10, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -36.795841217041016, "logits_per_token": -1.967538833618164, "logits_per_char": -0.3783728526188777, "bits_per_byte": 0.5458766380806577, "num_chars": 52}, {"sum_logits": -32.34308624267578, "num_tokens": 11, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -50.90252685546875, "logits_per_token": -2.94028056751598, "logits_per_char": -0.5674225656609786, "bits_per_byte": 0.8186177215681741, "num_chars": 57}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 824, "native_id": "Mercury_7081305", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.108444213867188, "logits_per_token_corr": -1.5135555267333984, "logits_per_char_corr": -0.31047292856069714, "bits_per_byte_corr": 0.447917754365113}, "model_output": [{"sum_logits": -16.976531982421875, "num_tokens": 7, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -37.6679573059082, "logits_per_token": -2.4252188546316966, "logits_per_char": -0.48504377092633927, "bits_per_byte": 0.6997702429300148, "num_chars": 35}, {"sum_logits": -15.286589622497559, "num_tokens": 6, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -31.31279182434082, "logits_per_token": -2.5477649370829263, "logits_per_char": -0.43675970349993026, "bits_per_byte": 0.6301110582999361, "num_chars": 35}, {"sum_logits": -12.789122581481934, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -30.204654693603516, "logits_per_token": -1.5986403226852417, "logits_per_char": -0.36540350232805524, "bits_per_byte": 0.5271658207325228, "num_chars": 35}, {"sum_logits": -12.108444213867188, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -28.031612396240234, "logits_per_token": -1.5135555267333984, "logits_per_char": -0.31047292856069714, "bits_per_byte": 0.447917754365113, "num_chars": 39}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 825, "native_id": "NYSEDREGENTS_2015_8_3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.70068359375, "logits_per_token_corr": -2.242954799107143, "logits_per_char_corr": -0.39251708984375, "bits_per_byte_corr": 0.5662824589821527}, "model_output": [{"sum_logits": -17.956096649169922, "num_tokens": 7, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -36.263240814208984, "logits_per_token": -2.5651566641671315, "logits_per_char": -0.47252885918868215, "bits_per_byte": 0.6817150418289208, "num_chars": 38}, {"sum_logits": -15.70068359375, "num_tokens": 7, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -35.75253677368164, "logits_per_token": -2.242954799107143, "logits_per_char": -0.39251708984375, "bits_per_byte": 0.5662824589821527, "num_chars": 40}, {"sum_logits": -21.974063873291016, "num_tokens": 8, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -33.44438934326172, "logits_per_token": -2.746757984161377, "logits_per_char": -0.5359527773973418, "bits_per_byte": 0.7732164141023671, "num_chars": 41}, {"sum_logits": -16.386943817138672, "num_tokens": 6, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -29.775653839111328, "logits_per_token": -2.7311573028564453, "logits_per_char": -0.39968155651557735, "bits_per_byte": 0.5766185995202197, "num_chars": 41}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 826, "native_id": "MEA_2016_8_15", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.105133056640625, "logits_per_token_corr": -1.925427754720052, "logits_per_char_corr": -0.45304182464001225, "bits_per_byte_corr": 0.6536011937239027}, "model_output": [{"sum_logits": -23.105133056640625, "num_tokens": 12, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -35.40865707397461, "logits_per_token": -1.925427754720052, "logits_per_char": -0.45304182464001225, "bits_per_byte": 0.6536011937239027, "num_chars": 51}, {"sum_logits": -22.368993759155273, "num_tokens": 12, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -36.124237060546875, "logits_per_token": -1.8640828132629395, "logits_per_char": -0.4759360374288356, "bits_per_byte": 0.6866305609794185, "num_chars": 47}, {"sum_logits": -20.247085571289062, "num_tokens": 12, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -35.02021789550781, "logits_per_token": -1.6872571309407551, "logits_per_char": -0.43078905470827794, "bits_per_byte": 0.6214972328973234, "num_chars": 47}, {"sum_logits": -24.296161651611328, "num_tokens": 12, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -36.377281188964844, "logits_per_token": -2.0246801376342773, "logits_per_char": -0.46723387791560245, "bits_per_byte": 0.6740759986046433, "num_chars": 52}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 827, "native_id": "ACTAAP_2015_7_9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.93670082092285, "logits_per_token_corr": -2.2420876026153564, "logits_per_char_corr": -0.5275500241447898, "bits_per_byte_corr": 0.7610938036550879}, "model_output": [{"sum_logits": -14.242658615112305, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -22.825660705566406, "logits_per_token": -2.034665516444615, "logits_per_char": -0.4594406004874937, "bits_per_byte": 0.6628326759068308, "num_chars": 31}, {"sum_logits": -16.401126861572266, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -26.93867301940918, "logits_per_token": -2.343018123081752, "logits_per_char": -0.49700384429006866, "bits_per_byte": 0.7170249814605477, "num_chars": 33}, {"sum_logits": -21.903858184814453, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -28.4698429107666, "logits_per_token": -2.7379822731018066, "logits_per_char": -0.6844955682754517, "bits_per_byte": 0.9875183618621765, "num_chars": 32}, {"sum_logits": -17.93670082092285, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -26.281322479248047, "logits_per_token": -2.2420876026153564, "logits_per_char": -0.5275500241447898, "bits_per_byte": 0.7610938036550879, "num_chars": 34}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 828, "native_id": "Mercury_7216423", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.831823348999023, "logits_per_token_corr": -6.915911674499512, "logits_per_char_corr": -0.6586582547142392, "bits_per_byte_corr": 0.9502429977174953}, "model_output": [{"sum_logits": -20.701404571533203, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -32.17768859863281, "logits_per_token": -6.900468190511067, "logits_per_char": -0.9000610683275305, "bits_per_byte": 1.2985136397742836, "num_chars": 23}, {"sum_logits": -13.831823348999023, "num_tokens": 2, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -17.864404678344727, "logits_per_token": -6.915911674499512, "logits_per_char": -0.6586582547142392, "bits_per_byte": 0.9502429977174953, "num_chars": 21}, {"sum_logits": -7.129034042358398, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -13.277413368225098, "logits_per_token": -7.129034042358398, "logits_per_char": -0.7921148935953776, "bits_per_byte": 1.1427802288051612, "num_chars": 9}, {"sum_logits": -6.796692371368408, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -20.87851333618164, "logits_per_token": -2.2655641237894693, "logits_per_char": -0.4854780265263149, "bits_per_byte": 0.7003967413305783, "num_chars": 14}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 829, "native_id": "Mercury_416633", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.6571550369262695, "logits_per_token_corr": -2.8285775184631348, "logits_per_char_corr": -0.47142958641052246, "bits_per_byte_corr": 0.6801291264432845}, "model_output": [{"sum_logits": -5.6571550369262695, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -15.916326522827148, "logits_per_token": -2.8285775184631348, "logits_per_char": -0.47142958641052246, "bits_per_byte": 0.6801291264432845, "num_chars": 12}, {"sum_logits": -6.0898919105529785, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -17.012014389038086, "logits_per_token": -3.0449459552764893, "logits_per_char": -0.4059927940368652, "bits_per_byte": 0.5857237905940605, "num_chars": 15}, {"sum_logits": -6.885715484619141, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -14.070293426513672, "logits_per_token": -6.885715484619141, "logits_per_char": -0.8607144355773926, "bits_per_byte": 1.2417484478299396, "num_chars": 8}, {"sum_logits": -4.169122695922852, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -17.700695037841797, "logits_per_token": -2.084561347961426, "logits_per_char": -0.2605701684951782, "bits_per_byte": 0.3759232898918653, "num_chars": 16}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 830, "native_id": "Mercury_7038518", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.43756103515625, "logits_per_token_corr": -3.4062601725260415, "logits_per_char_corr": -0.5109390258789063, "bits_per_byte_corr": 0.7371291988326654}, "model_output": [{"sum_logits": -16.008283615112305, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -21.907135009765625, "logits_per_token": -5.3360945383707685, "logits_per_char": -0.9416637420654297, "bits_per_byte": 1.3585336108637154, "num_chars": 17}, {"sum_logits": -10.293037414550781, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -22.08258056640625, "logits_per_token": -2.5732593536376953, "logits_per_char": -0.4901446387881324, "bits_per_byte": 0.7071292396984589, "num_chars": 21}, {"sum_logits": -18.52056312561035, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -28.126056671142578, "logits_per_token": -3.0867605209350586, "logits_per_char": -0.6173521041870117, "bits_per_byte": 0.8906508191936083, "num_chars": 30}, {"sum_logits": -20.43756103515625, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -29.957387924194336, "logits_per_token": -3.4062601725260415, "logits_per_char": -0.5109390258789063, "bits_per_byte": 0.7371291988326654, "num_chars": 40}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 831, "native_id": "Mercury_7085225", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.305364608764648, "logits_per_token_corr": -4.652682304382324, "logits_per_char_corr": -1.163170576095581, "bits_per_byte_corr": 1.678100421842259}, "model_output": [{"sum_logits": -6.814028739929199, "num_tokens": 1, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -13.043850898742676, "logits_per_token": -6.814028739929199, "logits_per_char": -0.8517535924911499, "bits_per_byte": 1.2288206839472238, "num_chars": 8}, {"sum_logits": -9.305364608764648, "num_tokens": 2, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -15.953125, "logits_per_token": -4.652682304382324, "logits_per_char": -1.163170576095581, "bits_per_byte": 1.678100421842259, "num_chars": 8}, {"sum_logits": -7.450700283050537, "num_tokens": 2, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -18.27857208251953, "logits_per_token": -3.7253501415252686, "logits_per_char": -0.9313375353813171, "bits_per_byte": 1.343636043689341, "num_chars": 8}, {"sum_logits": -4.854723930358887, "num_tokens": 2, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -16.235103607177734, "logits_per_token": -2.4273619651794434, "logits_per_char": -0.44133853912353516, "bits_per_byte": 0.6367169217471613, "num_chars": 11}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 832, "native_id": "LEAP__4_10225", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.358041763305664, "logits_per_token_corr": -1.7263402938842773, "logits_per_char_corr": -0.3836311764187283, "bits_per_byte_corr": 0.5534627957500959}, "model_output": [{"sum_logits": -9.532883644104004, "num_tokens": 7, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -22.036930084228516, "logits_per_token": -1.3618405205862862, "logits_per_char": -0.3075123756162582, "bits_per_byte": 0.44364657931387863, "num_chars": 31}, {"sum_logits": -10.358041763305664, "num_tokens": 6, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -21.544422149658203, "logits_per_token": -1.7263402938842773, "logits_per_char": -0.3836311764187283, "bits_per_byte": 0.5534627957500959, "num_chars": 27}, {"sum_logits": -16.476964950561523, "num_tokens": 7, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -25.352523803710938, "logits_per_token": -2.3538521357945035, "logits_per_char": -0.5681712051917767, "bits_per_byte": 0.8196977801066707, "num_chars": 29}, {"sum_logits": -15.582659721374512, "num_tokens": 7, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -25.6110782623291, "logits_per_token": -2.2260942459106445, "logits_per_char": -0.4869581162929535, "bits_per_byte": 0.7025320594969798, "num_chars": 32}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 833, "native_id": "Mercury_SC_401661", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.95820426940918, "logits_per_token_corr": -4.3194014231363935, "logits_per_char_corr": -0.647910213470459, "bits_per_byte_corr": 0.9347368519158124}, "model_output": [{"sum_logits": -16.87689971923828, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -23.866588592529297, "logits_per_token": -4.21922492980957, "logits_per_char": -0.9376055399576823, "bits_per_byte": 1.352678862807939, "num_chars": 18}, {"sum_logits": -8.915468215942383, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -19.83393096923828, "logits_per_token": -2.971822738647461, "logits_per_char": -0.46923516926012543, "bits_per_byte": 0.6769632517027627, "num_chars": 19}, {"sum_logits": -18.212554931640625, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -27.533254623413086, "logits_per_token": -4.553138732910156, "logits_per_char": -0.7004828819861779, "bits_per_byte": 1.010583180069794, "num_chars": 26}, {"sum_logits": -12.95820426940918, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -19.13104248046875, "logits_per_token": -4.3194014231363935, "logits_per_char": -0.647910213470459, "bits_per_byte": 0.9347368519158124, "num_chars": 20}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 834, "native_id": "TIMSS_1995_8_Q15", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.107034683227539, "logits_per_token_corr": -2.369011561075846, "logits_per_char_corr": -0.5076453345162528, "bits_per_byte_corr": 0.732377406637543}, "model_output": [{"sum_logits": -7.107034683227539, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -13.971698760986328, "logits_per_token": -2.369011561075846, "logits_per_char": -0.5076453345162528, "bits_per_byte": 0.732377406637543, "num_chars": 14}, {"sum_logits": -11.5503511428833, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -21.30071258544922, "logits_per_token": -3.850117047627767, "logits_per_char": -0.8884885494525616, "bits_per_byte": 1.2818180241827601, "num_chars": 13}, {"sum_logits": -8.550860404968262, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -15.548225402832031, "logits_per_token": -2.8502868016560874, "logits_per_char": -0.6577584926898663, "bits_per_byte": 0.9489449155069514, "num_chars": 13}, {"sum_logits": -9.84145450592041, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -17.163219451904297, "logits_per_token": -3.2804848353068032, "logits_per_char": -0.7570349619938777, "bits_per_byte": 1.0921705854489168, "num_chars": 13}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 835, "native_id": "MCAS_1999_4_23", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.4246883392334, "logits_per_token_corr": -1.7658807581121272, "logits_per_char_corr": -0.3808762419457529, "bits_per_byte_corr": 0.5494882654479575}, "model_output": [{"sum_logits": -19.4246883392334, "num_tokens": 11, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -32.440555572509766, "logits_per_token": -1.7658807581121272, "logits_per_char": -0.3808762419457529, "bits_per_byte": 0.5494882654479575, "num_chars": 51}, {"sum_logits": -37.46173095703125, "num_tokens": 11, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -43.58697509765625, "logits_per_token": -3.405611905184659, "logits_per_char": -0.749234619140625, "bits_per_byte": 1.0809170694972874, "num_chars": 50}, {"sum_logits": -26.750930786132812, "num_tokens": 10, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -39.997344970703125, "logits_per_token": -2.675093078613281, "logits_per_char": -0.5459373629823023, "bits_per_byte": 0.7876211262111313, "num_chars": 49}, {"sum_logits": -21.34662628173828, "num_tokens": 8, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -29.154129028320312, "logits_per_token": -2.668328285217285, "logits_per_char": -0.561753323203639, "bits_per_byte": 0.8104387335893674, "num_chars": 38}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 836, "native_id": "TIMSS_1995_8_J7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -43.117923736572266, "logits_per_token_corr": -2.536348455092486, "logits_per_char_corr": -0.4186206188016725, "bits_per_byte_corr": 0.603941890759476}, "model_output": [{"sum_logits": -18.229660034179688, "num_tokens": 13, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -36.1605224609375, "logits_per_token": -1.4022815410907452, "logits_per_char": -0.28935968308221727, "bits_per_byte": 0.41745777981621685, "num_chars": 63}, {"sum_logits": -24.280502319335938, "num_tokens": 12, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -38.24986267089844, "logits_per_token": -2.0233751932779946, "logits_per_char": -0.4414636785333807, "bits_per_byte": 0.6368974597531655, "num_chars": 55}, {"sum_logits": -20.149131774902344, "num_tokens": 12, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -35.55629348754883, "logits_per_token": -1.6790943145751953, "logits_per_char": -0.3303136356541368, "bits_per_byte": 0.4765418440965694, "num_chars": 61}, {"sum_logits": -43.117923736572266, "num_tokens": 17, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -55.809104919433594, "logits_per_token": -2.536348455092486, "logits_per_char": -0.4186206188016725, "bits_per_byte": 0.603941890759476, "num_chars": 103}, {"sum_logits": -17.805686950683594, "num_tokens": 13, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -32.51584243774414, "logits_per_token": -1.369668226975661, "logits_per_char": -0.32973494353117766, "bits_per_byte": 0.47570696784057415, "num_chars": 54}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 837, "native_id": "Mercury_SC_LBS10018", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -2.584099531173706, "logits_per_token_corr": -1.292049765586853, "logits_per_char_corr": -0.4306832551956177, "bits_per_byte_corr": 0.6213445964650799}, "model_output": [{"sum_logits": -2.584099531173706, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -16.23455238342285, "logits_per_token": -1.292049765586853, "logits_per_char": -0.4306832551956177, "bits_per_byte": 0.6213445964650799, "num_chars": 6}, {"sum_logits": -7.8427934646606445, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -14.624797821044922, "logits_per_token": -3.9213967323303223, "logits_per_char": -1.120399066380092, "bits_per_byte": 1.6163941768843446, "num_chars": 7}, {"sum_logits": -14.189847946166992, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -25.87893295288086, "logits_per_token": -3.547461986541748, "logits_per_char": -0.7468341024298417, "bits_per_byte": 1.0774538559430669, "num_chars": 19}, {"sum_logits": -7.864563465118408, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -25.317058563232422, "logits_per_token": -1.966140866279602, "logits_per_char": -0.4369201925065782, "bits_per_byte": 0.6303425949939445, "num_chars": 18}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 838, "native_id": "Mercury_SC_406855", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -32.141780853271484, "logits_per_token_corr": -2.921980077570135, "logits_per_char_corr": -0.8035445213317871, "bits_per_byte_corr": 1.159269696059698}, "model_output": [{"sum_logits": -41.90244674682617, "num_tokens": 9, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -49.38414764404297, "logits_per_token": -4.655827416314019, "logits_per_char": -1.1026959670217413, "bits_per_byte": 1.5908540032316691, "num_chars": 38}, {"sum_logits": -21.00060272216797, "num_tokens": 8, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -32.70966339111328, "logits_per_token": -2.625075340270996, "logits_per_char": -0.656268835067749, "bits_per_byte": 0.9467957938428988, "num_chars": 32}, {"sum_logits": -30.20538330078125, "num_tokens": 8, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -42.78130340576172, "logits_per_token": -3.7756729125976562, "logits_per_char": -0.7367166658727134, "bits_per_byte": 1.0628574803955788, "num_chars": 41}, {"sum_logits": -32.141780853271484, "num_tokens": 11, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -42.30180358886719, "logits_per_token": -2.921980077570135, "logits_per_char": -0.8035445213317871, "bits_per_byte": 1.159269696059698, "num_chars": 40}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 839, "native_id": "Mercury_SC_415457", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.087772369384766, "logits_per_token_corr": -4.362590789794922, "logits_per_char_corr": -0.8725181579589844, "bits_per_byte_corr": 1.2587776195739042}, "model_output": [{"sum_logits": -5.115679740905762, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -17.915307998657227, "logits_per_token": -1.7052265803019206, "logits_per_char": -0.2842044300503201, "bits_per_byte": 0.41002032183256565, "num_chars": 18}, {"sum_logits": -4.7766242027282715, "num_tokens": 2, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -16.66507911682129, "logits_per_token": -2.3883121013641357, "logits_per_char": -0.3184416135152181, "bits_per_byte": 0.4594141366314151, "num_chars": 15}, {"sum_logits": -4.979552268981934, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -19.70878791809082, "logits_per_token": -1.6598507563273113, "logits_per_char": -0.4526865699074485, "bits_per_byte": 0.6530886694829802, "num_chars": 11}, {"sum_logits": -13.087772369384766, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -22.627634048461914, "logits_per_token": -4.362590789794922, "logits_per_char": -0.8725181579589844, "bits_per_byte": 1.2587776195739042, "num_chars": 15}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 840, "native_id": "NYSEDREGENTS_2015_4_25", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.77939510345459, "logits_per_token_corr": -2.11134215763637, "logits_per_char_corr": -0.4105387528737386, "bits_per_byte_corr": 0.5922822228641078}, "model_output": [{"sum_logits": -21.633268356323242, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -43.228546142578125, "logits_per_token": -3.0904669080461775, "logits_per_char": -0.600924121008979, "bits_per_byte": 0.8669502493308362, "num_chars": 36}, {"sum_logits": -20.121923446655273, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -40.90901184082031, "logits_per_token": -2.8745604923793246, "logits_per_char": -0.5589423179626465, "bits_per_byte": 0.8063833102682716, "num_chars": 36}, {"sum_logits": -14.77939510345459, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -39.137840270996094, "logits_per_token": -2.11134215763637, "logits_per_char": -0.4105387528737386, "bits_per_byte": 0.5922822228641078, "num_chars": 36}, {"sum_logits": -22.252246856689453, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -41.33618927001953, "logits_per_token": -3.1788924080984935, "logits_per_char": -0.6181179682413737, "bits_per_byte": 0.8917557274668322, "num_chars": 36}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 841, "native_id": "Mercury_7058135", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.737330913543701, "logits_per_token_corr": -1.4343327283859253, "logits_per_char_corr": -0.3585831820964813, "bits_per_byte_corr": 0.5173261785571494}, "model_output": [{"sum_logits": -7.116842746734619, "num_tokens": 4, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -21.934005737304688, "logits_per_token": -1.7792106866836548, "logits_per_char": -0.3953801525963677, "bits_per_byte": 0.5704129854171112, "num_chars": 18}, {"sum_logits": -5.737330913543701, "num_tokens": 4, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -24.358619689941406, "logits_per_token": -1.4343327283859253, "logits_per_char": -0.3585831820964813, "bits_per_byte": 0.5173261785571494, "num_chars": 16}, {"sum_logits": -12.149581909179688, "num_tokens": 4, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -24.541467666625977, "logits_per_token": -3.037395477294922, "logits_per_char": -0.578551519484747, "bits_per_byte": 0.8346734080600187, "num_chars": 21}, {"sum_logits": -11.510374069213867, "num_tokens": 5, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -27.49575424194336, "logits_per_token": -2.3020748138427733, "logits_per_char": -0.5755187034606933, "bits_per_byte": 0.8302979794221848, "num_chars": 20}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 842, "native_id": "MDSA_2008_4_19", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.4065492153167725, "logits_per_token_corr": -1.7032746076583862, "logits_per_char_corr": -0.34065492153167726, "bits_per_byte_corr": 0.49146116594852285}, "model_output": [{"sum_logits": -7.3413310050964355, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -17.443147659301758, "logits_per_token": -3.6706655025482178, "logits_per_char": -0.9176663756370544, "bits_per_byte": 1.3239127293230784, "num_chars": 8}, {"sum_logits": -3.4065492153167725, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": true, "sum_logits_uncond": -19.23801612854004, "logits_per_token": -1.7032746076583862, "logits_per_char": -0.34065492153167726, "bits_per_byte": 0.49146116594852285, "num_chars": 10}, {"sum_logits": -4.183259010314941, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -17.170225143432617, "logits_per_token": -2.0916295051574707, "logits_per_char": -0.2788839340209961, "bits_per_byte": 0.402344468595985, "num_chars": 15}, {"sum_logits": -6.862635135650635, "num_tokens": 3, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -18.75257110595703, "logits_per_token": -2.2875450452168784, "logits_per_char": -0.45750900904337566, "bits_per_byte": 0.6600459785093762, "num_chars": 15}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 843, "native_id": "AKDE&ED_2008_8_45", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.515371322631836, "logits_per_token_corr": -2.612819035847982, "logits_per_char_corr": -0.47030742645263673, "bits_per_byte_corr": 0.6785101918369574}, "model_output": [{"sum_logits": -27.328536987304688, "num_tokens": 10, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -41.81022644042969, "logits_per_token": -2.7328536987304686, "logits_per_char": -0.5693445205688477, "bits_per_byte": 0.8213905163825711, "num_chars": 48}, {"sum_logits": -22.567453384399414, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -43.4467887878418, "logits_per_token": -2.507494820488824, "logits_per_char": -0.4701552788416545, "bits_per_byte": 0.67829068923311, "num_chars": 48}, {"sum_logits": -26.956989288330078, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -44.311344146728516, "logits_per_token": -2.9952210320366754, "logits_per_char": -0.5501426385373486, "bits_per_byte": 0.7936880563999725, "num_chars": 49}, {"sum_logits": -23.515371322631836, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -40.56605529785156, "logits_per_token": -2.612819035847982, "logits_per_char": -0.47030742645263673, "bits_per_byte": 0.6785101918369574, "num_chars": 50}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 844, "native_id": "Mercury_7131758", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.205780982971191, "logits_per_token_corr": -2.6028904914855957, "logits_per_char_corr": -0.32536131143569946, "bits_per_byte_corr": 0.4693971505057504}, "model_output": [{"sum_logits": -5.205780982971191, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -18.009998321533203, "logits_per_token": -2.6028904914855957, "logits_per_char": -0.32536131143569946, "bits_per_byte": 0.4693971505057504, "num_chars": 16}, {"sum_logits": -5.173707485198975, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -15.7699556350708, "logits_per_token": -2.5868537425994873, "logits_per_char": -0.3695505346570696, "bits_per_byte": 0.5331487237080024, "num_chars": 14}, {"sum_logits": -5.030914783477783, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -17.915393829345703, "logits_per_token": -2.5154573917388916, "logits_per_char": -0.29593616373398723, "bits_per_byte": 0.42694563583903444, "num_chars": 17}, {"sum_logits": -9.331536293029785, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -18.426000595092773, "logits_per_token": -4.665768146514893, "logits_per_char": -0.46657681465148926, "bits_per_byte": 0.6731280566919562, "num_chars": 20}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 845, "native_id": "NYSEDREGENTS_2013_8_10", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.993919372558594, "logits_per_token_corr": -1.997973124186198, "logits_per_char_corr": -0.5993919372558594, "bits_per_byte_corr": 0.8647397754284782}, "model_output": [{"sum_logits": -12.048089027404785, "num_tokens": 1, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -15.223732948303223, "logits_per_token": -12.048089027404785, "logits_per_char": -0.9267760790311373, "bits_per_byte": 1.3370552532337006, "num_chars": 13}, {"sum_logits": -5.993919372558594, "num_tokens": 3, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -17.389202117919922, "logits_per_token": -1.997973124186198, "logits_per_char": -0.5993919372558594, "bits_per_byte": 0.8647397754284782, "num_chars": 10}, {"sum_logits": -7.19312047958374, "num_tokens": 1, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -14.774696350097656, "logits_per_token": -7.19312047958374, "logits_per_char": -0.719312047958374, "bits_per_byte": 1.0377479244419758, "num_chars": 10}, {"sum_logits": -7.474987506866455, "num_tokens": 1, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -16.44088363647461, "logits_per_token": -7.474987506866455, "logits_per_char": -0.7474987506866455, "bits_per_byte": 1.0784127406870938, "num_chars": 10}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 846, "native_id": "Mercury_SC_401783", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.863093376159668, "logits_per_token_corr": -2.215773344039917, "logits_per_char_corr": -0.35452373504638673, "bits_per_byte_corr": 0.5114696344292224}, "model_output": [{"sum_logits": -12.50825309753418, "num_tokens": 4, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -27.637332916259766, "logits_per_token": -3.127063274383545, "logits_per_char": -0.6254126548767089, "bits_per_byte": 0.902279735700477, "num_chars": 20}, {"sum_logits": -21.374664306640625, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -28.38372039794922, "logits_per_token": -4.274932861328125, "logits_per_char": -0.8221024733323318, "bits_per_byte": 1.1860431613799585, "num_chars": 26}, {"sum_logits": -13.567773818969727, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -26.732654571533203, "logits_per_token": -2.7135547637939452, "logits_per_char": -0.5218374545757587, "bits_per_byte": 0.7528523078671077, "num_chars": 26}, {"sum_logits": -8.863093376159668, "num_tokens": 4, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -23.448795318603516, "logits_per_token": -2.215773344039917, "logits_per_char": -0.35452373504638673, "bits_per_byte": 0.5114696344292224, "num_chars": 25}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 847, "native_id": "Mercury_7190120", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -23.862943649291992, "logits_per_token_corr": -2.651438183254666, "logits_per_char_corr": -0.5549521778905114, "bits_per_byte_corr": 0.8006267549737459}, "model_output": [{"sum_logits": -22.506805419921875, "num_tokens": 6, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -27.638389587402344, "logits_per_token": -3.751134236653646, "logits_per_char": -0.6820244066642992, "bits_per_byte": 0.9839532292605291, "num_chars": 33}, {"sum_logits": -23.862943649291992, "num_tokens": 9, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -33.527488708496094, "logits_per_token": -2.651438183254666, "logits_per_char": -0.5549521778905114, "bits_per_byte": 0.8006267549737459, "num_chars": 43}, {"sum_logits": -26.151325225830078, "num_tokens": 9, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -31.160877227783203, "logits_per_token": -2.9057028028700085, "logits_per_char": -0.5943483005870472, "bits_per_byte": 0.8574633458183321, "num_chars": 44}, {"sum_logits": -25.88477325439453, "num_tokens": 10, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -32.792938232421875, "logits_per_token": -2.5884773254394533, "logits_per_char": -0.5282606786611129, "bits_per_byte": 0.7621190614015734, "num_chars": 49}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 848, "native_id": "Mercury_409317", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.149131774902344, "logits_per_token_corr": -2.024855295817057, "logits_per_char_corr": -0.934548598069411, "bits_per_byte_corr": 1.348268627905441}, "model_output": [{"sum_logits": -7.6765947341918945, "num_tokens": 6, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -26.60780906677246, "logits_per_token": -1.2794324556986492, "logits_per_char": -0.6978722485628995, "bits_per_byte": 1.0068168321764486, "num_chars": 11}, {"sum_logits": -12.149131774902344, "num_tokens": 6, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -33.302825927734375, "logits_per_token": -2.024855295817057, "logits_per_char": -0.934548598069411, "bits_per_byte": 1.348268627905441, "num_chars": 13}, {"sum_logits": -9.306018829345703, "num_tokens": 6, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -31.779598236083984, "logits_per_token": -1.551003138224284, "logits_per_char": -0.7158476022573618, "bits_per_byte": 1.032749785809693, "num_chars": 13}, {"sum_logits": -11.191679954528809, "num_tokens": 5, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -30.691560745239258, "logits_per_token": -2.2383359909057616, "logits_per_char": -0.932639996210734, "bits_per_byte": 1.3455150974688943, "num_chars": 12}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 849, "native_id": "Mercury_7268240", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.854702949523926, "logits_per_token_corr": -1.3854702949523925, "logits_per_char_corr": -0.21647973358631134, "bits_per_byte_corr": 0.3123142380981597}, "model_output": [{"sum_logits": -17.742534637451172, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -28.54241180419922, "logits_per_token": -2.957089106241862, "logits_per_char": -0.43274474725490664, "bits_per_byte": 0.6243187008358502, "num_chars": 41}, {"sum_logits": -16.155467987060547, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -39.094993591308594, "logits_per_token": -2.0194334983825684, "logits_per_char": -0.3671697269786488, "bits_per_byte": 0.5297139442770318, "num_chars": 44}, {"sum_logits": -33.54835510253906, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -45.927616119384766, "logits_per_token": -3.727595011393229, "logits_per_char": -0.657810884363511, "bits_per_byte": 0.9490205007147025, "num_chars": 51}, {"sum_logits": -13.854702949523926, "num_tokens": 10, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -30.313922882080078, "logits_per_token": -1.3854702949523925, "logits_per_char": -0.21647973358631134, "bits_per_byte": 0.3123142380981597, "num_chars": 64}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 850, "native_id": "Mercury_7228358", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.346138000488281, "logits_per_token_corr": -2.3910230000813804, "logits_per_char_corr": -0.40988965715680803, "bits_per_byte_corr": 0.5913457756922292}, "model_output": [{"sum_logits": -30.1370849609375, "num_tokens": 9, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -56.21495819091797, "logits_per_token": -3.3485649956597223, "logits_per_char": -0.5580941659432871, "bits_per_byte": 0.8051596855560209, "num_chars": 54}, {"sum_logits": -20.704402923583984, "num_tokens": 8, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -39.365196228027344, "logits_per_token": -2.588050365447998, "logits_per_char": -0.5308821262457432, "bits_per_byte": 0.7659010108318726, "num_chars": 39}, {"sum_logits": -31.855138778686523, "num_tokens": 12, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -39.761444091796875, "logits_per_token": -2.654594898223877, "logits_per_char": -0.6777689101848197, "bits_per_byte": 0.9778138455930591, "num_chars": 47}, {"sum_logits": -14.346138000488281, "num_tokens": 6, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -30.601293563842773, "logits_per_token": -2.3910230000813804, "logits_per_char": -0.40988965715680803, "bits_per_byte": 0.5913457756922292, "num_chars": 35}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 851, "native_id": "MCAS_2004_5_33", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -27.054487228393555, "logits_per_token_corr": -3.0060541364881725, "logits_per_char_corr": -0.5636351505915324, "bits_per_byte_corr": 0.813153636629692}, "model_output": [{"sum_logits": -16.02992057800293, "num_tokens": 6, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -27.16826629638672, "logits_per_token": -2.671653429667155, "logits_per_char": -0.5009350180625916, "bits_per_byte": 0.7226964663670433, "num_chars": 32}, {"sum_logits": -26.256038665771484, "num_tokens": 9, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -34.587074279785156, "logits_per_token": -2.9173376295301647, "logits_per_char": -0.5470008055369059, "bits_per_byte": 0.7891553495109294, "num_chars": 48}, {"sum_logits": -27.054487228393555, "num_tokens": 9, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -38.506690979003906, "logits_per_token": -3.0060541364881725, "logits_per_char": -0.5636351505915324, "bits_per_byte": 0.813153636629692, "num_chars": 48}, {"sum_logits": -35.55331039428711, "num_tokens": 14, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -44.936912536621094, "logits_per_token": -2.539522171020508, "logits_per_char": -0.5828411540047067, "bits_per_byte": 0.840862042509195, "num_chars": 61}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 852, "native_id": "Mercury_7008855", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.484821319580078, "logits_per_token_corr": -1.4969642639160157, "logits_per_char_corr": -0.2673150471278599, "bits_per_byte_corr": 0.3856540928466401}, "model_output": [{"sum_logits": -12.226089477539062, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -25.907711029052734, "logits_per_token": -2.4452178955078123, "logits_per_char": -0.6113044738769531, "bits_per_byte": 0.8819259329361507, "num_chars": 20}, {"sum_logits": -7.484821319580078, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -23.221195220947266, "logits_per_token": -1.4969642639160157, "logits_per_char": -0.2673150471278599, "bits_per_byte": 0.3856540928466401, "num_chars": 28}, {"sum_logits": -18.22765350341797, "num_tokens": 7, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -33.33962631225586, "logits_per_token": -2.6039505004882812, "logits_per_char": -0.5879888226909022, "bits_per_byte": 0.8482885585949141, "num_chars": 31}, {"sum_logits": -8.911161422729492, "num_tokens": 8, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -24.5267276763916, "logits_per_token": -1.1138951778411865, "logits_per_char": -0.27003519462816644, "bits_per_byte": 0.38957843615582166, "num_chars": 33}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 853, "native_id": "Mercury_7057085", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.297597885131836, "logits_per_token_corr": -2.882932980855306, "logits_per_char_corr": -0.6652922263512244, "bits_per_byte_corr": 0.9598137956995788}, "model_output": [{"sum_logits": -10.660591125488281, "num_tokens": 3, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -26.673866271972656, "logits_per_token": -3.5535303751627603, "logits_per_char": -0.44419129689534503, "bits_per_byte": 0.6408325812374119, "num_chars": 24}, {"sum_logits": -17.297597885131836, "num_tokens": 6, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -28.511459350585938, "logits_per_token": -2.882932980855306, "logits_per_char": -0.6652922263512244, "bits_per_byte": 0.9598137956995788, "num_chars": 26}, {"sum_logits": -16.831632614135742, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -25.309389114379883, "logits_per_token": -3.3663265228271486, "logits_per_char": -0.6011297362191337, "bits_per_byte": 0.8672468893748579, "num_chars": 28}, {"sum_logits": -10.313752174377441, "num_tokens": 3, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -21.273500442504883, "logits_per_token": -3.437917391459147, "logits_per_char": -0.542829061809339, "bits_per_byte": 0.7831367955233046, "num_chars": 19}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 854, "native_id": "Mercury_7171728", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.557292938232422, "logits_per_token_corr": -8.778646469116211, "logits_per_char_corr": -3.5114585876464846, "bits_per_byte_corr": 5.065963890688186}, "model_output": [{"sum_logits": -18.793292999267578, "num_tokens": 2, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -16.297119140625, "logits_per_token": -9.396646499633789, "logits_per_char": -3.1322154998779297, "bits_per_byte": 4.5188317686726815, "num_chars": 6}, {"sum_logits": -17.557292938232422, "num_tokens": 2, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -16.871902465820312, "logits_per_token": -8.778646469116211, "logits_per_char": -3.5114585876464846, "bits_per_byte": 5.065963890688186, "num_chars": 5}, {"sum_logits": -19.645282745361328, "num_tokens": 2, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -16.931514739990234, "logits_per_token": -9.822641372680664, "logits_per_char": -3.9290565490722655, "bits_per_byte": 5.668430398722934, "num_chars": 5}, {"sum_logits": -20.941242218017578, "num_tokens": 3, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -17.39594268798828, "logits_per_token": -6.980414072672526, "logits_per_char": -4.1882484436035154, "bits_per_byte": 5.03530438300171, "num_chars": 5}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 855, "native_id": "NAEP_2005_4_S14+3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -74.98289489746094, "logits_per_token_corr": -1.6662865532769098, "logits_per_char_corr": -0.3885124087951344, "bits_per_byte_corr": 0.5605049254929687}, "model_output": [{"sum_logits": -49.08698272705078, "num_tokens": 35, "num_tokens_all": 244, "is_greedy": false, "sum_logits_uncond": -107.95313262939453, "logits_per_token": -1.4024852207728795, "logits_per_char": -0.3339250525649713, "bits_per_byte": 0.48175201736441664, "num_chars": 147}, {"sum_logits": -45.66148376464844, "num_tokens": 23, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -68.66928100585938, "logits_per_token": -1.9852819028108015, "logits_per_char": -0.38051236470540367, "bits_per_byte": 0.5489633015578129, "num_chars": 120}, {"sum_logits": -74.98289489746094, "num_tokens": 45, "num_tokens_all": 254, "is_greedy": false, "sum_logits_uncond": -125.09806823730469, "logits_per_token": -1.6662865532769098, "logits_per_char": -0.3885124087951344, "bits_per_byte": 0.5605049254929687, "num_chars": 193}, {"sum_logits": -48.95452880859375, "num_tokens": 28, "num_tokens_all": 237, "is_greedy": false, "sum_logits_uncond": -94.60277557373047, "logits_per_token": -1.7483760288783483, "logits_per_char": -0.35474296238111414, "bits_per_byte": 0.5117859126178611, "num_chars": 138}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 856, "native_id": "Mercury_7024395", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.14175033569336, "logits_per_token_corr": -1.8283500671386719, "logits_per_char_corr": -0.2948951721191406, "bits_per_byte_corr": 0.4254438023986871}, "model_output": [{"sum_logits": -9.14175033569336, "num_tokens": 5, "num_tokens_all": 236, "is_greedy": false, "sum_logits_uncond": -26.278528213500977, "logits_per_token": -1.8283500671386719, "logits_per_char": -0.2948951721191406, "bits_per_byte": 0.4254438023986871, "num_chars": 31}, {"sum_logits": -12.36914348602295, "num_tokens": 6, "num_tokens_all": 237, "is_greedy": false, "sum_logits_uncond": -41.488494873046875, "logits_per_token": -2.061523914337158, "logits_per_char": -0.34358731905619305, "bits_per_byte": 0.4956917213150599, "num_chars": 36}, {"sum_logits": -10.668843269348145, "num_tokens": 8, "num_tokens_all": 239, "is_greedy": false, "sum_logits_uncond": -38.54372024536133, "logits_per_token": -1.333605408668518, "logits_per_char": -0.2735600838294396, "bits_per_byte": 0.39466377632618516, "num_chars": 39}, {"sum_logits": -11.248056411743164, "num_tokens": 10, "num_tokens_all": 241, "is_greedy": false, "sum_logits_uncond": -38.190185546875, "logits_per_token": -1.1248056411743164, "logits_per_char": -0.23932034918602477, "bits_per_byte": 0.34526628095474104, "num_chars": 47}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 857, "native_id": "NYSEDREGENTS_2012_8_28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -33.915283203125, "logits_per_token_corr": -2.8262736002604165, "logits_per_char_corr": -0.6056300571986607, "bits_per_byte_corr": 0.8737394801344348}, "model_output": [{"sum_logits": -33.33462142944336, "num_tokens": 12, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -49.62568664550781, "logits_per_token": -2.77788511912028, "logits_per_char": -0.6173078042489512, "bits_per_byte": 0.8905869078926566, "num_chars": 54}, {"sum_logits": -36.591331481933594, "num_tokens": 13, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -54.44721603393555, "logits_per_token": -2.814717806302584, "logits_per_char": -0.6308850255505792, "bits_per_byte": 0.9101746977335815, "num_chars": 58}, {"sum_logits": -33.915283203125, "num_tokens": 12, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -46.809364318847656, "logits_per_token": -2.8262736002604165, "logits_per_char": -0.6056300571986607, "bits_per_byte": 0.8737394801344348, "num_chars": 56}, {"sum_logits": -37.73455810546875, "num_tokens": 13, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -52.300567626953125, "logits_per_token": -2.9026583158052883, "logits_per_char": -0.6289093017578125, "bits_per_byte": 0.9073243308155885, "num_chars": 60}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 858, "native_id": "Mercury_7090790", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.822919845581055, "logits_per_token_corr": -2.4114599227905273, "logits_per_char_corr": -0.2679399914211697, "bits_per_byte_corr": 0.38655569687943064}, "model_output": [{"sum_logits": -4.822919845581055, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -17.517112731933594, "logits_per_token": -2.4114599227905273, "logits_per_char": -0.2679399914211697, "bits_per_byte": 0.38655569687943064, "num_chars": 18}, {"sum_logits": -4.619132995605469, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -16.814849853515625, "logits_per_token": -2.3095664978027344, "logits_per_char": -0.3299380711146763, "bits_per_byte": 0.47600001899795563, "num_chars": 14}, {"sum_logits": -12.222101211547852, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -23.340229034423828, "logits_per_token": -6.111050605773926, "logits_per_char": -1.1111001101407139, "bits_per_byte": 1.6029786188323405, "num_chars": 11}, {"sum_logits": -16.07185173034668, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -26.168292999267578, "logits_per_token": -5.35728391011556, "logits_per_char": -1.33932097752889, "bits_per_byte": 1.9322317324408764, "num_chars": 12}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 859, "native_id": "TIMSS_2003_8_pg87", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.113245487213135, "logits_per_token_corr": -1.2783113718032837, "logits_per_char_corr": -0.3195778429508209, "bits_per_byte_corr": 0.46105336920347256}, "model_output": [{"sum_logits": -5.65721321105957, "num_tokens": 4, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -19.512619018554688, "logits_per_token": -1.4143033027648926, "logits_per_char": -0.37714754740397133, "bits_per_byte": 0.5441088963235355, "num_chars": 15}, {"sum_logits": -5.113245487213135, "num_tokens": 4, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -19.471220016479492, "logits_per_token": -1.2783113718032837, "logits_per_char": -0.3195778429508209, "bits_per_byte": 0.46105336920347256, "num_chars": 16}, {"sum_logits": -5.252227306365967, "num_tokens": 4, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -18.973857879638672, "logits_per_token": -1.3130568265914917, "logits_per_char": -0.3282642066478729, "bits_per_byte": 0.4735851430325764, "num_chars": 16}, {"sum_logits": -4.076274871826172, "num_tokens": 4, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -19.249977111816406, "logits_per_token": -1.019068717956543, "logits_per_char": -0.23978087481330423, "bits_per_byte": 0.3459306789934199, "num_chars": 17}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 860, "native_id": "Mercury_SC_407382", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.259695053100586, "logits_per_token_corr": -1.8942421504429408, "logits_per_char_corr": -0.3683248625861274, "bits_per_byte_corr": 0.5313804526894966}, "model_output": [{"sum_logits": -12.562691688537598, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -24.08058738708496, "logits_per_token": -2.5125383377075194, "logits_per_char": -0.5982234137398856, "bits_per_byte": 0.8630539523468196, "num_chars": 21}, {"sum_logits": -7.901444911956787, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -24.86800765991211, "logits_per_token": -1.5802889823913575, "logits_per_char": -0.3160577964782715, "bits_per_byte": 0.45597501561382314, "num_chars": 25}, {"sum_logits": -13.259695053100586, "num_tokens": 7, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -26.096145629882812, "logits_per_token": -1.8942421504429408, "logits_per_char": -0.3683248625861274, "bits_per_byte": 0.5313804526894966, "num_chars": 36}, {"sum_logits": -18.916366577148438, "num_tokens": 8, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -33.21151351928711, "logits_per_token": -2.3645458221435547, "logits_per_char": -0.47290916442871095, "bits_per_byte": 0.6822637063127348, "num_chars": 40}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 861, "native_id": "MDSA_2010_4_20", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.629526615142822, "logits_per_token_corr": -2.2098422050476074, "logits_per_char_corr": -0.3489224534285696, "bits_per_byte_corr": 0.5033886932165693}, "model_output": [{"sum_logits": -9.631192207336426, "num_tokens": 3, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -22.343576431274414, "logits_per_token": -3.210397402445475, "logits_per_char": -0.6879423005240304, "bits_per_byte": 0.9924909453844766, "num_chars": 14}, {"sum_logits": -6.629526615142822, "num_tokens": 3, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -21.273500442504883, "logits_per_token": -2.2098422050476074, "logits_per_char": -0.3489224534285696, "bits_per_byte": 0.5033886932165693, "num_chars": 19}, {"sum_logits": -8.3023681640625, "num_tokens": 5, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -22.108705520629883, "logits_per_token": -1.6604736328125, "logits_per_char": -0.2862885573814655, "bits_per_byte": 0.4130270819977925, "num_chars": 29}, {"sum_logits": -14.478326797485352, "num_tokens": 5, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -24.53333282470703, "logits_per_token": -2.8956653594970705, "logits_per_char": -0.48261089324951173, "bits_per_byte": 0.6962603423705637, "num_chars": 30}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 862, "native_id": "Mercury_SC_405019", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.945863723754883, "logits_per_token_corr": -1.3945863723754883, "logits_per_char_corr": -0.25356115861372513, "bits_per_byte_corr": 0.3658114260943439}, "model_output": [{"sum_logits": -19.40924835205078, "num_tokens": 9, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -35.901729583740234, "logits_per_token": -2.1565831502278647, "logits_per_char": -0.4313166300455729, "bits_per_byte": 0.6222583632201347, "num_chars": 45}, {"sum_logits": -12.199450492858887, "num_tokens": 7, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -29.14786148071289, "logits_per_token": -1.7427786418369837, "logits_per_char": -0.2772602384740656, "bits_per_byte": 0.40000197108251323, "num_chars": 44}, {"sum_logits": -20.340328216552734, "num_tokens": 11, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -46.460357666015625, "logits_per_token": -1.8491207469593396, "logits_per_char": -0.363220146724156, "bits_per_byte": 0.524015904430278, "num_chars": 56}, {"sum_logits": -13.945863723754883, "num_tokens": 10, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -32.81071090698242, "logits_per_token": -1.3945863723754883, "logits_per_char": -0.25356115861372513, "bits_per_byte": 0.3658114260943439, "num_chars": 55}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 863, "native_id": "Mercury_7123078", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.581941604614258, "logits_per_token_corr": -1.3581941604614258, "logits_per_char_corr": -0.25626304914366527, "bits_per_byte_corr": 0.3697094301629162}, "model_output": [{"sum_logits": -10.881895065307617, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -30.373214721679688, "logits_per_token": -1.8136491775512695, "logits_per_char": -0.37523776087267646, "bits_per_byte": 0.5413536567656779, "num_chars": 29}, {"sum_logits": -17.492141723632812, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -38.019386291503906, "logits_per_token": -1.943571302625868, "logits_per_char": -0.3802639505137568, "bits_per_byte": 0.5486049156354372, "num_chars": 46}, {"sum_logits": -20.740951538085938, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -36.12928009033203, "logits_per_token": -2.3045501708984375, "logits_per_char": -0.4938321794782366, "bits_per_byte": 0.7124492363651523, "num_chars": 42}, {"sum_logits": -13.581941604614258, "num_tokens": 10, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -36.651878356933594, "logits_per_token": -1.3581941604614258, "logits_per_char": -0.25626304914366527, "bits_per_byte": 0.3697094301629162, "num_chars": 53}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 864, "native_id": "Mercury_400084", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.677121877670288, "logits_per_token_corr": -2.677121877670288, "logits_per_char_corr": -1.338560938835144, "bits_per_byte_corr": 1.9311352283865248}, "model_output": [{"sum_logits": -2.635305166244507, "num_tokens": 1, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -5.19705867767334, "logits_per_token": -2.635305166244507, "logits_per_char": -1.3176525831222534, "bits_per_byte": 1.9009708472863733, "num_chars": 2}, {"sum_logits": -2.2030866146087646, "num_tokens": 1, "num_tokens_all": 232, "is_greedy": true, "sum_logits_uncond": -6.3205461502075195, "logits_per_token": -2.2030866146087646, "logits_per_char": -1.1015433073043823, "bits_per_byte": 1.5891910667736016, "num_chars": 2}, {"sum_logits": -2.8789288997650146, "num_tokens": 1, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -6.575865745544434, "logits_per_token": -2.8789288997650146, "logits_per_char": -1.4394644498825073, "bits_per_byte": 2.076708223382945, "num_chars": 2}, {"sum_logits": -2.677121877670288, "num_tokens": 1, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -6.875624656677246, "logits_per_token": -2.677121877670288, "logits_per_char": -1.338560938835144, "bits_per_byte": 1.9311352283865248, "num_chars": 2}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 865, "native_id": "Mercury_7139650", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.467795372009277, "logits_per_token_corr": -6.467795372009277, "logits_per_char_corr": -0.6467795372009277, "bits_per_byte_corr": 0.9331056308689076}, "model_output": [{"sum_logits": -5.607301712036133, "num_tokens": 1, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -14.944598197937012, "logits_per_token": -5.607301712036133, "logits_per_char": -0.4005215508597238, "bits_per_byte": 0.5778304551948954, "num_chars": 14}, {"sum_logits": -0.81568443775177, "num_tokens": 1, "num_tokens_all": 215, "is_greedy": true, "sum_logits_uncond": -15.166061401367188, "logits_per_token": -0.81568443775177, "logits_per_char": -0.10196055471897125, "bits_per_byte": 0.1470979866594533, "num_chars": 8}, {"sum_logits": -6.467795372009277, "num_tokens": 1, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -13.019214630126953, "logits_per_token": -6.467795372009277, "logits_per_char": -0.6467795372009277, "bits_per_byte": 0.9331056308689076, "num_chars": 10}, {"sum_logits": -3.764235496520996, "num_tokens": 1, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -13.47834587097168, "logits_per_token": -3.764235496520996, "logits_per_char": -0.4705294370651245, "bits_per_byte": 0.6788304854466184, "num_chars": 8}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 866, "native_id": "Mercury_417150", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.860321044921875, "logits_per_token_corr": -2.286773681640625, "logits_per_char_corr": -0.2286773681640625, "bits_per_byte_corr": 0.3299117050140697}, "model_output": [{"sum_logits": -10.994302749633789, "num_tokens": 4, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -27.44670867919922, "logits_per_token": -2.7485756874084473, "logits_per_char": -0.3435719609260559, "bits_per_byte": 0.4956695642168737, "num_chars": 32}, {"sum_logits": -6.860321044921875, "num_tokens": 3, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -20.762020111083984, "logits_per_token": -2.286773681640625, "logits_per_char": -0.2286773681640625, "bits_per_byte": 0.3299117050140697, "num_chars": 30}, {"sum_logits": -4.651064872741699, "num_tokens": 3, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -20.957340240478516, "logits_per_token": -1.5503549575805664, "logits_per_char": -0.16038154733592067, "bits_per_byte": 0.23138166299179752, "num_chars": 29}, {"sum_logits": -15.010202407836914, "num_tokens": 4, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -27.585128784179688, "logits_per_token": -3.7525506019592285, "logits_per_char": -0.4842000776721585, "bits_per_byte": 0.6985530508561758, "num_chars": 31}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 867, "native_id": "Mercury_SC_402256", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.8697774410247803, "logits_per_token_corr": -3.8697774410247803, "logits_per_char_corr": -0.5528253487178257, "bits_per_byte_corr": 0.7975583890734921}, "model_output": [{"sum_logits": -8.432531356811523, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -18.238903045654297, "logits_per_token": -4.216265678405762, "logits_per_char": -1.6865062713623047, "bits_per_byte": 2.4331142341242815, "num_chars": 5}, {"sum_logits": -7.057778358459473, "num_tokens": 1, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -16.03864288330078, "logits_per_token": -7.057778358459473, "logits_per_char": -1.1762963930765789, "bits_per_byte": 1.6970369729083743, "num_chars": 6}, {"sum_logits": -3.8697774410247803, "num_tokens": 1, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -15.669745445251465, "logits_per_token": -3.8697774410247803, "logits_per_char": -0.5528253487178257, "bits_per_byte": 0.7975583890734921, "num_chars": 7}, {"sum_logits": -5.331167221069336, "num_tokens": 1, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -12.967279434204102, "logits_per_token": -5.331167221069336, "logits_per_char": -0.8885278701782227, "bits_per_byte": 1.2818747519986755, "num_chars": 6}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 868, "native_id": "TIMSS_2007_8_pg53", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.525983810424805, "logits_per_token_corr": -4.262991905212402, "logits_per_char_corr": -0.568398920694987, "bits_per_byte_corr": 0.8200263041338861}, "model_output": [{"sum_logits": -8.525983810424805, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -16.28531265258789, "logits_per_token": -4.262991905212402, "logits_per_char": -0.568398920694987, "bits_per_byte": 0.8200263041338861, "num_chars": 15}, {"sum_logits": -10.213411331176758, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -16.067567825317383, "logits_per_token": -5.106705665588379, "logits_per_char": -0.7856470254751352, "bits_per_byte": 1.133449067542957, "num_chars": 13}, {"sum_logits": -8.317376136779785, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -15.89142894744873, "logits_per_token": -4.158688068389893, "logits_per_char": -0.6397981643676758, "bits_per_byte": 0.9230336389037709, "num_chars": 13}, {"sum_logits": -10.410280227661133, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -14.834554672241211, "logits_per_token": -5.205140113830566, "logits_per_char": -1.1566978030734592, "bits_per_byte": 1.6687621843024372, "num_chars": 9}, {"sum_logits": -8.73142147064209, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -15.8533353805542, "logits_per_token": -4.365710735321045, "logits_per_char": -0.6716478054340069, "bits_per_byte": 0.9689829581242934, "num_chars": 13}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 869, "native_id": "MCAS_2006_9_17-v1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.976057052612305, "logits_per_token_corr": -1.9976057052612304, "logits_per_char_corr": -0.43426210983939795, "bits_per_byte_corr": 0.6265077923117278}, "model_output": [{"sum_logits": -18.574913024902344, "num_tokens": 9, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -48.254512786865234, "logits_per_token": -2.063879224989149, "logits_per_char": -0.4643728256225586, "bits_per_byte": 0.6699483726497419, "num_chars": 40}, {"sum_logits": -18.5100040435791, "num_tokens": 9, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -46.27054214477539, "logits_per_token": -2.0566671159532337, "logits_per_char": -0.46275010108947756, "bits_per_byte": 0.6676072760131354, "num_chars": 40}, {"sum_logits": -20.321422576904297, "num_tokens": 10, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -47.539459228515625, "logits_per_token": -2.0321422576904298, "logits_per_char": -0.44177005601965863, "bits_per_byte": 0.637339469033259, "num_chars": 46}, {"sum_logits": -19.976057052612305, "num_tokens": 10, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -51.253990173339844, "logits_per_token": -1.9976057052612304, "logits_per_char": -0.43426210983939795, "bits_per_byte": 0.6265077923117278, "num_chars": 46}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 870, "native_id": "Mercury_401728", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -20.318506240844727, "logits_per_token_corr": -2.9026437486921037, "logits_per_char_corr": -0.6554356851885396, "bits_per_byte_corr": 0.9455938126438452}, "model_output": [{"sum_logits": -26.28664207458496, "num_tokens": 6, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -35.291297912597656, "logits_per_token": -4.381107012430827, "logits_per_char": -1.0110246951763446, "bits_per_byte": 1.4586003139482362, "num_chars": 26}, {"sum_logits": -22.062246322631836, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -33.098018646240234, "logits_per_token": -5.515561580657959, "logits_per_char": -1.0505831582205636, "bits_per_byte": 1.5156711124073612, "num_chars": 21}, {"sum_logits": -19.707260131835938, "num_tokens": 6, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -29.415019989013672, "logits_per_token": -3.2845433553059897, "logits_per_char": -0.7038307189941406, "bits_per_byte": 1.0154130879188896, "num_chars": 28}, {"sum_logits": -20.318506240844727, "num_tokens": 7, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -38.78326416015625, "logits_per_token": -2.9026437486921037, "logits_per_char": -0.6554356851885396, "bits_per_byte": 0.9455938126438452, "num_chars": 31}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 871, "native_id": "Mercury_7192798", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.142322540283203, "logits_per_token_corr": -4.571161270141602, "logits_per_char_corr": -0.5713951587677002, "bits_per_byte_corr": 0.8243489619427152}, "model_output": [{"sum_logits": -6.082033157348633, "num_tokens": 2, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -14.758249282836914, "logits_per_token": -3.0410165786743164, "logits_per_char": -0.5068360964457194, "bits_per_byte": 0.7312099228862851, "num_chars": 12}, {"sum_logits": -7.0730390548706055, "num_tokens": 2, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -19.092941284179688, "logits_per_token": -3.5365195274353027, "logits_per_char": -0.44206494092941284, "bits_per_byte": 0.6377648980301946, "num_chars": 16}, {"sum_logits": -9.142322540283203, "num_tokens": 2, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -18.12837028503418, "logits_per_token": -4.571161270141602, "logits_per_char": -0.5713951587677002, "bits_per_byte": 0.8243489619427152, "num_chars": 16}, {"sum_logits": -12.376423835754395, "num_tokens": 2, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -19.550621032714844, "logits_per_token": -6.188211917877197, "logits_per_char": -0.6875791019863553, "bits_per_byte": 0.9919669606553143, "num_chars": 18}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 872, "native_id": "Mercury_7221078", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -20.600482940673828, "logits_per_token_corr": -2.9429261343819753, "logits_per_char_corr": -0.5150120735168457, "bits_per_byte_corr": 0.7430053644612294}, "model_output": [{"sum_logits": -29.218536376953125, "num_tokens": 8, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -45.504371643066406, "logits_per_token": -3.6523170471191406, "logits_per_char": -0.7689088520250822, "bits_per_byte": 1.109300987713009, "num_chars": 38}, {"sum_logits": -23.57760238647461, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -40.05815124511719, "logits_per_token": -3.3682289123535156, "logits_per_char": -0.5894400596618652, "bits_per_byte": 0.8503822509760786, "num_chars": 40}, {"sum_logits": -20.600482940673828, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -41.466522216796875, "logits_per_token": -2.9429261343819753, "logits_per_char": -0.5150120735168457, "bits_per_byte": 0.7430053644612294, "num_chars": 40}, {"sum_logits": -25.171571731567383, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -38.913612365722656, "logits_per_token": -3.5959388187953403, "logits_per_char": -0.5993231364658901, "bits_per_byte": 0.8646405168699802, "num_chars": 42}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 873, "native_id": "Mercury_7004953", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -38.71648406982422, "logits_per_token_corr": -4.839560508728027, "logits_per_char_corr": -0.9003833504610284, "bits_per_byte_corr": 1.2989785946100485}, "model_output": [{"sum_logits": -37.57913589477539, "num_tokens": 8, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -48.83705139160156, "logits_per_token": -4.697391986846924, "logits_per_char": -0.8739333929017533, "bits_per_byte": 1.2608193720075314, "num_chars": 43}, {"sum_logits": -38.71648406982422, "num_tokens": 8, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -47.96281814575195, "logits_per_token": -4.839560508728027, "logits_per_char": -0.9003833504610284, "bits_per_byte": 1.2989785946100485, "num_chars": 43}, {"sum_logits": -35.00184631347656, "num_tokens": 8, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -48.844627380371094, "logits_per_token": -4.37523078918457, "logits_per_char": -0.7447201343292885, "bits_per_byte": 1.0744040446477992, "num_chars": 47}, {"sum_logits": -35.38654327392578, "num_tokens": 8, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -46.395896911621094, "logits_per_token": -4.423317909240723, "logits_per_char": -0.7221743525290976, "bits_per_byte": 1.0418773570516755, "num_chars": 49}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 874, "native_id": "TIMSS_2003_8_pg94", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.716970443725586, "logits_per_token_corr": -4.716970443725586, "logits_per_char_corr": -0.9433940887451172, "bits_per_byte_corr": 1.361029973437521}, "model_output": [{"sum_logits": -9.956826210021973, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -16.55103302001953, "logits_per_token": -9.956826210021973, "logits_per_char": -1.659471035003662, "bits_per_byte": 2.3941106327003787, "num_chars": 6}, {"sum_logits": -6.267910003662109, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -14.822157859802246, "logits_per_token": -6.267910003662109, "logits_per_char": -0.8954157148088727, "bits_per_byte": 1.2918118112897352, "num_chars": 7}, {"sum_logits": -8.278013229370117, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -15.743477821350098, "logits_per_token": -8.278013229370117, "logits_per_char": -1.1825733184814453, "bits_per_byte": 1.7060926620620116, "num_chars": 7}, {"sum_logits": -4.716970443725586, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -13.014842987060547, "logits_per_token": -4.716970443725586, "logits_per_char": -0.9433940887451172, "bits_per_byte": 1.361029973437521, "num_chars": 5}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 875, "native_id": "Mercury_7095060", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.7172160148620605, "logits_per_token_corr": -2.23907200495402, "logits_per_char_corr": -0.39513035381541534, "bits_per_byte_corr": 0.5700526019546107}, "model_output": [{"sum_logits": -6.7172160148620605, "num_tokens": 3, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -20.89792823791504, "logits_per_token": -2.23907200495402, "logits_per_char": -0.39513035381541534, "bits_per_byte": 0.5700526019546107, "num_chars": 17}, {"sum_logits": -8.134539604187012, "num_tokens": 3, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -19.027414321899414, "logits_per_token": -2.7115132013956704, "logits_per_char": -0.4785023296580595, "bits_per_byte": 0.6903329380519944, "num_chars": 17}, {"sum_logits": -16.280330657958984, "num_tokens": 5, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -31.749359130859375, "logits_per_token": -3.2560661315917967, "logits_per_char": -0.678347110748291, "bits_per_byte": 0.978648012678619, "num_chars": 24}, {"sum_logits": -17.911893844604492, "num_tokens": 5, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -33.80620574951172, "logits_per_token": -3.5823787689208983, "logits_per_char": -0.6397104944501605, "bits_per_byte": 0.9229071579485363, "num_chars": 28}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 876, "native_id": "Mercury_7123358", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.355971336364746, "logits_per_token_corr": -1.6711942672729492, "logits_per_char_corr": -0.4915277256685145, "bits_per_byte_corr": 0.7091246122819062}, "model_output": [{"sum_logits": -8.355971336364746, "num_tokens": 5, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -25.00986671447754, "logits_per_token": -1.6711942672729492, "logits_per_char": -0.4915277256685145, "bits_per_byte": 0.7091246122819062, "num_chars": 17}, {"sum_logits": -11.793010711669922, "num_tokens": 5, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -26.924266815185547, "logits_per_token": -2.3586021423339845, "logits_per_char": -0.6551672617594401, "bits_per_byte": 0.9452065594938247, "num_chars": 18}, {"sum_logits": -16.729814529418945, "num_tokens": 6, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -37.14190673828125, "logits_per_token": -2.788302421569824, "logits_per_char": -0.522806704044342, "bits_per_byte": 0.7542506392688181, "num_chars": 32}, {"sum_logits": -16.42279624938965, "num_tokens": 5, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -27.26982879638672, "logits_per_token": -3.2845592498779297, "logits_per_char": -0.5132123827934265, "bits_per_byte": 0.7404089595794168, "num_chars": 32}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 877, "native_id": "Mercury_7069020", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -2.6485769748687744, "logits_per_token_corr": -1.3242884874343872, "logits_per_char_corr": -0.2942863305409749, "bits_per_byte_corr": 0.42456542967317984}, "model_output": [{"sum_logits": -2.6485769748687744, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -13.862621307373047, "logits_per_token": -1.3242884874343872, "logits_per_char": -0.2942863305409749, "bits_per_byte": 0.42456542967317984, "num_chars": 9}, {"sum_logits": -10.766321182250977, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -13.494519233703613, "logits_per_token": -10.766321182250977, "logits_per_char": -1.1962579091389973, "bits_per_byte": 1.7258353531402715, "num_chars": 9}, {"sum_logits": -7.931501388549805, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -16.020057678222656, "logits_per_token": -2.643833796183268, "logits_per_char": -0.4957188367843628, "bits_per_byte": 0.7151711075045595, "num_chars": 16}, {"sum_logits": -3.6416385173797607, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -16.175880432128906, "logits_per_token": -1.2138795057932537, "logits_per_char": -0.20231325096554226, "bits_per_byte": 0.2918763238743218, "num_chars": 18}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 878, "native_id": "TIMSS_2003_8_pg117", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.644540786743164, "logits_per_token_corr": -3.661135196685791, "logits_per_char_corr": -0.5857816314697266, "bits_per_byte_corr": 0.8451042547658281}, "model_output": [{"sum_logits": -5.189350605010986, "num_tokens": 5, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -22.718685150146484, "logits_per_token": -1.0378701210021972, "logits_per_char": -0.19219817055596244, "bits_per_byte": 0.2772833475292174, "num_chars": 27}, {"sum_logits": -14.644540786743164, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -23.930217742919922, "logits_per_token": -3.661135196685791, "logits_per_char": -0.5857816314697266, "bits_per_byte": 0.8451042547658281, "num_chars": 25}, {"sum_logits": -12.340230941772461, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -18.501296997070312, "logits_per_token": -6.1701154708862305, "logits_per_char": -1.0283525784810383, "bits_per_byte": 1.4835991652610385, "num_chars": 12}, {"sum_logits": -14.162227630615234, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -16.989023208618164, "logits_per_token": -7.081113815307617, "logits_per_char": -1.2874752391468396, "bits_per_byte": 1.857434142785812, "num_chars": 11}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 879, "native_id": "VASoL_2008_3_32", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.3390390872955322, "logits_per_token_corr": -1.1130130290985107, "logits_per_char_corr": -0.2782532572746277, "bits_per_byte_corr": 0.4014345943815946}, "model_output": [{"sum_logits": -18.392501831054688, "num_tokens": 3, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -18.723407745361328, "logits_per_token": -6.1308339436848955, "logits_per_char": -1.414807833158053, "bits_per_byte": 2.0411362447094494, "num_chars": 13}, {"sum_logits": -3.3390390872955322, "num_tokens": 3, "num_tokens_all": 199, "is_greedy": true, "sum_logits_uncond": -17.678325653076172, "logits_per_token": -1.1130130290985107, "logits_per_char": -0.2782532572746277, "bits_per_byte": 0.4014345943815946, "num_chars": 12}, {"sum_logits": -16.32326316833496, "num_tokens": 4, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -24.60019874572754, "logits_per_token": -4.08081579208374, "logits_per_char": -0.8591191141228927, "bits_per_byte": 1.2394468854789071, "num_chars": 19}, {"sum_logits": -5.967281341552734, "num_tokens": 5, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -18.9898681640625, "logits_per_token": -1.193456268310547, "logits_per_char": -0.2594470148501189, "bits_per_byte": 0.3743029216979807, "num_chars": 23}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 880, "native_id": "Mercury_SC_400142", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.326981544494629, "logits_per_token_corr": -3.1089938481648765, "logits_per_char_corr": -0.4908937654997173, "bits_per_byte_corr": 0.7082100010902607}, "model_output": [{"sum_logits": -9.326981544494629, "num_tokens": 3, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -22.741262435913086, "logits_per_token": -3.1089938481648765, "logits_per_char": -0.4908937654997173, "bits_per_byte": 0.7082100010902607, "num_chars": 19}, {"sum_logits": -6.685624599456787, "num_tokens": 4, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -24.339689254760742, "logits_per_token": -1.6714061498641968, "logits_per_char": -0.3714235888587104, "bits_per_byte": 0.5358509697160277, "num_chars": 18}, {"sum_logits": -15.24558162689209, "num_tokens": 4, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -24.159347534179688, "logits_per_token": -3.8113954067230225, "logits_per_char": -0.8967989192289465, "bits_per_byte": 1.2938073534471126, "num_chars": 17}, {"sum_logits": -14.189369201660156, "num_tokens": 4, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -22.97024154663086, "logits_per_token": -3.547342300415039, "logits_per_char": -0.8346687765682445, "bits_per_byte": 1.2041725047407297, "num_chars": 17}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 881, "native_id": "Mercury_7163818", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.458020210266113, "logits_per_token_corr": -2.2290101051330566, "logits_per_char_corr": -0.3715016841888428, "bits_per_byte_corr": 0.5359636374615263}, "model_output": [{"sum_logits": -7.6560750007629395, "num_tokens": 2, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -13.626036643981934, "logits_per_token": -3.8280375003814697, "logits_per_char": -0.6380062500635783, "bits_per_byte": 0.9204484530235495, "num_chars": 12}, {"sum_logits": -10.436437606811523, "num_tokens": 2, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -17.014938354492188, "logits_per_token": -5.218218803405762, "logits_per_char": -1.0436437606811524, "bits_per_byte": 1.5056596779904885, "num_chars": 10}, {"sum_logits": -9.389090538024902, "num_tokens": 2, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -16.102745056152344, "logits_per_token": -4.694545269012451, "logits_per_char": -0.7222377336942233, "bits_per_byte": 1.0419687967442883, "num_chars": 13}, {"sum_logits": -4.458020210266113, "num_tokens": 2, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -18.68292236328125, "logits_per_token": -2.2290101051330566, "logits_per_char": -0.3715016841888428, "bits_per_byte": 0.5359636374615263, "num_chars": 12}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 882, "native_id": "Mercury_402502", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.50959014892578, "logits_per_token_corr": -4.501918029785156, "logits_per_char_corr": -2.8136987686157227, "bits_per_byte_corr": 4.0593092600402025}, "model_output": [{"sum_logits": -19.007827758789062, "num_tokens": 5, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -21.261497497558594, "logits_per_token": -3.8015655517578124, "logits_per_char": -2.7154039655412947, "bits_per_byte": 3.917499835099466, "num_chars": 7}, {"sum_logits": -18.122665405273438, "num_tokens": 5, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -22.138545989990234, "logits_per_token": -3.6245330810546874, "logits_per_char": -2.588952200753348, "bits_per_byte": 3.7350685011281066, "num_chars": 7}, {"sum_logits": -22.919654846191406, "num_tokens": 5, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -26.325096130371094, "logits_per_token": -4.583930969238281, "logits_per_char": -2.864956855773926, "bits_per_byte": 4.133259048188849, "num_chars": 8}, {"sum_logits": -22.50959014892578, "num_tokens": 5, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -25.88971710205078, "logits_per_token": -4.501918029785156, "logits_per_char": -2.8136987686157227, "bits_per_byte": 4.0593092600402025, "num_chars": 8}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 883, "native_id": "Mercury_7130778", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.050374031066895, "logits_per_token_corr": -2.010074806213379, "logits_per_char_corr": -0.4020149612426758, "bits_per_byte_corr": 0.5799849909483938}, "model_output": [{"sum_logits": -9.302948951721191, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -16.732097625732422, "logits_per_token": -3.100982983907064, "logits_per_char": -0.4896288921958522, "bits_per_byte": 0.7063851746474203, "num_chars": 19}, {"sum_logits": -15.911531448364258, "num_tokens": 4, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -27.690719604492188, "logits_per_token": -3.9778828620910645, "logits_per_char": -0.7576919737316313, "bits_per_byte": 1.0931184530247806, "num_chars": 21}, {"sum_logits": -10.050374031066895, "num_tokens": 5, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -26.761281967163086, "logits_per_token": -2.010074806213379, "logits_per_char": -0.4020149612426758, "bits_per_byte": 0.5799849909483938, "num_chars": 25}, {"sum_logits": -7.431687355041504, "num_tokens": 7, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -25.96442222595215, "logits_per_token": -1.0616696221487862, "logits_per_char": -0.275247679816352, "bits_per_byte": 0.3970984626875296, "num_chars": 27}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 884, "native_id": "MEA_2010_8_18", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -22.245506286621094, "logits_per_token_corr": -2.7806882858276367, "logits_per_char_corr": -0.5425733240639291, "bits_per_byte_corr": 0.7827678439462333}, "model_output": [{"sum_logits": -11.614246368408203, "num_tokens": 8, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -28.936077117919922, "logits_per_token": -1.4517807960510254, "logits_per_char": -0.34159548142377066, "bits_per_byte": 0.492818107040506, "num_chars": 34}, {"sum_logits": -17.021812438964844, "num_tokens": 10, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -33.76987075805664, "logits_per_token": -1.7021812438964843, "logits_per_char": -0.37003940084706183, "bits_per_byte": 0.5338540085359629, "num_chars": 46}, {"sum_logits": -22.245506286621094, "num_tokens": 8, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -45.97016525268555, "logits_per_token": -2.7806882858276367, "logits_per_char": -0.5425733240639291, "bits_per_byte": 0.7827678439462333, "num_chars": 41}, {"sum_logits": -20.291065216064453, "num_tokens": 11, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -39.14307403564453, "logits_per_token": -1.8446422923694958, "logits_per_char": -0.38285028709555574, "bits_per_byte": 0.552336210596071, "num_chars": 53}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 885, "native_id": "Mercury_7211033", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.2754926681518555, "logits_per_token_corr": -2.2754926681518555, "logits_per_char_corr": -0.28443658351898193, "bits_per_byte_corr": 0.4103552484905295}, "model_output": [{"sum_logits": -3.3982603549957275, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -14.251230239868164, "logits_per_token": -1.6991301774978638, "logits_per_char": -0.2265506903330485, "bits_per_byte": 0.3268435574536951, "num_chars": 15}, {"sum_logits": -2.2754926681518555, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -14.469803810119629, "logits_per_token": -2.2754926681518555, "logits_per_char": -0.28443658351898193, "bits_per_byte": 0.4103552484905295, "num_chars": 8}, {"sum_logits": -3.306155204772949, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -3.306155204772949, "logits_per_char": -0.5510258674621582, "bits_per_byte": 0.794962286389766, "num_chars": 6}, {"sum_logits": -5.08327579498291, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -14.924777030944824, "logits_per_token": -5.08327579498291, "logits_per_char": -0.5648084216647677, "bits_per_byte": 0.8148463089886684, "num_chars": 9}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 886, "native_id": "NYSEDREGENTS_2008_8_17", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -20.083988189697266, "logits_per_token_corr": -2.510498523712158, "logits_per_char_corr": -0.41841642061869305, "bits_per_byte_corr": 0.6036472950535328}, "model_output": [{"sum_logits": -20.083988189697266, "num_tokens": 8, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -34.658878326416016, "logits_per_token": -2.510498523712158, "logits_per_char": -0.41841642061869305, "bits_per_byte": 0.6036472950535328, "num_chars": 48}, {"sum_logits": -16.897945404052734, "num_tokens": 8, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -27.357421875, "logits_per_token": -2.112243175506592, "logits_per_char": -0.38404421372847125, "bits_per_byte": 0.5540586826285647, "num_chars": 44}, {"sum_logits": -20.883697509765625, "num_tokens": 8, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -33.89188003540039, "logits_per_token": -2.610462188720703, "logits_per_char": -0.4443339895694814, "bits_per_byte": 0.6410384432507598, "num_chars": 47}, {"sum_logits": -22.145151138305664, "num_tokens": 8, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -32.90693664550781, "logits_per_token": -2.768143892288208, "logits_per_char": -0.527265503292992, "bits_per_byte": 0.7606833268331695, "num_chars": 42}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 887, "native_id": "NAEP_2005_8_S11+1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -29.403390884399414, "logits_per_token_corr": -2.9403390884399414, "logits_per_char_corr": -0.5445072385999892, "bits_per_byte_corr": 0.7855578928569124}, "model_output": [{"sum_logits": -14.15738582611084, "num_tokens": 3, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -25.295732498168945, "logits_per_token": -4.719128608703613, "logits_per_char": -0.8327874015359318, "bits_per_byte": 1.201458254311558, "num_chars": 17}, {"sum_logits": -23.683883666992188, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -34.602325439453125, "logits_per_token": -3.9473139444986978, "logits_per_char": -0.6965848137350643, "bits_per_byte": 1.0049594563348616, "num_chars": 34}, {"sum_logits": -29.403390884399414, "num_tokens": 10, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -41.26785659790039, "logits_per_token": -2.9403390884399414, "logits_per_char": -0.5445072385999892, "bits_per_byte": 0.7855578928569124, "num_chars": 54}, {"sum_logits": -23.16944122314453, "num_tokens": 12, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -35.8416633605957, "logits_per_token": -1.9307867685953777, "logits_per_char": -0.35105213974461413, "bits_per_byte": 0.506461181103378, "num_chars": 66}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 888, "native_id": "Mercury_412774", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.137264251708984, "logits_per_token_corr": -2.892158031463623, "logits_per_char_corr": -0.7230395078659058, "bits_per_byte_corr": 1.0431255123656884}, "model_output": [{"sum_logits": -21.443042755126953, "num_tokens": 8, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -38.75987243652344, "logits_per_token": -2.680380344390869, "logits_per_char": -0.6700950860977173, "bits_per_byte": 0.9667428576379342, "num_chars": 32}, {"sum_logits": -24.189067840576172, "num_tokens": 9, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -39.977882385253906, "logits_per_token": -2.6876742045084634, "logits_per_char": -0.691116224016462, "bits_per_byte": 0.997069949067172, "num_chars": 35}, {"sum_logits": -23.137264251708984, "num_tokens": 8, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -39.568519592285156, "logits_per_token": -2.892158031463623, "logits_per_char": -0.7230395078659058, "bits_per_byte": 1.0431255123656884, "num_chars": 32}, {"sum_logits": -25.245391845703125, "num_tokens": 9, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -40.280052185058594, "logits_per_token": -2.8050435384114585, "logits_per_char": -0.7212969098772322, "bits_per_byte": 1.040611474889164, "num_chars": 35}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 889, "native_id": "MEA_2013_5_12", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.25074005126953, "logits_per_token_corr": -1.5500493367513022, "logits_per_char_corr": -0.3059307901482833, "bits_per_byte_corr": 0.44136483380248753}, "model_output": [{"sum_logits": -32.53388214111328, "num_tokens": 13, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -57.487796783447266, "logits_per_token": -2.5026063185471754, "logits_per_char": -0.4518594741821289, "bits_per_byte": 0.6518954225817203, "num_chars": 72}, {"sum_logits": -26.799549102783203, "num_tokens": 13, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -55.48577117919922, "logits_per_token": -2.0615037771371694, "logits_per_char": -0.3526256460892527, "bits_per_byte": 0.508731270903597, "num_chars": 76}, {"sum_logits": -23.25074005126953, "num_tokens": 15, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -44.7030029296875, "logits_per_token": -1.5500493367513022, "logits_per_char": -0.3059307901482833, "bits_per_byte": 0.44136483380248753, "num_chars": 76}, {"sum_logits": -26.486270904541016, "num_tokens": 13, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -54.988670349121094, "logits_per_token": -2.0374054541954627, "logits_per_char": -0.42041699848477804, "bits_per_byte": 0.6065335188198478, "num_chars": 63}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 890, "native_id": "Mercury_7098473", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.588960647583008, "logits_per_token_corr": -3.1963202158610025, "logits_per_char_corr": -0.5640565086813534, "bits_per_byte_corr": 0.8137615278563157}, "model_output": [{"sum_logits": -10.040414810180664, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -22.164600372314453, "logits_per_token": -3.346804936726888, "logits_per_char": -0.717172486441476, "bits_per_byte": 1.034661189651868, "num_chars": 14}, {"sum_logits": -9.588960647583008, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -22.11163330078125, "logits_per_token": -3.1963202158610025, "logits_per_char": -0.5640565086813534, "bits_per_byte": 0.8137615278563157, "num_chars": 17}, {"sum_logits": -15.69655990600586, "num_tokens": 6, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -29.23792266845703, "logits_per_token": -2.616093317667643, "logits_per_char": -0.5813540705928096, "bits_per_byte": 0.8387166346454614, "num_chars": 27}, {"sum_logits": -18.902849197387695, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -31.051485061645508, "logits_per_token": -2.700407028198242, "logits_per_char": -0.555966152864344, "bits_per_byte": 0.8020896116400807, "num_chars": 34}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 891, "native_id": "Mercury_417593", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.602523803710938, "logits_per_token_corr": -2.6602523803710936, "logits_per_char_corr": -0.5216181137982536, "bits_per_byte_corr": 0.7525358660151361}, "model_output": [{"sum_logits": -26.602523803710938, "num_tokens": 10, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -43.034671783447266, "logits_per_token": -2.6602523803710936, "logits_per_char": -0.5216181137982536, "bits_per_byte": 0.7525358660151361, "num_chars": 51}, {"sum_logits": -27.804927825927734, "num_tokens": 10, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -42.82594299316406, "logits_per_token": -2.7804927825927734, "logits_per_char": -0.5792693297068278, "bits_per_byte": 0.8357089893077148, "num_chars": 48}, {"sum_logits": -27.467662811279297, "num_tokens": 11, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -50.35980987548828, "logits_per_token": -2.497060255570845, "logits_per_char": -0.5385816237505745, "bits_per_byte": 0.7770090376994376, "num_chars": 51}, {"sum_logits": -35.74459457397461, "num_tokens": 11, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -55.18144989013672, "logits_per_token": -3.249508597634055, "logits_per_char": -0.6744263127165021, "bits_per_byte": 0.9729914968018258, "num_chars": 53}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 892, "native_id": "Mercury_7081743", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.185983657836914, "logits_per_token_corr": -2.4551405225481306, "logits_per_char_corr": -0.33698007172229244, "bits_per_byte_corr": 0.4861594783525078}, "model_output": [{"sum_logits": -23.558427810668945, "num_tokens": 9, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -40.468841552734375, "logits_per_token": -2.6176030900743275, "logits_per_char": -0.4444986379371499, "bits_per_byte": 0.6412759806342857, "num_chars": 53}, {"sum_logits": -17.185983657836914, "num_tokens": 7, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -35.2022705078125, "logits_per_token": -2.4551405225481306, "logits_per_char": -0.33698007172229244, "bits_per_byte": 0.4861594783525078, "num_chars": 51}, {"sum_logits": -24.77832794189453, "num_tokens": 7, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -42.9682502746582, "logits_per_token": -3.5397611345563615, "logits_per_char": -0.5056801620794802, "bits_per_byte": 0.7295422621085175, "num_chars": 49}, {"sum_logits": -20.391700744628906, "num_tokens": 11, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -35.367591857910156, "logits_per_token": -1.853790976784446, "logits_per_char": -0.3847490706533756, "bits_per_byte": 0.5550755762186612, "num_chars": 53}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 893, "native_id": "Mercury_7018410", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.822661399841309, "logits_per_token_corr": -1.1028326749801636, "logits_per_char_corr": -0.24507392777336967, "bits_per_byte_corr": 0.35356694025007446}, "model_output": [{"sum_logits": -6.870903015136719, "num_tokens": 5, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -25.139972686767578, "logits_per_token": -1.3741806030273438, "logits_per_char": -0.2544778894495081, "bits_per_byte": 0.367133989124959, "num_chars": 27}, {"sum_logits": -7.330358982086182, "num_tokens": 7, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -27.81366729736328, "logits_per_token": -1.047194140298026, "logits_per_char": -0.24434529940287272, "bits_per_byte": 0.35251575171330674, "num_chars": 30}, {"sum_logits": -8.822661399841309, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -23.265727996826172, "logits_per_token": -1.1028326749801636, "logits_per_char": -0.24507392777336967, "bits_per_byte": 0.35356694025007446, "num_chars": 36}, {"sum_logits": -22.101226806640625, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -37.08716583251953, "logits_per_token": -2.762653350830078, "logits_per_char": -0.5023006092418324, "bits_per_byte": 0.7246665979892173, "num_chars": 44}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 894, "native_id": "Mercury_402563", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -1.7641665935516357, "logits_per_token_corr": -0.8820832967758179, "logits_per_char_corr": -0.5880555311838785, "bits_per_byte_corr": 0.8483847986069163}, "model_output": [{"sum_logits": -1.5521538257598877, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": true, "sum_logits_uncond": -9.318770408630371, "logits_per_token": -0.7760769128799438, "logits_per_char": -0.5173846085866293, "bits_per_byte": 0.7464282090407437, "num_chars": 3}, {"sum_logits": -1.7641665935516357, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -10.142398834228516, "logits_per_token": -0.8820832967758179, "logits_per_char": -0.5880555311838785, "bits_per_byte": 0.8483847986069163, "num_chars": 3}, {"sum_logits": -5.44858455657959, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -10.558201789855957, "logits_per_token": -2.724292278289795, "logits_per_char": -1.8161948521931965, "bits_per_byte": 2.620215306549071, "num_chars": 3}, {"sum_logits": -5.310904502868652, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -12.361029624938965, "logits_per_token": -2.655452251434326, "logits_per_char": -1.7703015009562175, "bits_per_byte": 2.5540051963096584, "num_chars": 3}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 895, "native_id": "Mercury_416407", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.85931396484375, "logits_per_token_corr": -3.8098856608072915, "logits_per_char_corr": -0.49694160793138586, "bits_per_byte_corr": 0.7169351933745131}, "model_output": [{"sum_logits": -20.35000991821289, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -33.08960723876953, "logits_per_token": -4.0700019836425785, "logits_per_char": -0.5355265767950761, "bits_per_byte": 0.772601536607054, "num_chars": 38}, {"sum_logits": -21.975936889648438, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -41.18163299560547, "logits_per_token": -4.3951873779296875, "logits_per_char": -0.5783141286749589, "bits_per_byte": 0.8343309255159845, "num_chars": 38}, {"sum_logits": -22.85931396484375, "num_tokens": 6, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -41.51005935668945, "logits_per_token": -3.8098856608072915, "logits_per_char": -0.49694160793138586, "bits_per_byte": 0.7169351933745131, "num_chars": 46}, {"sum_logits": -11.309551239013672, "num_tokens": 7, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -30.279396057128906, "logits_per_token": -1.6156501770019531, "logits_per_char": -0.20562820434570311, "bits_per_byte": 0.2966587906766614, "num_chars": 55}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 896, "native_id": "Mercury_SC_400400", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.8001699447631836, "logits_per_token_corr": -2.8001699447631836, "logits_per_char_corr": -0.35002124309539795, "bits_per_byte_corr": 0.5049739116198837}, "model_output": [{"sum_logits": -2.8001699447631836, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -13.47834587097168, "logits_per_token": -2.8001699447631836, "logits_per_char": -0.35002124309539795, "bits_per_byte": 0.5049739116198837, "num_chars": 8}, {"sum_logits": -5.876513481140137, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -12.887895584106445, "logits_per_token": -5.876513481140137, "logits_per_char": -0.6529459423489041, "bits_per_byte": 0.9420018729960118, "num_chars": 9}, {"sum_logits": -6.377449035644531, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -13.142045021057129, "logits_per_token": -6.377449035644531, "logits_per_char": -0.7086054484049479, "bits_per_byte": 1.022301566361453, "num_chars": 9}, {"sum_logits": -6.353410720825195, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -13.95900821685791, "logits_per_token": -6.353410720825195, "logits_per_char": -0.5775827928022905, "bits_per_byte": 0.8332758308792608, "num_chars": 11}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 897, "native_id": "MCAS_2000_8_22", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.216747283935547, "logits_per_token_corr": -1.0760467052459717, "logits_per_char_corr": -0.21793350992323476, "bits_per_byte_corr": 0.31441159401000235}, "model_output": [{"sum_logits": -9.922036170959473, "num_tokens": 14, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -38.46371078491211, "logits_per_token": -0.708716869354248, "logits_per_char": -0.1340815698778307, "bits_per_byte": 0.19343881593749235, "num_chars": 74}, {"sum_logits": -17.5877685546875, "num_tokens": 14, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -44.01261901855469, "logits_per_token": -1.2562691824776786, "logits_per_char": -0.21713294511959877, "bits_per_byte": 0.31325662313788566, "num_chars": 81}, {"sum_logits": -16.107667922973633, "num_tokens": 15, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -43.940773010253906, "logits_per_token": -1.0738445281982423, "logits_per_char": -0.19406828822859798, "bits_per_byte": 0.27998135702140947, "num_chars": 83}, {"sum_logits": -17.216747283935547, "num_tokens": 16, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -43.972042083740234, "logits_per_token": -1.0760467052459717, "logits_per_char": -0.21793350992323476, "bits_per_byte": 0.31441159401000235, "num_chars": 79}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 898, "native_id": "MCAS_8_2014_8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.650388717651367, "logits_per_token_corr": -2.081298589706421, "logits_per_char_corr": -0.4381681241487202, "bits_per_byte_corr": 0.6321429797854324}, "model_output": [{"sum_logits": -16.650388717651367, "num_tokens": 8, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -31.44525146484375, "logits_per_token": -2.081298589706421, "logits_per_char": -0.4381681241487202, "bits_per_byte": 0.6321429797854324, "num_chars": 38}, {"sum_logits": -19.84756088256836, "num_tokens": 6, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -30.96373748779297, "logits_per_token": -3.307926813761393, "logits_per_char": -0.496189022064209, "bits_per_byte": 0.7158494414760931, "num_chars": 40}, {"sum_logits": -19.429109573364258, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -29.171762466430664, "logits_per_token": -2.7755870819091797, "logits_per_char": -0.4625978469848633, "bits_per_byte": 0.6673876197714532, "num_chars": 42}, {"sum_logits": -34.600364685058594, "num_tokens": 11, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -42.650211334228516, "logits_per_token": -3.1454876986416904, "logits_per_char": -0.6407474941677518, "bits_per_byte": 0.9244032322985096, "num_chars": 54}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 899, "native_id": "Mercury_7206430", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -23.25464630126953, "logits_per_token_corr": -3.3220923287527904, "logits_per_char_corr": -0.4745846183932557, "bits_per_byte_corr": 0.6846808754386231}, "model_output": [{"sum_logits": -23.25464630126953, "num_tokens": 7, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -46.81645202636719, "logits_per_token": -3.3220923287527904, "logits_per_char": -0.4745846183932557, "bits_per_byte": 0.6846808754386231, "num_chars": 49}, {"sum_logits": -28.514320373535156, "num_tokens": 10, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -50.49958038330078, "logits_per_token": -2.851432037353516, "logits_per_char": -0.5184421886097301, "bits_per_byte": 0.7479539744954157, "num_chars": 55}, {"sum_logits": -24.97644805908203, "num_tokens": 9, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -42.18599319458008, "logits_per_token": -2.775160895453559, "logits_per_char": -0.4306284148117592, "bits_per_byte": 0.6212654785152467, "num_chars": 58}, {"sum_logits": -24.904260635375977, "num_tokens": 7, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -43.18519973754883, "logits_per_token": -3.5577515193394254, "logits_per_char": -0.6074209911067311, "bits_per_byte": 0.8763232516021696, "num_chars": 41}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 900, "native_id": "Mercury_7185343", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -32.25936508178711, "logits_per_token_corr": -2.3042403629847934, "logits_per_char_corr": -0.5120534139966207, "bits_per_byte_corr": 0.7387369210437188}, "model_output": [{"sum_logits": -15.901098251342773, "num_tokens": 10, "num_tokens_all": 235, "is_greedy": false, "sum_logits_uncond": -36.04491424560547, "logits_per_token": -1.5901098251342773, "logits_per_char": -0.39752745628356934, "bits_per_byte": 0.5735108897979218, "num_chars": 40}, {"sum_logits": -23.393360137939453, "num_tokens": 8, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -37.5727653503418, "logits_per_token": -2.9241700172424316, "logits_per_char": -0.5705697594619379, "bits_per_byte": 0.8231581624575379, "num_chars": 41}, {"sum_logits": -23.407474517822266, "num_tokens": 9, "num_tokens_all": 234, "is_greedy": false, "sum_logits_uncond": -36.055816650390625, "logits_per_token": -2.600830501980252, "logits_per_char": -0.5201661003960504, "bits_per_byte": 0.7504410534804717, "num_chars": 45}, {"sum_logits": -32.25936508178711, "num_tokens": 14, "num_tokens_all": 239, "is_greedy": false, "sum_logits_uncond": -54.491546630859375, "logits_per_token": -2.3042403629847934, "logits_per_char": -0.5120534139966207, "bits_per_byte": 0.7387369210437188, "num_chars": 63}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 901, "native_id": "OHAT_2010_8_8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -2.7544469833374023, "logits_per_token_corr": -1.3772234916687012, "logits_per_char_corr": -0.25040427121249115, "bits_per_byte_corr": 0.36125700029593555}, "model_output": [{"sum_logits": -4.396219730377197, "num_tokens": 2, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -19.15277862548828, "logits_per_token": -2.1981098651885986, "logits_per_char": -0.4396219730377197, "bits_per_byte": 0.6342404403677955, "num_chars": 10}, {"sum_logits": -2.7544469833374023, "num_tokens": 2, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -16.49663734436035, "logits_per_token": -1.3772234916687012, "logits_per_char": -0.25040427121249115, "bits_per_byte": 0.36125700029593555, "num_chars": 11}, {"sum_logits": -3.658190965652466, "num_tokens": 2, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -17.792051315307617, "logits_per_token": -1.829095482826233, "logits_per_char": -0.3658190965652466, "bits_per_byte": 0.5277653964775413, "num_chars": 10}, {"sum_logits": -5.303855895996094, "num_tokens": 3, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -19.378381729125977, "logits_per_token": -1.7679519653320312, "logits_per_char": -0.4079889150766226, "bits_per_byte": 0.5886035845191347, "num_chars": 13}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 902, "native_id": "Mercury_405462", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.825584411621094, "logits_per_token_corr": -2.9806204901801214, "logits_per_char_corr": -0.4790282930646624, "bits_per_byte_corr": 0.69109174285039}, "model_output": [{"sum_logits": -22.610950469970703, "num_tokens": 8, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -41.292945861816406, "logits_per_token": -2.826368808746338, "logits_per_char": -0.4614479687749123, "bits_per_byte": 0.6657286961803295, "num_chars": 49}, {"sum_logits": -26.825584411621094, "num_tokens": 9, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -44.75642395019531, "logits_per_token": -2.9806204901801214, "logits_per_char": -0.4790282930646624, "bits_per_byte": 0.69109174285039, "num_chars": 56}, {"sum_logits": -27.210607528686523, "num_tokens": 8, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -42.89607238769531, "logits_per_token": -3.4013259410858154, "logits_per_char": -0.5335413240918926, "bits_per_byte": 0.7697374223772576, "num_chars": 51}, {"sum_logits": -26.120548248291016, "num_tokens": 12, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -51.127811431884766, "logits_per_token": -2.1767123540242515, "logits_per_char": -0.3957658825498639, "bits_per_byte": 0.5709694761081427, "num_chars": 66}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 903, "native_id": "Mercury_SC_LBS10337", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.317750930786133, "logits_per_token_corr": -2.331775093078613, "logits_per_char_corr": -0.46635501861572265, "bits_per_byte_corr": 0.6728080726510667}, "model_output": [{"sum_logits": -23.317750930786133, "num_tokens": 10, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -38.8919677734375, "logits_per_token": -2.331775093078613, "logits_per_char": -0.46635501861572265, "bits_per_byte": 0.6728080726510667, "num_chars": 50}, {"sum_logits": -31.436206817626953, "num_tokens": 10, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -47.138465881347656, "logits_per_token": -3.1436206817626955, "logits_per_char": -0.5239367802937825, "bits_per_byte": 0.7558809946697135, "num_chars": 60}, {"sum_logits": -18.118541717529297, "num_tokens": 7, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -35.476009368896484, "logits_per_token": -2.5883631025041853, "logits_per_char": -0.5328982858096852, "bits_per_byte": 0.7688097142364146, "num_chars": 34}, {"sum_logits": -30.004638671875, "num_tokens": 9, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -40.9749755859375, "logits_per_token": -3.3338487413194446, "logits_per_char": -0.6977822946947675, "bits_per_byte": 1.0066870561769856, "num_chars": 43}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 904, "native_id": "Mercury_7142520", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.061520099639893, "logits_per_token_corr": -2.5307600498199463, "logits_per_char_corr": -0.29773647644940543, "bits_per_byte_corr": 0.42954293806561944}, "model_output": [{"sum_logits": -9.512364387512207, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -18.009998321533203, "logits_per_token": -4.7561821937561035, "logits_per_char": -0.5945227742195129, "bits_per_byte": 0.8577150580626564, "num_chars": 16}, {"sum_logits": -8.320466041564941, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -16.404239654541016, "logits_per_token": -4.160233020782471, "logits_per_char": -0.4894391789155848, "bits_per_byte": 0.7061114762387876, "num_chars": 17}, {"sum_logits": -5.061520099639893, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -17.915393829345703, "logits_per_token": -2.5307600498199463, "logits_per_char": -0.29773647644940543, "bits_per_byte": 0.42954293806561944, "num_chars": 17}, {"sum_logits": -11.131089210510254, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -18.426000595092773, "logits_per_token": -5.565544605255127, "logits_per_char": -0.5565544605255127, "bits_per_byte": 0.8029383601853664, "num_chars": 20}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 905, "native_id": "Mercury_SC_405501", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.735586166381836, "logits_per_token_corr": -2.3907980237688338, "logits_per_char_corr": -0.46487739351060653, "bits_per_byte_corr": 0.670676310239621}, "model_output": [{"sum_logits": -27.857696533203125, "num_tokens": 8, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -33.14921569824219, "logits_per_token": -3.4822120666503906, "logits_per_char": -0.8193440156824449, "bits_per_byte": 1.1820635482079616, "num_chars": 34}, {"sum_logits": -23.758800506591797, "num_tokens": 8, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -32.505619049072266, "logits_per_token": -2.9698500633239746, "logits_per_char": -0.6788228716169085, "bits_per_byte": 0.979334390524423, "num_chars": 35}, {"sum_logits": -21.171855926513672, "num_tokens": 7, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -32.52836227416992, "logits_per_token": -3.0245508466448103, "logits_per_char": -0.604910169328962, "bits_per_byte": 0.8727009014748236, "num_chars": 35}, {"sum_logits": -16.735586166381836, "num_tokens": 7, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -28.63229751586914, "logits_per_token": -2.3907980237688338, "logits_per_char": -0.46487739351060653, "bits_per_byte": 0.670676310239621, "num_chars": 36}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 906, "native_id": "Mercury_7009555", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.3757381439209, "logits_per_token_corr": -3.27514762878418, "logits_per_char_corr": -0.5646806256524448, "bits_per_byte_corr": 0.8146619383154446}, "model_output": [{"sum_logits": -17.627941131591797, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -37.759864807128906, "logits_per_token": -2.5182773045131137, "logits_per_char": -0.5341800342906605, "bits_per_byte": 0.770658886413586, "num_chars": 33}, {"sum_logits": -16.19399070739746, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -34.22529220581055, "logits_per_token": -2.69899845123291, "logits_per_char": -0.5997774336073134, "bits_per_byte": 0.8652959291030022, "num_chars": 27}, {"sum_logits": -16.3757381439209, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -29.50889015197754, "logits_per_token": -3.27514762878418, "logits_per_char": -0.5646806256524448, "bits_per_byte": 0.8146619383154446, "num_chars": 29}, {"sum_logits": -12.762601852416992, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -30.209640502929688, "logits_per_token": -1.8232288360595703, "logits_per_char": -0.3867455106793028, "bits_per_byte": 0.5579558303435006, "num_chars": 33}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 907, "native_id": "Mercury_409085", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.309822082519531, "logits_per_token_corr": -1.187117440359933, "logits_per_char_corr": -0.9233135647243924, "bits_per_byte_corr": 1.3320599010143488}, "model_output": [{"sum_logits": -8.309822082519531, "num_tokens": 7, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -24.283214569091797, "logits_per_token": -1.187117440359933, "logits_per_char": -0.9233135647243924, "bits_per_byte": 1.3320599010143488, "num_chars": 9}, {"sum_logits": -9.607145309448242, "num_tokens": 7, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -25.877872467041016, "logits_per_token": -1.3724493299211775, "logits_per_char": -1.0674605899386935, "bits_per_byte": 1.5400200994500668, "num_chars": 9}, {"sum_logits": -5.620114326477051, "num_tokens": 6, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -21.82053565979004, "logits_per_token": -0.9366857210795084, "logits_per_char": -0.7025142908096313, "bits_per_byte": 1.0135138835054103, "num_chars": 8}, {"sum_logits": -5.422630786895752, "num_tokens": 6, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -22.206113815307617, "logits_per_token": -0.9037717978159586, "logits_per_char": -0.677828848361969, "bits_per_byte": 0.9779003181039924, "num_chars": 8}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 908, "native_id": "NYSEDREGENTS_2012_4_2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.196274757385254, "logits_per_token_corr": -4.732091585795085, "logits_per_char_corr": -1.1830228964487712, "bits_per_byte_corr": 1.7067412659659662}, "model_output": [{"sum_logits": -14.196274757385254, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -23.51598358154297, "logits_per_token": -4.732091585795085, "logits_per_char": -1.1830228964487712, "bits_per_byte": 1.7067412659659662, "num_chars": 12}, {"sum_logits": -15.02786922454834, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -24.068035125732422, "logits_per_token": -5.009289741516113, "logits_per_char": -1.1559899403498723, "bits_per_byte": 1.6677409542614876, "num_chars": 13}, {"sum_logits": -5.005429267883301, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -19.334339141845703, "logits_per_token": -1.6684764226277669, "logits_per_char": -0.2944370157578412, "bits_per_byte": 0.42478282248828825, "num_chars": 17}, {"sum_logits": -7.205930233001709, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -20.336050033569336, "logits_per_token": -2.401976744333903, "logits_per_char": -0.4803953488667806, "bits_per_byte": 0.6930639874767258, "num_chars": 15}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 909, "native_id": "Mercury_407539", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.960695266723633, "logits_per_token_corr": -2.7086707523890903, "logits_per_char_corr": -0.6116353311846333, "bits_per_byte_corr": 0.8824032591331832}, "model_output": [{"sum_logits": -24.64853858947754, "num_tokens": 8, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -34.95338439941406, "logits_per_token": -3.0810673236846924, "logits_per_char": -0.5732218276622684, "bits_per_byte": 0.8269842880982567, "num_chars": 43}, {"sum_logits": -19.382549285888672, "num_tokens": 8, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -38.65440368652344, "logits_per_token": -2.422818660736084, "logits_per_char": -0.5238526834023965, "bits_per_byte": 0.7557596685015566, "num_chars": 37}, {"sum_logits": -23.9804630279541, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -35.47252655029297, "logits_per_token": -3.4257804325648715, "logits_per_char": -0.7053077361162972, "bits_per_byte": 1.0175439731963345, "num_chars": 34}, {"sum_logits": -18.960695266723633, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -35.37739562988281, "logits_per_token": -2.7086707523890903, "logits_per_char": -0.6116353311846333, "bits_per_byte": 0.8824032591331832, "num_chars": 31}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 910, "native_id": "ACTAAP_2013_7_16", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.36044979095459, "logits_per_token_corr": -5.36044979095459, "logits_per_char_corr": -0.6700562238693237, "bits_per_byte_corr": 0.9666867912937529}, "model_output": [{"sum_logits": -8.670172691345215, "num_tokens": 1, "num_tokens_all": 174, "is_greedy": false, "sum_logits_uncond": -12.466421127319336, "logits_per_token": -8.670172691345215, "logits_per_char": -1.2385960987636022, "bits_per_byte": 1.7869164493519494, "num_chars": 7}, {"sum_logits": -5.81805419921875, "num_tokens": 1, "num_tokens_all": 174, "is_greedy": false, "sum_logits_uncond": -13.882975578308105, "logits_per_token": -5.81805419921875, "logits_per_char": -0.6464504665798612, "bits_per_byte": 0.9326308823157923, "num_chars": 9}, {"sum_logits": -5.096142768859863, "num_tokens": 1, "num_tokens_all": 174, "is_greedy": false, "sum_logits_uncond": -13.386828422546387, "logits_per_token": -5.096142768859863, "logits_per_char": -0.5662380854288737, "bits_per_byte": 0.8169088778112842, "num_chars": 9}, {"sum_logits": -5.36044979095459, "num_tokens": 1, "num_tokens_all": 174, "is_greedy": false, "sum_logits_uncond": -14.070293426513672, "logits_per_token": -5.36044979095459, "logits_per_char": -0.6700562238693237, "bits_per_byte": 0.9666867912937529, "num_chars": 8}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 911, "native_id": "AKDE&ED_2008_8_34", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.07416820526123, "logits_per_token_corr": -1.814833641052246, "logits_per_char_corr": -0.2520602279239231, "bits_per_byte_corr": 0.3636460408314469}, "model_output": [{"sum_logits": -15.975913047790527, "num_tokens": 6, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -32.56433868408203, "logits_per_token": -2.6626521746317544, "logits_per_char": -0.5153520337996944, "bits_per_byte": 0.7434958234753948, "num_chars": 31}, {"sum_logits": -10.30051040649414, "num_tokens": 4, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -30.982154846191406, "logits_per_token": -2.575127601623535, "logits_per_char": -0.34335034688313804, "bits_per_byte": 0.49534984273616445, "num_chars": 30}, {"sum_logits": -7.708759784698486, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -25.05362319946289, "logits_per_token": -1.5417519569396974, "logits_per_char": -0.22672822896172018, "bits_per_byte": 0.32709969155284613, "num_chars": 34}, {"sum_logits": -9.07416820526123, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -22.319272994995117, "logits_per_token": -1.814833641052246, "logits_per_char": -0.2520602279239231, "bits_per_byte": 0.3636460408314469, "num_chars": 36}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 912, "native_id": "MCAS_2004_8_3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.108972549438477, "logits_per_token_corr": -5.108972549438477, "logits_per_char_corr": -0.3929978884183444, "bits_per_byte_corr": 0.5669761047013869}, "model_output": [{"sum_logits": -5.108972549438477, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -13.935097694396973, "logits_per_token": -5.108972549438477, "logits_per_char": -0.3929978884183444, "bits_per_byte": 0.5669761047013869, "num_chars": 13}, {"sum_logits": -8.01717758178711, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -13.1109619140625, "logits_per_token": -8.01717758178711, "logits_per_char": -0.8017177581787109, "bits_per_byte": 1.1566342339178743, "num_chars": 10}, {"sum_logits": -10.207661628723145, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -15.493139266967773, "logits_per_token": -5.103830814361572, "logits_per_char": -0.6379788517951965, "bits_per_byte": 0.9204089256776262, "num_chars": 16}, {"sum_logits": -11.145242691040039, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -11.462020874023438, "logits_per_token": -11.145242691040039, "logits_per_char": -1.2383602990044489, "bits_per_byte": 1.786576262208776, "num_chars": 9}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 913, "native_id": "Mercury_415272", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.171364784240723, "logits_per_token_corr": -1.6952274640401204, "logits_per_char_corr": -0.48435070401146296, "bits_per_byte_corr": 0.6987703587289178}, "model_output": [{"sum_logits": -6.211900234222412, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -20.428485870361328, "logits_per_token": -1.0353167057037354, "logits_per_char": -0.3105950117111206, "bits_per_byte": 0.4480938831208051, "num_chars": 20}, {"sum_logits": -10.171364784240723, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -22.883338928222656, "logits_per_token": -1.6952274640401204, "logits_per_char": -0.48435070401146296, "bits_per_byte": 0.6987703587289178, "num_chars": 21}, {"sum_logits": -5.887447357177734, "num_tokens": 5, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -20.368688583374023, "logits_per_token": -1.1774894714355468, "logits_per_char": -0.2803546360560826, "bits_per_byte": 0.4044662431286311, "num_chars": 21}, {"sum_logits": -6.906839370727539, "num_tokens": 10, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -24.528146743774414, "logits_per_token": -0.6906839370727539, "logits_per_char": -0.222801270023469, "bits_per_byte": 0.32143428736685253, "num_chars": 31}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 914, "native_id": "Mercury_405387", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.349102020263672, "logits_per_token_corr": -3.1163673400878906, "logits_per_char_corr": -1.8698204040527344, "bits_per_byte_corr": 2.2479838535681798}, "model_output": [{"sum_logits": -9.246999740600586, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -17.25333023071289, "logits_per_token": -3.082333246866862, "logits_per_char": -1.849399948120117, "bits_per_byte": 2.2234334448125965, "num_chars": 5}, {"sum_logits": -9.349102020263672, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -16.645343780517578, "logits_per_token": -3.1163673400878906, "logits_per_char": -1.8698204040527344, "bits_per_byte": 2.2479838535681798, "num_chars": 5}, {"sum_logits": -9.118553161621094, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -17.35728645324707, "logits_per_token": -3.0395177205403647, "logits_per_char": -1.8237106323242187, "bits_per_byte": 2.1925485710604304, "num_chars": 5}, {"sum_logits": -9.059093475341797, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -17.521942138671875, "logits_per_token": -3.019697825113932, "logits_per_char": -1.8118186950683595, "bits_per_byte": 2.178251538639094, "num_chars": 5}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 915, "native_id": "Mercury_7116323", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.07712173461914, "logits_per_token_corr": -1.7863468594021268, "logits_per_char_corr": -0.2635593726986744, "bits_per_byte_corr": 0.3802357999724568}, "model_output": [{"sum_logits": -21.75945281982422, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -35.15959167480469, "logits_per_token": -3.626575469970703, "logits_per_char": -0.6044292449951172, "bits_per_byte": 0.8720070743233423, "num_chars": 36}, {"sum_logits": -20.495071411132812, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -37.26114273071289, "logits_per_token": -3.4158452351888022, "logits_per_char": -0.4554460313585069, "bits_per_byte": 0.6570697308339493, "num_chars": 45}, {"sum_logits": -29.2667179107666, "num_tokens": 10, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -50.703948974609375, "logits_per_token": -2.92667179107666, "logits_per_char": -0.4960460662841797, "bits_per_byte": 0.7156431998811782, "num_chars": 59}, {"sum_logits": -16.07712173461914, "num_tokens": 9, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -41.05096435546875, "logits_per_token": -1.7863468594021268, "logits_per_char": -0.2635593726986744, "bits_per_byte": 0.3802357999724568, "num_chars": 61}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 916, "native_id": "Mercury_7213430", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.267356872558594, "logits_per_token_corr": -1.605613072713216, "logits_per_char_corr": -0.27524795532226565, "bits_per_byte_corr": 0.3970988601585449}, "model_output": [{"sum_logits": -22.46550941467285, "num_tokens": 12, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -39.61793899536133, "logits_per_token": -1.872125784556071, "logits_per_char": -0.32093584878104076, "bits_per_byte": 0.4630125574802304, "num_chars": 70}, {"sum_logits": -19.267356872558594, "num_tokens": 12, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -38.4046516418457, "logits_per_token": -1.605613072713216, "logits_per_char": -0.27524795532226565, "bits_per_byte": 0.3970988601585449, "num_chars": 70}, {"sum_logits": -21.989925384521484, "num_tokens": 10, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -40.20698547363281, "logits_per_token": -2.1989925384521483, "logits_per_char": -0.43979850769042966, "bits_per_byte": 0.6344951260358054, "num_chars": 50}, {"sum_logits": -22.109619140625, "num_tokens": 10, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -41.00460433959961, "logits_per_token": -2.2109619140625, "logits_per_char": -0.43352194393382354, "bits_per_byte": 0.6254399586303198, "num_chars": 51}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 917, "native_id": "Mercury_7234360", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.36257553100586, "logits_per_token_corr": -1.9203219413757324, "logits_per_char_corr": -0.39391219310271436, "bits_per_byte_corr": 0.56829516753539}, "model_output": [{"sum_logits": -15.36257553100586, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -44.65815734863281, "logits_per_token": -1.9203219413757324, "logits_per_char": -0.39391219310271436, "bits_per_byte": 0.56829516753539, "num_chars": 39}, {"sum_logits": -17.32132911682129, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -45.37222671508789, "logits_per_token": -1.9245921240912542, "logits_per_char": -0.3849184248182509, "bits_per_byte": 0.5553199026324808, "num_chars": 45}, {"sum_logits": -25.346843719482422, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -57.15764236450195, "logits_per_token": -2.8163159688313804, "logits_per_char": -0.5280592441558838, "bits_per_byte": 0.7618284528398152, "num_chars": 48}, {"sum_logits": -35.47639465332031, "num_tokens": 13, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -69.15669250488281, "logits_per_token": -2.7289534348707933, "logits_per_char": -0.6012948246325477, "bits_per_byte": 0.8674850616101987, "num_chars": 59}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 918, "native_id": "Mercury_405685", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -32.15385818481445, "logits_per_token_corr": -4.019232273101807, "logits_per_char_corr": -0.7477641438328942, "bits_per_byte_corr": 1.0787956220630732}, "model_output": [{"sum_logits": -16.42644691467285, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -21.393680572509766, "logits_per_token": -3.2852893829345704, "logits_per_char": -0.746656677939675, "bits_per_byte": 1.077197886510971, "num_chars": 22}, {"sum_logits": -19.330562591552734, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -23.092315673828125, "logits_per_token": -3.2217604319254556, "logits_per_char": -0.7732225036621094, "bits_per_byte": 1.115524271537875, "num_chars": 25}, {"sum_logits": -22.947166442871094, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -27.726593017578125, "logits_per_token": -3.2781666346958707, "logits_per_char": -0.674916660084444, "bits_per_byte": 0.973698918517869, "num_chars": 34}, {"sum_logits": -32.15385818481445, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -40.519649505615234, "logits_per_token": -4.019232273101807, "logits_per_char": -0.7477641438328942, "bits_per_byte": 1.0787956220630732, "num_chars": 43}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 919, "native_id": "Mercury_7236740", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.421795845031738, "logits_per_token_corr": -3.210897922515869, "logits_per_char_corr": -0.8027244806289673, "bits_per_byte_corr": 1.158086627404412}, "model_output": [{"sum_logits": -4.751296520233154, "num_tokens": 2, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -10.506362915039062, "logits_per_token": -2.375648260116577, "logits_per_char": -0.6787566457475934, "bits_per_byte": 0.9792388467911834, "num_chars": 7}, {"sum_logits": -6.421795845031738, "num_tokens": 2, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -11.056684494018555, "logits_per_token": -3.210897922515869, "logits_per_char": -0.8027244806289673, "bits_per_byte": 1.158086627404412, "num_chars": 8}, {"sum_logits": -4.6721391677856445, "num_tokens": 2, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -11.306564331054688, "logits_per_token": -2.3360695838928223, "logits_per_char": -0.5191265741984049, "bits_per_byte": 0.7489413341902533, "num_chars": 9}, {"sum_logits": -3.7587904930114746, "num_tokens": 2, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -13.678836822509766, "logits_per_token": -1.8793952465057373, "logits_per_char": -0.37587904930114746, "bits_per_byte": 0.5422788404012132, "num_chars": 10}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 920, "native_id": "Mercury_7116235", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.111915588378906, "logits_per_token_corr": -2.4445593697684154, "logits_per_char_corr": -0.42779788970947263, "bits_per_byte_corr": 0.6171818939870632}, "model_output": [{"sum_logits": -17.111915588378906, "num_tokens": 7, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -34.73583221435547, "logits_per_token": -2.4445593697684154, "logits_per_char": -0.42779788970947263, "bits_per_byte": 0.6171818939870632, "num_chars": 40}, {"sum_logits": -17.387123107910156, "num_tokens": 6, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -37.34003829956055, "logits_per_token": -2.8978538513183594, "logits_per_char": -0.4240761733636623, "bits_per_byte": 0.6118125922713635, "num_chars": 41}, {"sum_logits": -22.1263370513916, "num_tokens": 7, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -39.026153564453125, "logits_per_token": -3.160905293055943, "logits_per_char": -0.5531584262847901, "bits_per_byte": 0.7980389184275832, "num_chars": 40}, {"sum_logits": -18.60348892211914, "num_tokens": 7, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -36.100059509277344, "logits_per_token": -2.657641274588449, "logits_per_char": -0.5167635811699761, "bits_per_byte": 0.7455322558664814, "num_chars": 36}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 921, "native_id": "Mercury_SC_405357", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.770946502685547, "logits_per_token_corr": -2.6815637860979353, "logits_per_char_corr": -0.536312757219587, "bits_per_byte_corr": 0.7737357552067408}, "model_output": [{"sum_logits": -14.962648391723633, "num_tokens": 5, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -28.550453186035156, "logits_per_token": -2.9925296783447264, "logits_per_char": -0.49875494639078777, "bits_per_byte": 0.7195512877773472, "num_chars": 30}, {"sum_logits": -22.748416900634766, "num_tokens": 6, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -40.34653091430664, "logits_per_token": -3.791402816772461, "logits_per_char": -0.689345966685902, "bits_per_byte": 0.9945160075952738, "num_chars": 33}, {"sum_logits": -18.770946502685547, "num_tokens": 7, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -35.59775924682617, "logits_per_token": -2.6815637860979353, "logits_per_char": -0.536312757219587, "bits_per_byte": 0.7737357552067408, "num_chars": 35}, {"sum_logits": -21.28176498413086, "num_tokens": 6, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -35.4606819152832, "logits_per_token": -3.5469608306884766, "logits_per_char": -0.5190674386373381, "bits_per_byte": 0.7488560196095619, "num_chars": 41}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 922, "native_id": "Mercury_7042945", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.109356880187988, "logits_per_token_corr": -2.527339220046997, "logits_per_char_corr": -0.4395372556603473, "bits_per_byte_corr": 0.634118219027583}, "model_output": [{"sum_logits": -9.151952743530273, "num_tokens": 4, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -29.268367767333984, "logits_per_token": -2.2879881858825684, "logits_per_char": -0.4575976371765137, "bits_per_byte": 0.6601738418775378, "num_chars": 20}, {"sum_logits": -10.109356880187988, "num_tokens": 4, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -20.14810562133789, "logits_per_token": -2.527339220046997, "logits_per_char": -0.4395372556603473, "bits_per_byte": 0.634118219027583, "num_chars": 23}, {"sum_logits": -10.90609073638916, "num_tokens": 5, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -23.64115333557129, "logits_per_token": -2.181218147277832, "logits_per_char": -0.45442044734954834, "bits_per_byte": 0.6555901258702087, "num_chars": 24}, {"sum_logits": -8.555206298828125, "num_tokens": 4, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -21.011510848999023, "logits_per_token": -2.1388015747070312, "logits_per_char": -0.35646692911783856, "bits_per_byte": 0.5142730708795928, "num_chars": 24}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 923, "native_id": "Mercury_7106750", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.42941951751709, "logits_per_token_corr": -1.1588243908352323, "logits_per_char_corr": -0.3067476328681497, "bits_per_byte_corr": 0.4425432887436259}, "model_output": [{"sum_logits": -23.153398513793945, "num_tokens": 14, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -43.09536361694336, "logits_per_token": -1.6538141795567103, "logits_per_char": -0.4209708820689808, "bits_per_byte": 0.6073326039200075, "num_chars": 55}, {"sum_logits": -7.384383678436279, "num_tokens": 9, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -35.779537200927734, "logits_per_token": -0.8204870753818088, "logits_per_char": -0.16409741507636177, "bits_per_byte": 0.23674252695353504, "num_chars": 45}, {"sum_logits": -22.967947006225586, "num_tokens": 9, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -40.036048889160156, "logits_per_token": -2.551994111802843, "logits_per_char": -0.6207553244925834, "bits_per_byte": 0.8955606282515128, "num_chars": 37}, {"sum_logits": -10.42941951751709, "num_tokens": 9, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -41.3105583190918, "logits_per_token": -1.1588243908352323, "logits_per_char": -0.3067476328681497, "bits_per_byte": 0.4425432887436259, "num_chars": 34}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 924, "native_id": "MDSA_2009_4_34", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -30.00307846069336, "logits_per_token_corr": -3.3336753845214844, "logits_per_char_corr": -0.5357692582266671, "bits_per_byte_corr": 0.7729516519049264}, "model_output": [{"sum_logits": -13.00514030456543, "num_tokens": 8, "num_tokens_all": 290, "is_greedy": false, "sum_logits_uncond": -30.28818702697754, "logits_per_token": -1.6256425380706787, "logits_per_char": -0.317198544013791, "bits_per_byte": 0.4576207664262246, "num_chars": 41}, {"sum_logits": -30.00307846069336, "num_tokens": 9, "num_tokens_all": 291, "is_greedy": false, "sum_logits_uncond": -41.96632385253906, "logits_per_token": -3.3336753845214844, "logits_per_char": -0.5357692582266671, "bits_per_byte": 0.7729516519049264, "num_chars": 56}, {"sum_logits": -34.16785430908203, "num_tokens": 11, "num_tokens_all": 293, "is_greedy": false, "sum_logits_uncond": -51.86133575439453, "logits_per_token": -3.106168573552912, "logits_per_char": -0.5423468937949528, "bits_per_byte": 0.7824411741200739, "num_chars": 63}, {"sum_logits": -27.660690307617188, "num_tokens": 12, "num_tokens_all": 294, "is_greedy": false, "sum_logits_uncond": -43.309879302978516, "logits_per_token": -2.3050575256347656, "logits_per_char": -0.43219828605651855, "bits_per_byte": 0.6235303239748969, "num_chars": 64}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 925, "native_id": "Mercury_7016310", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.048849582672119, "logits_per_token_corr": -5.048849582672119, "logits_per_char_corr": -0.6311061978340149, "bits_per_byte_corr": 0.9104937818900765}, "model_output": [{"sum_logits": -5.028367519378662, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -14.433614730834961, "logits_per_token": -5.028367519378662, "logits_per_char": -0.5587075021531847, "bits_per_byte": 0.8060445426644385, "num_chars": 9}, {"sum_logits": -5.774831295013428, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -16.689517974853516, "logits_per_token": -5.774831295013428, "logits_per_char": -0.7218539118766785, "bits_per_byte": 1.041415058911531, "num_chars": 8}, {"sum_logits": -5.048849582672119, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -16.651010513305664, "logits_per_token": -5.048849582672119, "logits_per_char": -0.6311061978340149, "bits_per_byte": 0.9104937818900765, "num_chars": 8}, {"sum_logits": -6.14036226272583, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -14.843673706054688, "logits_per_token": -6.14036226272583, "logits_per_char": -0.8771946089608329, "bits_per_byte": 1.2655243122432362, "num_chars": 7}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 926, "native_id": "VASoL_2007_3_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.531271934509277, "logits_per_token_corr": -5.531271934509277, "logits_per_char_corr": -1.3828179836273193, "bits_per_byte_corr": 1.9949846474326427}, "model_output": [{"sum_logits": -5.531271934509277, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -10.564221382141113, "logits_per_token": -5.531271934509277, "logits_per_char": -1.3828179836273193, "bits_per_byte": 1.9949846474326427, "num_chars": 4}, {"sum_logits": -10.793046951293945, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -17.302711486816406, "logits_per_token": -5.396523475646973, "logits_per_char": -0.9811860864812677, "bits_per_byte": 1.4155523011567914, "num_chars": 11}, {"sum_logits": -9.810132026672363, "num_tokens": 4, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -18.89670181274414, "logits_per_token": -2.452533006668091, "logits_per_char": -0.8175110022226969, "bits_per_byte": 1.1794190687796986, "num_chars": 12}, {"sum_logits": -7.906462669372559, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -9.711779594421387, "logits_per_token": -7.906462669372559, "logits_per_char": -1.3177437782287598, "bits_per_byte": 1.9011024140142836, "num_chars": 6}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 927, "native_id": "Mercury_7030468", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.119884490966797, "logits_per_token_corr": -2.1119884490966796, "logits_per_char_corr": -0.47999737479469995, "bits_per_byte_corr": 0.6924898322565323}, "model_output": [{"sum_logits": -21.119884490966797, "num_tokens": 10, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -34.45338439941406, "logits_per_token": -2.1119884490966796, "logits_per_char": -0.47999737479469995, "bits_per_byte": 0.6924898322565323, "num_chars": 44}, {"sum_logits": -30.770801544189453, "num_tokens": 14, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -36.634613037109375, "logits_per_token": -2.1979143960135326, "logits_per_char": -0.4333915710449219, "bits_per_byte": 0.6252518703100348, "num_chars": 71}, {"sum_logits": -41.56709289550781, "num_tokens": 20, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -57.41786193847656, "logits_per_token": -2.0783546447753904, "logits_per_char": -0.4948463439941406, "bits_per_byte": 0.7139123664828937, "num_chars": 84}, {"sum_logits": -31.495643615722656, "num_tokens": 12, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -45.54961013793945, "logits_per_token": -2.624636967976888, "logits_per_char": -0.5079942518664945, "bits_per_byte": 0.7328807879684172, "num_chars": 62}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 928, "native_id": "Mercury_SC_402616", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -28.503755569458008, "logits_per_token_corr": -4.750625928243001, "logits_per_char_corr": -0.7917709880405002, "bits_per_byte_corr": 1.142284077966605}, "model_output": [{"sum_logits": -12.635587692260742, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -26.644628524780273, "logits_per_token": -2.5271175384521483, "logits_per_char": -0.5493733779243801, "bits_per_byte": 0.792578247928491, "num_chars": 23}, {"sum_logits": -16.40371322631836, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -28.899885177612305, "logits_per_token": -3.280742645263672, "logits_per_char": -0.5291520395586568, "bits_per_byte": 0.7634050233481031, "num_chars": 31}, {"sum_logits": -19.669273376464844, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -29.836471557617188, "logits_per_token": -3.933854675292969, "logits_per_char": -0.6556424458821615, "bits_per_byte": 0.9458921052711845, "num_chars": 30}, {"sum_logits": -28.503755569458008, "num_tokens": 6, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -43.39373016357422, "logits_per_token": -4.750625928243001, "logits_per_char": -0.7917709880405002, "bits_per_byte": 1.142284077966605, "num_chars": 36}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 929, "native_id": "Mercury_405464", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -24.161510467529297, "logits_per_token_corr": -2.684612274169922, "logits_per_char_corr": -0.4474353790283203, "bits_per_byte_corr": 0.6455128024428952}, "model_output": [{"sum_logits": -24.161510467529297, "num_tokens": 9, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -40.245750427246094, "logits_per_token": -2.684612274169922, "logits_per_char": -0.4474353790283203, "bits_per_byte": 0.6455128024428952, "num_chars": 54}, {"sum_logits": -31.804378509521484, "num_tokens": 10, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -38.86894607543945, "logits_per_token": -3.1804378509521483, "logits_per_char": -0.588969972398546, "bits_per_byte": 0.8497040584125024, "num_chars": 54}, {"sum_logits": -32.114830017089844, "num_tokens": 11, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -39.56428527832031, "logits_per_token": -2.919530001553622, "logits_per_char": -0.6175928849440354, "bits_per_byte": 0.8909981923977082, "num_chars": 52}, {"sum_logits": -34.447750091552734, "num_tokens": 13, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -50.21087646484375, "logits_per_token": -2.649826930119441, "logits_per_char": -0.5741291681925456, "bits_per_byte": 0.8282933037816861, "num_chars": 60}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 930, "native_id": "Mercury_7205608", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.41033935546875, "logits_per_token_corr": -5.1025848388671875, "logits_per_char_corr": -0.7038048053609913, "bits_per_byte_corr": 1.015375702448854}, "model_output": [{"sum_logits": -17.695964813232422, "num_tokens": 4, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -23.952781677246094, "logits_per_token": -4.4239912033081055, "logits_per_char": -0.6319987433297294, "bits_per_byte": 0.9117814528505125, "num_chars": 28}, {"sum_logits": -20.41033935546875, "num_tokens": 4, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -25.898651123046875, "logits_per_token": -5.1025848388671875, "logits_per_char": -0.7038048053609913, "bits_per_byte": 1.015375702448854, "num_chars": 29}, {"sum_logits": -20.54883575439453, "num_tokens": 6, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -33.168846130371094, "logits_per_token": -3.4248059590657554, "logits_per_char": -0.7610679909035012, "bits_per_byte": 1.0979890162565966, "num_chars": 27}, {"sum_logits": -19.190752029418945, "num_tokens": 5, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -27.88388442993164, "logits_per_token": -3.838150405883789, "logits_per_char": -0.6853840010506767, "bits_per_byte": 0.9888000994211577, "num_chars": 28}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 931, "native_id": "Mercury_7015208", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.868183135986328, "logits_per_token_corr": -4.173636627197266, "logits_per_char_corr": -0.719592521930563, "bits_per_byte_corr": 1.0381525628507517}, "model_output": [{"sum_logits": -21.178043365478516, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -37.22172927856445, "logits_per_token": -4.235608673095703, "logits_per_char": -0.7302773574302936, "bits_per_byte": 1.0535675220389387, "num_chars": 29}, {"sum_logits": -20.868183135986328, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -37.65786361694336, "logits_per_token": -4.173636627197266, "logits_per_char": -0.719592521930563, "bits_per_byte": 1.0381525628507517, "num_chars": 29}, {"sum_logits": -13.826457023620605, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -33.148494720458984, "logits_per_token": -2.7652914047241213, "logits_per_char": -0.4608819007873535, "bits_per_byte": 0.6649120327018718, "num_chars": 30}, {"sum_logits": -19.37445831298828, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -36.59781265258789, "logits_per_token": -3.874891662597656, "logits_per_char": -0.6054518222808838, "bits_per_byte": 0.8734823415024446, "num_chars": 32}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 932, "native_id": "Mercury_SC_409666", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.985790252685547, "logits_per_token_corr": -2.9979700360979353, "logits_per_char_corr": -0.47694977847012604, "bits_per_byte_corr": 0.6880930801524349}, "model_output": [{"sum_logits": -18.84213638305664, "num_tokens": 8, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -34.08451461791992, "logits_per_token": -2.35526704788208, "logits_per_char": -0.5233926773071289, "bits_per_byte": 0.7550960199891349, "num_chars": 36}, {"sum_logits": -16.3673095703125, "num_tokens": 8, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -27.011795043945312, "logits_per_token": -2.0459136962890625, "logits_per_char": -0.4091827392578125, "bits_per_byte": 0.590325908745032, "num_chars": 40}, {"sum_logits": -23.470746994018555, "num_tokens": 8, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -35.468692779541016, "logits_per_token": -2.9338433742523193, "logits_per_char": -0.6018140254876553, "bits_per_byte": 0.8682341101090882, "num_chars": 39}, {"sum_logits": -20.985790252685547, "num_tokens": 7, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -33.409393310546875, "logits_per_token": -2.9979700360979353, "logits_per_char": -0.47694977847012604, "bits_per_byte": 0.6880930801524349, "num_chars": 44}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 933, "native_id": "Mercury_7230353", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.849022626876831, "logits_per_token_corr": -3.849022626876831, "logits_per_char_corr": -0.3207518855730693, "bits_per_byte_corr": 0.46274715467238375}, "model_output": [{"sum_logits": -4.155888557434082, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -14.359955787658691, "logits_per_token": -4.155888557434082, "logits_per_char": -0.593698365347726, "bits_per_byte": 0.8565256874716637, "num_chars": 7}, {"sum_logits": -3.849022626876831, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -16.535337448120117, "logits_per_token": -3.849022626876831, "logits_per_char": -0.3207518855730693, "bits_per_byte": 0.46274715467238375, "num_chars": 12}, {"sum_logits": -5.577674865722656, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -18.407367706298828, "logits_per_token": -2.788837432861328, "logits_per_char": -0.3718449910481771, "bits_per_byte": 0.5364589245649914, "num_chars": 15}, {"sum_logits": -7.4278154373168945, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -15.352563858032227, "logits_per_token": -7.4278154373168945, "logits_per_char": -0.6752559488469904, "bits_per_byte": 0.9741884087330245, "num_chars": 11}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 934, "native_id": "Mercury_7150343", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.303857803344727, "logits_per_token_corr": -2.033761978149414, "logits_per_char_corr": -0.33279741460626777, "bits_per_byte_corr": 0.48012517967347573}, "model_output": [{"sum_logits": -17.196247100830078, "num_tokens": 9, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -46.28288269042969, "logits_per_token": -1.9106941223144531, "logits_per_char": -0.3126590381969105, "bits_per_byte": 0.4510716438961198, "num_chars": 55}, {"sum_logits": -18.303857803344727, "num_tokens": 9, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -47.19797897338867, "logits_per_token": -2.033761978149414, "logits_per_char": -0.33279741460626777, "bits_per_byte": 0.48012517967347573, "num_chars": 55}, {"sum_logits": -18.751848220825195, "num_tokens": 8, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -37.88714599609375, "logits_per_token": -2.3439810276031494, "logits_per_char": -0.3676832984475529, "bits_per_byte": 0.5304548712883623, "num_chars": 51}, {"sum_logits": -18.821151733398438, "num_tokens": 10, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -37.505374908447266, "logits_per_token": -1.8821151733398438, "logits_per_char": -0.36904219085094975, "bits_per_byte": 0.5324153386198461, "num_chars": 51}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 935, "native_id": "Mercury_7026723", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.738969326019287, "logits_per_token_corr": -2.2463231086730957, "logits_per_char_corr": -0.3964099603540757, "bits_per_byte_corr": 0.5718986839622265}, "model_output": [{"sum_logits": -4.709978103637695, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -15.89700698852539, "logits_per_token": -2.3549890518188477, "logits_per_char": -0.5233309004041884, "bits_per_byte": 0.7550068947576211, "num_chars": 9}, {"sum_logits": -4.284507751464844, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -15.733524322509766, "logits_per_token": -2.142253875732422, "logits_per_char": -0.4284507751464844, "bits_per_byte": 0.6181238085693095, "num_chars": 10}, {"sum_logits": -8.372961044311523, "num_tokens": 3, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -20.674516677856445, "logits_per_token": -2.790987014770508, "logits_per_char": -0.5581974029541016, "bits_per_byte": 0.8053086250795594, "num_chars": 15}, {"sum_logits": -6.738969326019287, "num_tokens": 3, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -21.75379180908203, "logits_per_token": -2.2463231086730957, "logits_per_char": -0.3964099603540757, "bits_per_byte": 0.5718986839622265, "num_chars": 17}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 936, "native_id": "Mercury_7024273", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.568078994750977, "logits_per_token_corr": -2.356807899475098, "logits_per_char_corr": -0.4621191959755093, "bits_per_byte_corr": 0.6666970723339413}, "model_output": [{"sum_logits": -29.44109535217285, "num_tokens": 9, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -41.90745544433594, "logits_per_token": -3.2712328169080944, "logits_per_char": -0.6542465633816189, "bits_per_byte": 0.9438782725099866, "num_chars": 45}, {"sum_logits": -26.368545532226562, "num_tokens": 8, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -37.17131805419922, "logits_per_token": -3.2960681915283203, "logits_per_char": -0.5992851257324219, "bits_per_byte": 0.8645856789733051, "num_chars": 44}, {"sum_logits": -23.568078994750977, "num_tokens": 10, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -33.96699142456055, "logits_per_token": -2.356807899475098, "logits_per_char": -0.4621191959755093, "bits_per_byte": 0.6666970723339413, "num_chars": 51}, {"sum_logits": -35.66698455810547, "num_tokens": 7, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -43.11568832397461, "logits_per_token": -5.095283508300781, "logits_per_char": -0.8492139180501302, "bits_per_byte": 1.2251567082256896, "num_chars": 42}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 937, "native_id": "AKDE&ED_2008_8_40", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -47.70724105834961, "logits_per_token_corr": -5.300804562038845, "logits_per_char_corr": -0.691409290700719, "bits_per_byte_corr": 0.9974927549191996}, "model_output": [{"sum_logits": -39.57609939575195, "num_tokens": 8, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -51.641456604003906, "logits_per_token": -4.947012424468994, "logits_per_char": -0.7067160606384277, "bits_per_byte": 1.0195757560003762, "num_chars": 56}, {"sum_logits": -37.467376708984375, "num_tokens": 8, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -52.34571075439453, "logits_per_token": -4.683422088623047, "logits_per_char": -0.6573223984032347, "bits_per_byte": 0.9483157644422675, "num_chars": 57}, {"sum_logits": -47.70724105834961, "num_tokens": 9, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -61.067352294921875, "logits_per_token": -5.300804562038845, "logits_per_char": -0.691409290700719, "bits_per_byte": 0.9974927549191996, "num_chars": 69}, {"sum_logits": -45.91108322143555, "num_tokens": 9, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -60.00255584716797, "logits_per_token": -5.101231469048394, "logits_per_char": -0.6558726174490792, "bits_per_byte": 0.9462241726493306, "num_chars": 70}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 938, "native_id": "Mercury_183033", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.204981803894043, "logits_per_token_corr": -1.6409963607788085, "logits_per_char_corr": -0.39071341923304964, "bits_per_byte_corr": 0.5636803123366962}, "model_output": [{"sum_logits": -7.840224266052246, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -32.85282897949219, "logits_per_token": -1.5680448532104492, "logits_per_char": -0.41264338242380244, "bits_per_byte": 0.5953185614788955, "num_chars": 19}, {"sum_logits": -8.204981803894043, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -35.43441390991211, "logits_per_token": -1.6409963607788085, "logits_per_char": -0.39071341923304964, "bits_per_byte": 0.5636803123366962, "num_chars": 21}, {"sum_logits": -14.162534713745117, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -36.01561737060547, "logits_per_token": -2.8325069427490233, "logits_per_char": -0.5665013885498047, "bits_per_byte": 0.8172887439181022, "num_chars": 25}, {"sum_logits": -10.447993278503418, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -37.68665313720703, "logits_per_token": -2.0895986557006836, "logits_per_char": -0.45426057732623554, "bits_per_byte": 0.6553594821803883, "num_chars": 23}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 939, "native_id": "Mercury_402364", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.635517120361328, "logits_per_token_corr": -1.6622167314801897, "logits_per_char_corr": -0.5540722438267299, "bits_per_byte_corr": 0.7993572784636181}, "model_output": [{"sum_logits": -7.937299728393555, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -17.607019424438477, "logits_per_token": -2.645766576131185, "logits_per_char": -0.7215727025812323, "bits_per_byte": 1.0410093596555388, "num_chars": 11}, {"sum_logits": -9.054977416992188, "num_tokens": 3, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -17.418804168701172, "logits_per_token": -3.0183258056640625, "logits_per_char": -0.8231797651811079, "bits_per_byte": 1.187597364987779, "num_chars": 11}, {"sum_logits": -10.224026679992676, "num_tokens": 7, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -22.224987030029297, "logits_per_token": -1.4605752399989538, "logits_per_char": -0.4868584133329846, "bits_per_byte": 0.7023882185310707, "num_chars": 21}, {"sum_logits": -11.635517120361328, "num_tokens": 7, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -21.99935531616211, "logits_per_token": -1.6622167314801897, "logits_per_char": -0.5540722438267299, "bits_per_byte": 0.7993572784636181, "num_chars": 21}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 940, "native_id": "Mercury_7263183", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.728452205657959, "logits_per_token_corr": -1.909484068552653, "logits_per_char_corr": -0.249063139376433, "bits_per_byte_corr": 0.3593221560468748}, "model_output": [{"sum_logits": -4.671966075897217, "num_tokens": 3, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -17.77805519104004, "logits_per_token": -1.5573220252990723, "logits_per_char": -0.2458929513630114, "bits_per_byte": 0.3547485415212225, "num_chars": 19}, {"sum_logits": -5.728452205657959, "num_tokens": 3, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -16.986217498779297, "logits_per_token": -1.909484068552653, "logits_per_char": -0.249063139376433, "bits_per_byte": 0.3593221560468748, "num_chars": 23}, {"sum_logits": -5.801944255828857, "num_tokens": 2, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -18.009998321533203, "logits_per_token": -2.9009721279144287, "logits_per_char": -0.3626215159893036, "bits_per_byte": 0.523152262837782, "num_chars": 16}, {"sum_logits": -8.352920532226562, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -19.510868072509766, "logits_per_token": -1.6705841064453124, "logits_per_char": -0.39775812058221727, "bits_per_byte": 0.5738436680376915, "num_chars": 21}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 941, "native_id": "Mercury_7222530", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -18.055137634277344, "logits_per_token_corr": -2.0061264038085938, "logits_per_char_corr": -0.41034403714266693, "bits_per_byte_corr": 0.5920013074445075}, "model_output": [{"sum_logits": -29.370838165283203, "num_tokens": 11, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -46.51136016845703, "logits_per_token": -2.6700761968439277, "logits_per_char": -0.4895139694213867, "bits_per_byte": 0.7062193761306136, "num_chars": 60}, {"sum_logits": -18.055137634277344, "num_tokens": 9, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -38.285945892333984, "logits_per_token": -2.0061264038085938, "logits_per_char": -0.41034403714266693, "bits_per_byte": 0.5920013074445075, "num_chars": 44}, {"sum_logits": -25.075965881347656, "num_tokens": 10, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -43.908687591552734, "logits_per_token": -2.5075965881347657, "logits_per_char": -0.47313143172354066, "bits_per_byte": 0.6825843702367377, "num_chars": 53}, {"sum_logits": -30.685199737548828, "num_tokens": 10, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -44.159385681152344, "logits_per_token": -3.0685199737548827, "logits_per_char": -0.626228566072425, "bits_per_byte": 0.9034568467363433, "num_chars": 49}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 942, "native_id": "OHAT_2009_8_36", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.513716697692871, "logits_per_token_corr": -1.279301855299208, "logits_per_char_corr": -0.295223505069048, "bits_per_byte_corr": 0.42591748671727925}, "model_output": [{"sum_logits": -21.653837203979492, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -25.637094497680664, "logits_per_token": -4.330767440795898, "logits_per_char": -0.8328398924607497, "bits_per_byte": 1.2015339827084845, "num_chars": 26}, {"sum_logits": -20.843318939208984, "num_tokens": 6, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -27.828575134277344, "logits_per_token": -3.473886489868164, "logits_per_char": -0.6316157254305753, "bits_per_byte": 0.9112288748268308, "num_chars": 33}, {"sum_logits": -11.513716697692871, "num_tokens": 9, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -25.216703414916992, "logits_per_token": -1.279301855299208, "logits_per_char": -0.295223505069048, "bits_per_byte": 0.42591748671727925, "num_chars": 39}, {"sum_logits": -22.594165802001953, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -33.68663024902344, "logits_per_token": -2.824270725250244, "logits_per_char": -0.5135037682273171, "bits_per_byte": 0.7408293398998783, "num_chars": 44}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 943, "native_id": "Mercury_7141750", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -21.008298873901367, "logits_per_token_corr": -1.9098453521728516, "logits_per_char_corr": -0.3501383145650228, "bits_per_byte_corr": 0.5051428100485412}, "model_output": [{"sum_logits": -25.430625915527344, "num_tokens": 8, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -41.35939025878906, "logits_per_token": -3.178828239440918, "logits_per_char": -0.6202591686713986, "bits_per_byte": 0.8948448267087807, "num_chars": 41}, {"sum_logits": -21.008298873901367, "num_tokens": 11, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -41.12710189819336, "logits_per_token": -1.9098453521728516, "logits_per_char": -0.3501383145650228, "bits_per_byte": 0.5051428100485412, "num_chars": 60}, {"sum_logits": -25.1668643951416, "num_tokens": 12, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -42.71353530883789, "logits_per_token": -2.0972386995951333, "logits_per_char": -0.4660530443544741, "bits_per_byte": 0.6723724158818869, "num_chars": 54}, {"sum_logits": -39.5092658996582, "num_tokens": 11, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -49.005714416503906, "logits_per_token": -3.591751445423473, "logits_per_char": -0.7746914882285922, "bits_per_byte": 1.1176435682870838, "num_chars": 51}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 944, "native_id": "TIMSS_2011_4_pg45", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.978804111480713, "logits_per_token_corr": -2.659601370493571, "logits_per_char_corr": -0.4693414183223949, "bits_per_byte_corr": 0.6771165366979981}, "model_output": [{"sum_logits": -7.978804111480713, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -23.99909210205078, "logits_per_token": -2.659601370493571, "logits_per_char": -0.4693414183223949, "bits_per_byte": 0.6771165366979981, "num_chars": 17}, {"sum_logits": -17.50864028930664, "num_tokens": 5, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -32.59593200683594, "logits_per_token": -3.501728057861328, "logits_per_char": -0.6734092418964093, "bits_per_byte": 0.971524173773444, "num_chars": 26}, {"sum_logits": -18.170921325683594, "num_tokens": 6, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -32.31136703491211, "logits_per_token": -3.028486887613932, "logits_per_char": -0.6056973775227864, "bits_per_byte": 0.873836602832202, "num_chars": 30}, {"sum_logits": -17.288612365722656, "num_tokens": 6, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -35.25746154785156, "logits_per_token": -2.8814353942871094, "logits_per_char": -0.4802392323811849, "bits_per_byte": 0.6928387589971556, "num_chars": 36}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 945, "native_id": "MCAS_2014_5_5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.200744152069092, "logits_per_token_corr": -3.100372076034546, "logits_per_char_corr": -0.516728679339091, "bits_per_byte_corr": 0.7454819031681454}, "model_output": [{"sum_logits": -6.200744152069092, "num_tokens": 2, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -16.877239227294922, "logits_per_token": -3.100372076034546, "logits_per_char": -0.516728679339091, "bits_per_byte": 0.7454819031681454, "num_chars": 12}, {"sum_logits": -11.226215362548828, "num_tokens": 2, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -21.48999786376953, "logits_per_token": -5.613107681274414, "logits_per_char": -1.1226215362548828, "bits_per_byte": 1.6196005231512327, "num_chars": 10}, {"sum_logits": -11.981861114501953, "num_tokens": 2, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -17.93915367126465, "logits_per_token": -5.990930557250977, "logits_per_char": -1.1981861114501953, "bits_per_byte": 1.7286171610524694, "num_chars": 10}, {"sum_logits": -10.518828392028809, "num_tokens": 2, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -20.982776641845703, "logits_per_token": -5.259414196014404, "logits_per_char": -0.8091406455406775, "bits_per_byte": 1.1673431967040686, "num_chars": 13}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 946, "native_id": "Mercury_SC_409241", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.219722747802734, "logits_per_token_corr": -3.0366204579671225, "logits_per_char_corr": -0.5693663358688354, "bits_per_byte_corr": 0.821421989207679}, "model_output": [{"sum_logits": -19.39080238342285, "num_tokens": 4, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -25.958053588867188, "logits_per_token": -4.847700595855713, "logits_per_char": -1.0205685464959395, "bits_per_byte": 1.4723691809180073, "num_chars": 19}, {"sum_logits": -18.308517456054688, "num_tokens": 6, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -37.24091720581055, "logits_per_token": -3.0514195760091147, "logits_per_char": -0.5548035592743845, "bits_per_byte": 0.8004123436332756, "num_chars": 33}, {"sum_logits": -18.219722747802734, "num_tokens": 6, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -29.517065048217773, "logits_per_token": -3.0366204579671225, "logits_per_char": -0.5693663358688354, "bits_per_byte": 0.821421989207679, "num_chars": 32}, {"sum_logits": -18.662799835205078, "num_tokens": 7, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -36.937652587890625, "logits_per_token": -2.666114262172154, "logits_per_char": -0.5184111065334744, "bits_per_byte": 0.747909132538141, "num_chars": 36}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 947, "native_id": "Mercury_SC_401147", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.900949478149414, "logits_per_token_corr": -2.8168249130249023, "logits_per_char_corr": -0.482884270804269, "bits_per_byte_corr": 0.6966547428131026}, "model_output": [{"sum_logits": -13.589164733886719, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -28.227115631103516, "logits_per_token": -2.2648607889811196, "logits_per_char": -0.4383601527060232, "bits_per_byte": 0.6324200184327627, "num_chars": 31}, {"sum_logits": -22.531850814819336, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -35.72388458251953, "logits_per_token": -3.2188358306884766, "logits_per_char": -0.6627014945535099, "bits_per_byte": 0.95607615978274, "num_chars": 34}, {"sum_logits": -16.900949478149414, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -31.421661376953125, "logits_per_token": -2.8168249130249023, "logits_per_char": -0.482884270804269, "bits_per_byte": 0.6966547428131026, "num_chars": 35}, {"sum_logits": -12.980369567871094, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -42.901390075683594, "logits_per_token": -1.6225461959838867, "logits_per_char": -0.31659437970417303, "bits_per_byte": 0.45674914157285607, "num_chars": 41}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 948, "native_id": "Mercury_SC_LBS10273", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.4099907875061035, "logits_per_token_corr": -2.2049953937530518, "logits_per_char_corr": -0.6299986839294434, "bits_per_byte_corr": 0.9088959770722344}, "model_output": [{"sum_logits": -4.4699883460998535, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -14.209413528442383, "logits_per_token": -2.2349941730499268, "logits_per_char": -0.6385697637285505, "bits_per_byte": 0.9212614313934787, "num_chars": 7}, {"sum_logits": -9.748924255371094, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -15.216288566589355, "logits_per_token": -4.874462127685547, "logits_per_char": -0.9748924255371094, "bits_per_byte": 1.4064724677236113, "num_chars": 10}, {"sum_logits": -6.7477569580078125, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -15.744370460510254, "logits_per_token": -3.3738784790039062, "logits_per_char": -0.6747756958007812, "bits_per_byte": 0.9734955500448862, "num_chars": 10}, {"sum_logits": -4.4099907875061035, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -15.350211143493652, "logits_per_token": -2.2049953937530518, "logits_per_char": -0.6299986839294434, "bits_per_byte": 0.9088959770722344, "num_chars": 7}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 949, "native_id": "Mercury_401523", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.709238052368164, "logits_per_token_corr": -2.451539675394694, "logits_per_char_corr": -0.4457344864353989, "bits_per_byte_corr": 0.643058933134001}, "model_output": [{"sum_logits": -14.709238052368164, "num_tokens": 6, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -26.34387969970703, "logits_per_token": -2.451539675394694, "logits_per_char": -0.4457344864353989, "bits_per_byte": 0.643058933134001, "num_chars": 33}, {"sum_logits": -17.339797973632812, "num_tokens": 7, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -24.937217712402344, "logits_per_token": -2.477113996233259, "logits_per_char": -0.5099940580480239, "bits_per_byte": 0.7357658984292508, "num_chars": 34}, {"sum_logits": -17.89619255065918, "num_tokens": 7, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -31.706207275390625, "logits_per_token": -2.5565989358084544, "logits_per_char": -0.4836808797475454, "bits_per_byte": 0.6978040065850961, "num_chars": 37}, {"sum_logits": -49.8968391418457, "num_tokens": 9, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -59.966514587402344, "logits_per_token": -5.544093237982856, "logits_per_char": -0.978369394938151, "bits_per_byte": 1.4114886742358201, "num_chars": 51}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 950, "native_id": "Mercury_401865", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -33.47878646850586, "logits_per_token_corr": -2.575291266808143, "logits_per_char_corr": -0.6832405401735889, "bits_per_byte_corr": 0.9857077390434414}, "model_output": [{"sum_logits": -25.21509552001953, "num_tokens": 12, "num_tokens_all": 230, "is_greedy": false, "sum_logits_uncond": -46.09858322143555, "logits_per_token": -2.1012579600016275, "logits_per_char": -0.7004193200005425, "bits_per_byte": 1.0104914795083286, "num_chars": 36}, {"sum_logits": -23.565271377563477, "num_tokens": 13, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -41.53191375732422, "logits_per_token": -1.8127131828894982, "logits_per_char": -0.6545908715989854, "bits_per_byte": 0.9443750042677189, "num_chars": 36}, {"sum_logits": -23.552566528320312, "num_tokens": 11, "num_tokens_all": 229, "is_greedy": false, "sum_logits_uncond": -43.332374572753906, "logits_per_token": -2.141142411665483, "logits_per_char": -0.5744528421541539, "bits_per_byte": 0.8287602666009638, "num_chars": 41}, {"sum_logits": -33.47878646850586, "num_tokens": 13, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -50.359649658203125, "logits_per_token": -2.575291266808143, "logits_per_char": -0.6832405401735889, "bits_per_byte": 0.9857077390434414, "num_chars": 49}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 951, "native_id": "MCAS_2013_8_29435", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.4735000133514404, "logits_per_token_corr": -1.7367500066757202, "logits_per_char_corr": -0.3859444459279378, "bits_per_byte_corr": 0.5568001381992745}, "model_output": [{"sum_logits": -5.648544788360596, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -18.508447647094727, "logits_per_token": -2.824272394180298, "logits_per_char": -0.7060680985450745, "bits_per_byte": 1.0186409443016107, "num_chars": 8}, {"sum_logits": -3.4735000133514404, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": true, "sum_logits_uncond": -17.24582290649414, "logits_per_token": -1.7367500066757202, "logits_per_char": -0.3859444459279378, "bits_per_byte": 0.5568001381992745, "num_chars": 9}, {"sum_logits": -5.051235675811768, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -18.631189346313477, "logits_per_token": -2.525617837905884, "logits_per_char": -0.5612484084235297, "bits_per_byte": 0.8097102955400316, "num_chars": 9}, {"sum_logits": -8.59009838104248, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -19.519683837890625, "logits_per_token": -4.29504919052124, "logits_per_char": -0.71584153175354, "bits_per_byte": 1.0327410279239335, "num_chars": 12}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 952, "native_id": "Mercury_SC_406720", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.96624755859375, "logits_per_token_corr": -2.4915618896484375, "logits_per_char_corr": -0.4530112526633523, "bits_per_byte_corr": 0.6535570876847852}, "model_output": [{"sum_logits": -14.103463172912598, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -26.706645965576172, "logits_per_token": -3.5258657932281494, "logits_per_char": -0.7835257318284776, "bits_per_byte": 1.1303886877186526, "num_chars": 18}, {"sum_logits": -15.227574348449707, "num_tokens": 5, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -27.126903533935547, "logits_per_token": -3.0455148696899412, "logits_per_char": -0.662068449932596, "bits_per_byte": 0.9551628694474854, "num_chars": 23}, {"sum_logits": -9.96624755859375, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -31.172178268432617, "logits_per_token": -2.4915618896484375, "logits_per_char": -0.4530112526633523, "bits_per_byte": 0.6535570876847852, "num_chars": 22}, {"sum_logits": -11.352960586547852, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -25.159774780273438, "logits_per_token": -2.838240146636963, "logits_per_char": -0.7568640391031901, "bits_per_byte": 1.0919239958421474, "num_chars": 15}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 953, "native_id": "NYSEDREGENTS_2013_8_34", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.508932113647461, "logits_per_token_corr": -1.5848220189412434, "logits_per_char_corr": -0.23192517350359662, "bits_per_byte_corr": 0.3345972976711917}, "model_output": [{"sum_logits": -9.508932113647461, "num_tokens": 6, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -27.400440216064453, "logits_per_token": -1.5848220189412434, "logits_per_char": -0.23192517350359662, "bits_per_byte": 0.3345972976711917, "num_chars": 41}, {"sum_logits": -30.366182327270508, "num_tokens": 13, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -48.868812561035156, "logits_per_token": -2.3358601790208082, "logits_per_char": -0.46717203580416167, "bits_per_byte": 0.6739867792971496, "num_chars": 65}, {"sum_logits": -25.849302291870117, "num_tokens": 11, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -40.19664764404297, "logits_per_token": -2.3499365719881924, "logits_per_char": -0.46159468378339497, "bits_per_byte": 0.6659403611954916, "num_chars": 56}, {"sum_logits": -21.29141616821289, "num_tokens": 9, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -37.1802864074707, "logits_per_token": -2.3657129075792103, "logits_per_char": -0.40172483336250736, "bits_per_byte": 0.579566424894451, "num_chars": 53}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 954, "native_id": "Mercury_7038833", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.483463287353516, "logits_per_token_corr": -2.296692657470703, "logits_per_char_corr": -0.3959814926673626, "bits_per_byte_corr": 0.5712805357554239}, "model_output": [{"sum_logits": -13.187549591064453, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -17.102474212646484, "logits_per_token": -4.395849863688151, "logits_per_char": -1.4652832878960504, "bits_per_byte": 2.113956932946626, "num_chars": 9}, {"sum_logits": -9.784411430358887, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -17.758041381835938, "logits_per_token": -3.2614704767862954, "logits_per_char": -0.9784411430358887, "bits_per_byte": 1.4115921848606197, "num_chars": 10}, {"sum_logits": -11.483463287353516, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -33.36708068847656, "logits_per_token": -2.296692657470703, "logits_per_char": -0.3959814926673626, "bits_per_byte": 0.5712805357554239, "num_chars": 29}, {"sum_logits": -10.973400115966797, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -30.242755889892578, "logits_per_token": -1.8289000193277996, "logits_per_char": -0.3325272762414181, "bits_per_byte": 0.47973545239415294, "num_chars": 33}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 955, "native_id": "Mercury_175560", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.2352147102355957, "logits_per_token_corr": -3.2352147102355957, "logits_per_char_corr": -0.4621735300336565, "bits_per_byte_corr": 0.6667754598101817}, "model_output": [{"sum_logits": -1.5917803049087524, "num_tokens": 1, "num_tokens_all": 175, "is_greedy": true, "sum_logits_uncond": -12.68324089050293, "logits_per_token": -1.5917803049087524, "logits_per_char": -0.26529671748479206, "bits_per_byte": 0.38274225867970485, "num_chars": 6}, {"sum_logits": -4.613295078277588, "num_tokens": 1, "num_tokens_all": 175, "is_greedy": false, "sum_logits_uncond": -11.467022895812988, "logits_per_token": -4.613295078277588, "logits_per_char": -0.7688825130462646, "bits_per_byte": 1.1092629885988867, "num_chars": 6}, {"sum_logits": -3.2352147102355957, "num_tokens": 1, "num_tokens_all": 175, "is_greedy": false, "sum_logits_uncond": -12.709002494812012, "logits_per_token": -3.2352147102355957, "logits_per_char": -0.4621735300336565, "bits_per_byte": 0.6667754598101817, "num_chars": 7}, {"sum_logits": -2.0438427925109863, "num_tokens": 1, "num_tokens_all": 175, "is_greedy": false, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -2.0438427925109863, "logits_per_char": -0.34064046541849774, "bits_per_byte": 0.4914403101857282, "num_chars": 6}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 956, "native_id": "Mercury_7005005", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.844188690185547, "logits_per_token_corr": -2.9688377380371094, "logits_per_char_corr": -0.46388089656829834, "bits_per_byte_corr": 0.669238669042691}, "model_output": [{"sum_logits": -16.41816520690918, "num_tokens": 5, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -25.58514404296875, "logits_per_token": -3.2836330413818358, "logits_per_char": -0.5863630431038993, "bits_per_byte": 0.8459430544471648, "num_chars": 28}, {"sum_logits": -14.894024848937988, "num_tokens": 6, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -28.76031494140625, "logits_per_token": -2.482337474822998, "logits_per_char": -0.5319294588906425, "bits_per_byte": 0.7674119924448309, "num_chars": 28}, {"sum_logits": -14.844188690185547, "num_tokens": 5, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -29.031660079956055, "logits_per_token": -2.9688377380371094, "logits_per_char": -0.46388089656829834, "bits_per_byte": 0.669238669042691, "num_chars": 32}, {"sum_logits": -32.73883056640625, "num_tokens": 9, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -35.98978042602539, "logits_per_token": -3.6376478407118054, "logits_per_char": -0.9629067813648897, "bits_per_byte": 1.3891808383144777, "num_chars": 34}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 957, "native_id": "Mercury_183890", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.213672637939453, "logits_per_token_corr": -2.0534181594848633, "logits_per_char_corr": -0.6318209721491888, "bits_per_byte_corr": 0.9115249832499334}, "model_output": [{"sum_logits": -10.083571434020996, "num_tokens": 6, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -25.139568328857422, "logits_per_token": -1.6805952390034993, "logits_per_char": -0.4801700682867141, "bits_per_byte": 0.6927389763010551, "num_chars": 21}, {"sum_logits": -10.770892143249512, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -17.793245315551758, "logits_per_token": -3.5902973810831704, "logits_per_char": -1.077089214324951, "bits_per_byte": 1.5539112681027132, "num_chars": 10}, {"sum_logits": -26.16336441040039, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -26.825645446777344, "logits_per_token": -13.081682205200195, "logits_per_char": -2.1802803675333657, "bits_per_byte": 3.145479673990213, "num_chars": 12}, {"sum_logits": -8.213672637939453, "num_tokens": 4, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -17.343761444091797, "logits_per_token": -2.0534181594848633, "logits_per_char": -0.6318209721491888, "bits_per_byte": 0.9115249832499334, "num_chars": 13}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 958, "native_id": "Mercury_7270358", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.949470520019531, "logits_per_token_corr": -1.5898941040039063, "logits_per_char_corr": -0.41839318526418584, "bits_per_byte_corr": 0.6036137735228119}, "model_output": [{"sum_logits": -11.941985130310059, "num_tokens": 6, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -28.418109893798828, "logits_per_token": -1.9903308550516765, "logits_per_char": -0.4975827137629191, "bits_per_byte": 0.7178601135783517, "num_chars": 24}, {"sum_logits": -12.708388328552246, "num_tokens": 5, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -29.153602600097656, "logits_per_token": -2.5416776657104494, "logits_per_char": -0.668862543608013, "bits_per_byte": 0.9649646747003516, "num_chars": 19}, {"sum_logits": -10.42605209350586, "num_tokens": 6, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -29.939128875732422, "logits_per_token": -1.7376753489176433, "logits_per_char": -0.3595190377070986, "bits_per_byte": 0.518676332805576, "num_chars": 29}, {"sum_logits": -7.949470520019531, "num_tokens": 5, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -28.481552124023438, "logits_per_token": -1.5898941040039063, "logits_per_char": -0.41839318526418584, "bits_per_byte": 0.6036137735228119, "num_chars": 19}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 959, "native_id": "MCAS_2013_5_29411", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.1057791709899902, "logits_per_token_corr": -1.5528895854949951, "logits_per_char_corr": -0.517629861831665, "bits_per_byte_corr": 0.746782034681119}, "model_output": [{"sum_logits": -4.4380388259887695, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -12.967279434204102, "logits_per_token": -4.4380388259887695, "logits_per_char": -0.7396731376647949, "bits_per_byte": 1.0671227675885457, "num_chars": 6}, {"sum_logits": -1.7347946166992188, "num_tokens": 1, "num_tokens_all": 186, "is_greedy": true, "sum_logits_uncond": -12.292281150817871, "logits_per_token": -1.7347946166992188, "logits_per_char": -0.4336986541748047, "bits_per_byte": 0.6256948976186578, "num_chars": 4}, {"sum_logits": -3.1057791709899902, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -17.733797073364258, "logits_per_token": -1.5528895854949951, "logits_per_char": -0.517629861831665, "bits_per_byte": 0.746782034681119, "num_chars": 6}, {"sum_logits": -3.6544971466064453, "num_tokens": 2, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -18.238903045654297, "logits_per_token": -1.8272485733032227, "logits_per_char": -0.730899429321289, "bits_per_byte": 1.0544649820711547, "num_chars": 5}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 960, "native_id": "ACTAAP_2007_7_31", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -40.680824279785156, "logits_per_token_corr": -2.1410960147255347, "logits_per_char_corr": -0.4675956813768409, "bits_per_byte_corr": 0.6745979706639488}, "model_output": [{"sum_logits": -28.85913848876953, "num_tokens": 11, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -36.51984405517578, "logits_per_token": -2.6235580444335938, "logits_per_char": -0.5445120469579157, "bits_per_byte": 0.7855648298510477, "num_chars": 53}, {"sum_logits": -28.41494369506836, "num_tokens": 12, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -44.42827606201172, "logits_per_token": -2.36791197458903, "logits_per_char": -0.4985077841240063, "bits_per_byte": 0.7191947080007666, "num_chars": 57}, {"sum_logits": -40.680824279785156, "num_tokens": 19, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -65.65039825439453, "logits_per_token": -2.1410960147255347, "logits_per_char": -0.4675956813768409, "bits_per_byte": 0.6745979706639488, "num_chars": 87}, {"sum_logits": -51.77427291870117, "num_tokens": 22, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -73.31027221679688, "logits_per_token": -2.353376041759144, "logits_per_char": -0.5177427291870117, "bits_per_byte": 0.746944867854956, "num_chars": 100}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 961, "native_id": "Mercury_7082023", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.057424545288086, "logits_per_token_corr": -4.057424545288086, "logits_per_char_corr": -0.36885677684437146, "bits_per_byte_corr": 0.5321478427520441}, "model_output": [{"sum_logits": -5.736127853393555, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -15.784364700317383, "logits_per_token": -5.736127853393555, "logits_per_char": -0.4412406041071965, "bits_per_byte": 0.6365756313847601, "num_chars": 13}, {"sum_logits": -4.057424545288086, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -14.865165710449219, "logits_per_token": -4.057424545288086, "logits_per_char": -0.36885677684437146, "bits_per_byte": 0.5321478427520441, "num_chars": 11}, {"sum_logits": -13.183775901794434, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -20.177383422851562, "logits_per_token": -6.591887950897217, "logits_per_char": -0.9416982786996024, "bits_per_byte": 1.3585834366945655, "num_chars": 14}, {"sum_logits": -10.708932876586914, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -13.142045021057129, "logits_per_token": -10.708932876586914, "logits_per_char": -1.1898814307318792, "bits_per_byte": 1.7166360393639801, "num_chars": 9}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 962, "native_id": "MCAS_2003_8_21", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.992488861083984, "logits_per_token_corr": -1.748122215270996, "logits_per_char_corr": -0.874061107635498, "bits_per_byte_corr": 1.2610036254205534}, "model_output": [{"sum_logits": -9.823867797851562, "num_tokens": 3, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -22.213211059570312, "logits_per_token": -3.274622599283854, "logits_per_char": -0.701704842703683, "bits_per_byte": 1.012346096737101, "num_chars": 14}, {"sum_logits": -6.992488861083984, "num_tokens": 4, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -18.906982421875, "logits_per_token": -1.748122215270996, "logits_per_char": -0.874061107635498, "bits_per_byte": 1.2610036254205534, "num_chars": 8}, {"sum_logits": -10.193564414978027, "num_tokens": 3, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -20.59380340576172, "logits_per_token": -3.397854804992676, "logits_per_char": -1.0193564414978027, "bits_per_byte": 1.4706204830481573, "num_chars": 10}, {"sum_logits": -5.916118144989014, "num_tokens": 3, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -19.344533920288086, "logits_per_token": -1.9720393816630046, "logits_per_char": -0.45508601115300107, "bits_per_byte": 0.6565503314688458, "num_chars": 13}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 963, "native_id": "NYSEDREGENTS_2015_8_9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.007944107055664, "logits_per_token_corr": -3.601588821411133, "logits_per_char_corr": -0.5145126887730189, "bits_per_byte_corr": 0.7422849045678144}, "model_output": [{"sum_logits": -10.321282386779785, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -26.460824966430664, "logits_per_token": -3.4404274622599282, "logits_per_char": -0.44875140812086023, "bits_per_byte": 0.6474114310883695, "num_chars": 23}, {"sum_logits": -18.007944107055664, "num_tokens": 5, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -33.828521728515625, "logits_per_token": -3.601588821411133, "logits_per_char": -0.5145126887730189, "bits_per_byte": 0.7422849045678144, "num_chars": 35}, {"sum_logits": -16.802520751953125, "num_tokens": 4, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -32.910587310791016, "logits_per_token": -4.200630187988281, "logits_per_char": -0.672100830078125, "bits_per_byte": 0.9696365345317635, "num_chars": 25}, {"sum_logits": -7.046917915344238, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -25.28274154663086, "logits_per_token": -3.523458957672119, "logits_per_char": -0.3708904165970652, "bits_per_byte": 0.535081764738212, "num_chars": 19}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 964, "native_id": "Mercury_7064750", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.53365707397461, "logits_per_token_corr": -3.191707134246826, "logits_per_char_corr": -0.5803103880448774, "bits_per_byte_corr": 0.8372109190092961}, "model_output": [{"sum_logits": -26.124631881713867, "num_tokens": 8, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -44.891014099121094, "logits_per_token": -3.2655789852142334, "logits_per_char": -0.6220150448027111, "bits_per_byte": 0.8973780204958424, "num_chars": 42}, {"sum_logits": -21.425992965698242, "num_tokens": 7, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -46.59028625488281, "logits_per_token": -3.060856137956892, "logits_per_char": -0.49827890617902887, "bits_per_byte": 0.7188645069245785, "num_chars": 43}, {"sum_logits": -25.064170837402344, "num_tokens": 9, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -41.0665283203125, "logits_per_token": -2.7849078708224826, "logits_per_char": -0.455712197043679, "bits_per_byte": 0.6574537267480022, "num_chars": 55}, {"sum_logits": -25.53365707397461, "num_tokens": 8, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -42.132606506347656, "logits_per_token": -3.191707134246826, "logits_per_char": -0.5803103880448774, "bits_per_byte": 0.8372109190092961, "num_chars": 44}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 965, "native_id": "TIMSS_2007_8_pg113", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.325641632080078, "logits_per_token_corr": -5.325641632080078, "logits_per_char_corr": -0.7608059474400112, "bits_per_byte_corr": 1.0976109674513221}, "model_output": [{"sum_logits": -4.874149322509766, "num_tokens": 1, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -14.3268461227417, "logits_per_token": -4.874149322509766, "logits_per_char": -0.6963070460728237, "bits_per_byte": 1.0045587223060275, "num_chars": 7}, {"sum_logits": -7.36689567565918, "num_tokens": 1, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -15.201915740966797, "logits_per_token": -7.36689567565918, "logits_per_char": -1.0524136679513114, "bits_per_byte": 1.518311979718212, "num_chars": 7}, {"sum_logits": -5.325641632080078, "num_tokens": 1, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -13.13901424407959, "logits_per_token": -5.325641632080078, "logits_per_char": -0.7608059474400112, "bits_per_byte": 1.0976109674513221, "num_chars": 7}, {"sum_logits": -4.435075759887695, "num_tokens": 1, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -14.288768768310547, "logits_per_token": -4.435075759887695, "logits_per_char": -0.40318870544433594, "bits_per_byte": 0.5816783458874023, "num_chars": 11}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 966, "native_id": "Mercury_7173583", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -37.50554275512695, "logits_per_token_corr": -2.885041750394381, "logits_per_char_corr": -0.750110855102539, "bits_per_byte_corr": 1.0821812107741904}, "model_output": [{"sum_logits": -20.06830596923828, "num_tokens": 9, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -28.271785736083984, "logits_per_token": -2.229811774359809, "logits_per_char": -0.6081304839163115, "bits_per_byte": 0.8773468333600984, "num_chars": 33}, {"sum_logits": -23.666316986083984, "num_tokens": 9, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -32.867530822753906, "logits_per_token": -2.629590776231554, "logits_per_char": -0.7395724058151245, "bits_per_byte": 1.0669774422485667, "num_chars": 32}, {"sum_logits": -37.50554275512695, "num_tokens": 13, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -42.4179573059082, "logits_per_token": -2.885041750394381, "logits_per_char": -0.750110855102539, "bits_per_byte": 1.0821812107741904, "num_chars": 50}, {"sum_logits": -24.032272338867188, "num_tokens": 12, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -34.54533386230469, "logits_per_token": -2.0026893615722656, "logits_per_char": -0.44504208034939235, "bits_per_byte": 0.6420600023074372, "num_chars": 54}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 967, "native_id": "Mercury_403930", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.357391357421875, "logits_per_token_corr": -3.059565226236979, "logits_per_char_corr": -0.7060535137469952, "bits_per_byte_corr": 1.0186199028857494}, "model_output": [{"sum_logits": -11.989453315734863, "num_tokens": 5, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -29.707721710205078, "logits_per_token": -2.3978906631469727, "logits_per_char": -0.5709263483683268, "bits_per_byte": 0.8236726115044215, "num_chars": 21}, {"sum_logits": -14.437426567077637, "num_tokens": 8, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -30.447362899780273, "logits_per_token": -1.8046783208847046, "logits_per_char": -0.534719502484357, "bits_per_byte": 0.7714371745013497, "num_chars": 27}, {"sum_logits": -18.357391357421875, "num_tokens": 6, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -30.982933044433594, "logits_per_token": -3.059565226236979, "logits_per_char": -0.7060535137469952, "bits_per_byte": 1.0186199028857494, "num_chars": 26}, {"sum_logits": -16.310405731201172, "num_tokens": 11, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -33.937557220458984, "logits_per_token": -1.4827641573819248, "logits_per_char": -0.4408217765189506, "bits_per_byte": 0.6359713909002098, "num_chars": 37}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 968, "native_id": "Mercury_417118", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.66347885131836, "logits_per_token_corr": -4.22115961710612, "logits_per_char_corr": -0.5505860370138417, "bits_per_byte_corr": 0.7943277451831473}, "model_output": [{"sum_logits": -17.76056671142578, "num_tokens": 3, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -22.624303817749023, "logits_per_token": -5.920188903808594, "logits_per_char": -0.7721985526706862, "bits_per_byte": 1.1140470225204342, "num_chars": 23}, {"sum_logits": -12.774381637573242, "num_tokens": 3, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -21.571430206298828, "logits_per_token": -4.258127212524414, "logits_per_char": -0.5322659015655518, "bits_per_byte": 0.7678973766234664, "num_chars": 24}, {"sum_logits": -12.66347885131836, "num_tokens": 3, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -23.63541030883789, "logits_per_token": -4.22115961710612, "logits_per_char": -0.5505860370138417, "bits_per_byte": 0.7943277451831473, "num_chars": 23}, {"sum_logits": -14.281906127929688, "num_tokens": 3, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -22.34343719482422, "logits_per_token": -4.7606353759765625, "logits_per_char": -0.6491775512695312, "bits_per_byte": 0.9365652338736664, "num_chars": 22}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 969, "native_id": "Mercury_7143010", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.623943328857422, "logits_per_token_corr": -3.2319919041224887, "logits_per_char_corr": -0.514180530201305, "bits_per_byte_corr": 0.7418057010436137}, "model_output": [{"sum_logits": -22.583593368530273, "num_tokens": 7, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -37.93743133544922, "logits_per_token": -3.2262276240757535, "logits_per_char": -0.5018576304117839, "bits_per_byte": 0.724027514627887, "num_chars": 45}, {"sum_logits": -22.623943328857422, "num_tokens": 7, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -36.60123062133789, "logits_per_token": -3.2319919041224887, "logits_per_char": -0.514180530201305, "bits_per_byte": 0.7418057010436137, "num_chars": 44}, {"sum_logits": -27.057775497436523, "num_tokens": 7, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -32.02362823486328, "logits_per_token": -3.865396499633789, "logits_per_char": -0.62925059296364, "bits_per_byte": 0.9078167099457354, "num_chars": 43}, {"sum_logits": -14.490987777709961, "num_tokens": 8, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -26.95136260986328, "logits_per_token": -1.8113734722137451, "logits_per_char": -0.4528433680534363, "bits_per_byte": 0.6533148813906176, "num_chars": 32}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 970, "native_id": "Mercury_SC_401801", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.39704704284668, "logits_per_token_corr": -3.799015680948893, "logits_per_char_corr": -0.569852352142334, "bits_per_byte_corr": 0.8221231624752472}, "model_output": [{"sum_logits": -11.39704704284668, "num_tokens": 3, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -23.99597930908203, "logits_per_token": -3.799015680948893, "logits_per_char": -0.569852352142334, "bits_per_byte": 0.8221231624752472, "num_chars": 20}, {"sum_logits": -15.903827667236328, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -24.429195404052734, "logits_per_token": -3.975956916809082, "logits_per_char": -0.5484078505943561, "bits_per_byte": 0.7911852864376216, "num_chars": 29}, {"sum_logits": -25.818408966064453, "num_tokens": 7, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -39.80353546142578, "logits_per_token": -3.6883441380092075, "logits_per_char": -0.679431814896433, "bits_per_byte": 0.9802129099739763, "num_chars": 38}, {"sum_logits": -23.06058120727539, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -36.56159210205078, "logits_per_token": -2.882572650909424, "logits_per_char": -0.5362925862157067, "bits_per_byte": 0.7737066545994729, "num_chars": 43}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 971, "native_id": "Mercury_410334", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.225719451904297, "logits_per_token_corr": -2.4608170645577565, "logits_per_char_corr": -0.43064298629760744, "bits_per_byte_corr": 0.6212865007256184}, "model_output": [{"sum_logits": -21.056764602661133, "num_tokens": 7, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -37.39314270019531, "logits_per_token": -3.0081092289515903, "logits_per_char": -0.6193166059606215, "bits_per_byte": 0.8934849961602148, "num_chars": 34}, {"sum_logits": -17.225719451904297, "num_tokens": 7, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -37.000152587890625, "logits_per_token": -2.4608170645577565, "logits_per_char": -0.43064298629760744, "bits_per_byte": 0.6212865007256184, "num_chars": 40}, {"sum_logits": -20.426971435546875, "num_tokens": 8, "num_tokens_all": 234, "is_greedy": false, "sum_logits_uncond": -37.701744079589844, "logits_per_token": -2.5533714294433594, "logits_per_char": -0.45393269856770835, "bits_per_byte": 0.654886453121448, "num_chars": 45}, {"sum_logits": -25.857885360717773, "num_tokens": 10, "num_tokens_all": 236, "is_greedy": false, "sum_logits_uncond": -42.81291198730469, "logits_per_token": -2.585788536071777, "logits_per_char": -0.5070173600140739, "bits_per_byte": 0.7314714309374442, "num_chars": 51}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 972, "native_id": "NAEP_2000_4_S12+3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.330009460449219, "logits_per_token_corr": -8.330009460449219, "logits_per_char_corr": -1.6660018920898438, "bits_per_byte_corr": 2.4035326678313744}, "model_output": [{"sum_logits": -5.4643402099609375, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -11.405319213867188, "logits_per_token": -2.7321701049804688, "logits_per_char": -1.0928680419921875, "bits_per_byte": 1.576675304529293, "num_chars": 5}, {"sum_logits": -8.330009460449219, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -10.674525260925293, "logits_per_token": -8.330009460449219, "logits_per_char": -1.6660018920898438, "bits_per_byte": 2.4035326678313744, "num_chars": 5}, {"sum_logits": -3.475390911102295, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -10.685851097106934, "logits_per_token": -3.475390911102295, "logits_per_char": -0.695078182220459, "bits_per_byte": 1.002785846520292, "num_chars": 5}, {"sum_logits": -3.9510626792907715, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -10.106708526611328, "logits_per_token": -3.9510626792907715, "logits_per_char": -0.7902125358581543, "bits_per_byte": 1.1400357067316704, "num_chars": 5}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 973, "native_id": "Mercury_7218015", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.1561808586120605, "logits_per_token_corr": -1.7890452146530151, "logits_per_char_corr": -0.31113829820052435, "bits_per_byte_corr": 0.44887767984485044}, "model_output": [{"sum_logits": -11.836591720581055, "num_tokens": 4, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -23.76857566833496, "logits_per_token": -2.9591479301452637, "logits_per_char": -0.4227354185921805, "bits_per_byte": 0.609878292011497, "num_chars": 28}, {"sum_logits": -18.326690673828125, "num_tokens": 4, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -25.740991592407227, "logits_per_token": -4.581672668457031, "logits_per_char": -0.7636121114095052, "bits_per_byte": 1.1016594062940352, "num_chars": 24}, {"sum_logits": -11.611220359802246, "num_tokens": 3, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -21.404115676879883, "logits_per_token": -3.8704067866007485, "logits_per_char": -0.5529152552286783, "bits_per_byte": 0.7976880967508428, "num_chars": 21}, {"sum_logits": -7.1561808586120605, "num_tokens": 4, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -22.36220359802246, "logits_per_token": -1.7890452146530151, "logits_per_char": -0.31113829820052435, "bits_per_byte": 0.44887767984485044, "num_chars": 23}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 974, "native_id": "Mercury_7109603", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.569852828979492, "logits_per_token_corr": -0.8699866208163175, "logits_per_char_corr": -0.16220089540643207, "bits_per_byte_corr": 0.2340064274307771}, "model_output": [{"sum_logits": -9.569852828979492, "num_tokens": 11, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -26.509159088134766, "logits_per_token": -0.8699866208163175, "logits_per_char": -0.16220089540643207, "bits_per_byte": 0.2340064274307771, "num_chars": 59}, {"sum_logits": -9.960746765136719, "num_tokens": 12, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -26.49821662902832, "logits_per_token": -0.8300622304280599, "logits_per_char": -0.16601244608561197, "bits_per_byte": 0.23950533269373087, "num_chars": 60}, {"sum_logits": -35.60455322265625, "num_tokens": 11, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -46.960906982421875, "logits_per_token": -3.236777565696023, "logits_per_char": -0.5934092203776041, "bits_per_byte": 0.8561085394571706, "num_chars": 60}, {"sum_logits": -31.6527042388916, "num_tokens": 10, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -43.98054504394531, "logits_per_token": -3.1652704238891602, "logits_per_char": -0.5861611896091037, "bits_per_byte": 0.8456518414112368, "num_chars": 54}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 975, "native_id": "NYSEDREGENTS_2008_8_42", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.128907203674316, "logits_per_token_corr": -1.6411134004592896, "logits_per_char_corr": -0.3282226800918579, "bits_per_byte_corr": 0.4735252328761483}, "model_output": [{"sum_logits": -17.330060958862305, "num_tokens": 4, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -32.644287109375, "logits_per_token": -4.332515239715576, "logits_per_char": -0.9121084715190687, "bits_per_byte": 1.315894368614318, "num_chars": 19}, {"sum_logits": -20.91787338256836, "num_tokens": 4, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -34.29418182373047, "logits_per_token": -5.22946834564209, "logits_per_char": -0.95081242648038, "bits_per_byte": 1.3717323724998318, "num_chars": 22}, {"sum_logits": -13.128907203674316, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -37.757171630859375, "logits_per_token": -1.6411134004592896, "logits_per_char": -0.3282226800918579, "bits_per_byte": 0.4735252328761483, "num_chars": 40}, {"sum_logits": -16.91551399230957, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -37.33882141113281, "logits_per_token": -2.1144392490386963, "logits_per_char": -0.3933840463327807, "bits_per_byte": 0.5675332128095446, "num_chars": 43}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 976, "native_id": "NAEP_2000_8_S11+11", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.22093391418457, "logits_per_token_corr": -1.4350778261820476, "logits_per_char_corr": -0.3189061835960106, "bits_per_byte_corr": 0.4600843695831203}, "model_output": [{"sum_logits": -18.18293571472168, "num_tokens": 12, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -40.53476333618164, "logits_per_token": -1.5152446428934734, "logits_per_char": -0.33672103175410517, "bits_per_byte": 0.4857857626750117, "num_chars": 54}, {"sum_logits": -17.22093391418457, "num_tokens": 12, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -42.3510856628418, "logits_per_token": -1.4350778261820476, "logits_per_char": -0.3189061835960106, "bits_per_byte": 0.4600843695831203, "num_chars": 54}, {"sum_logits": -16.54987907409668, "num_tokens": 13, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -32.11290740966797, "logits_per_token": -1.27306762108436, "logits_per_char": -0.27130949301797835, "bits_per_byte": 0.3914168601234174, "num_chars": 61}, {"sum_logits": -25.372753143310547, "num_tokens": 23, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -46.02973556518555, "logits_per_token": -1.1031631801439368, "logits_per_char": -0.23493289947509766, "bits_per_byte": 0.3389365290146322, "num_chars": 108}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 977, "native_id": "Mercury_7271670", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.578990936279297, "logits_per_token_corr": -3.511284419468471, "logits_per_char_corr": -0.6144747734069824, "bits_per_byte_corr": 0.8864997083462599}, "model_output": [{"sum_logits": -19.79111099243164, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -33.01792907714844, "logits_per_token": -2.827301570347377, "logits_per_char": -0.6384229352397304, "bits_per_byte": 0.9210496026607966, "num_chars": 31}, {"sum_logits": -21.24083709716797, "num_tokens": 8, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -34.121971130371094, "logits_per_token": -2.655104637145996, "logits_per_char": -0.6851882934570312, "bits_per_byte": 0.9885177530463409, "num_chars": 31}, {"sum_logits": -26.528888702392578, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -36.25826644897461, "logits_per_token": -3.7898412431989397, "logits_per_char": -0.6316402071998233, "bits_per_byte": 0.911264194553917, "num_chars": 42}, {"sum_logits": -24.578990936279297, "num_tokens": 7, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -36.24720001220703, "logits_per_token": -3.511284419468471, "logits_per_char": -0.6144747734069824, "bits_per_byte": 0.8864997083462599, "num_chars": 40}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 978, "native_id": "ACTAAP_2009_5_8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.9582901000976562, "logits_per_token_corr": -1.4791450500488281, "logits_per_char_corr": -0.29582901000976564, "bits_per_byte_corr": 0.42679104569248705}, "model_output": [{"sum_logits": -4.210175514221191, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -16.651010513305664, "logits_per_token": -4.210175514221191, "logits_per_char": -0.5262719392776489, "bits_per_byte": 0.7592499169554272, "num_chars": 8}, {"sum_logits": -2.9582901000976562, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -17.61030387878418, "logits_per_token": -1.4791450500488281, "logits_per_char": -0.29582901000976564, "bits_per_byte": 0.42679104569248705, "num_chars": 10}, {"sum_logits": -3.7327356338500977, "num_tokens": 1, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -17.22313690185547, "logits_per_token": -3.7327356338500977, "logits_per_char": -0.3732735633850098, "bits_per_byte": 0.5385199187908927, "num_chars": 10}, {"sum_logits": -3.997049331665039, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -17.819902420043945, "logits_per_token": -1.9985246658325195, "logits_per_char": -0.2855035236903599, "bits_per_byte": 0.4118945177847029, "num_chars": 14}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 979, "native_id": "NYSEDREGENTS_2012_4_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.796109199523926, "logits_per_token_corr": -5.898054599761963, "logits_per_char_corr": -1.1796109199523925, "bits_per_byte_corr": 1.7018188243950074}, "model_output": [{"sum_logits": -12.014002799987793, "num_tokens": 2, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -21.09389877319336, "logits_per_token": -6.0070013999938965, "logits_per_char": -0.9241540615375226, "bits_per_byte": 1.3332724815985355, "num_chars": 13}, {"sum_logits": -11.796109199523926, "num_tokens": 2, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -18.863985061645508, "logits_per_token": -5.898054599761963, "logits_per_char": -1.1796109199523925, "bits_per_byte": 1.7018188243950074, "num_chars": 10}, {"sum_logits": -11.52005386352539, "num_tokens": 2, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -21.943904876708984, "logits_per_token": -5.760026931762695, "logits_per_char": -1.0472776239568538, "bits_per_byte": 1.510902234517615, "num_chars": 11}, {"sum_logits": -11.366010665893555, "num_tokens": 2, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -18.021732330322266, "logits_per_token": -5.683005332946777, "logits_per_char": -0.8743085127610427, "bits_per_byte": 1.2613605555682674, "num_chars": 13}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 980, "native_id": "Mercury_SC_409030", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.975557327270508, "logits_per_token_corr": -5.243889331817627, "logits_per_char_corr": -1.10397670143529, "bits_per_byte_corr": 1.5927017124187928}, "model_output": [{"sum_logits": -15.269777297973633, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -16.067567825317383, "logits_per_token": -7.634888648986816, "logits_per_char": -1.1745982536902795, "bits_per_byte": 1.6945870756370203, "num_chars": 13}, {"sum_logits": -13.963918685913086, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -15.8533353805542, "logits_per_token": -6.981959342956543, "logits_per_char": -1.0741475912240834, "bits_per_byte": 1.5496674030429238, "num_chars": 13}, {"sum_logits": -18.341976165771484, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -22.810688018798828, "logits_per_token": -4.585494041442871, "logits_per_char": -0.9653671666195518, "bits_per_byte": 1.3927304239200575, "num_chars": 19}, {"sum_logits": -20.975557327270508, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -21.23344612121582, "logits_per_token": -5.243889331817627, "logits_per_char": -1.10397670143529, "bits_per_byte": 1.5927017124187928, "num_chars": 19}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 981, "native_id": "MEA_2013_8_8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.470834732055664, "logits_per_token_corr": -3.470834732055664, "logits_per_char_corr": -1.1569449106852214, "bits_per_byte_corr": 1.6691186852284927}, "model_output": [{"sum_logits": -5.274677276611328, "num_tokens": 1, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -8.512578964233398, "logits_per_token": -5.274677276611328, "logits_per_char": -1.7582257588704426, "bits_per_byte": 2.5365835830874444, "num_chars": 3}, {"sum_logits": -3.7662506103515625, "num_tokens": 1, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -8.77786922454834, "logits_per_token": -3.7662506103515625, "logits_per_char": -1.2554168701171875, "bits_per_byte": 1.8111836927677114, "num_chars": 3}, {"sum_logits": -5.243626594543457, "num_tokens": 1, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -8.66379451751709, "logits_per_token": -5.243626594543457, "logits_per_char": -1.7478755315144856, "bits_per_byte": 2.5216513614089213, "num_chars": 3}, {"sum_logits": -3.470834732055664, "num_tokens": 1, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -8.18122386932373, "logits_per_token": -3.470834732055664, "logits_per_char": -1.1569449106852214, "bits_per_byte": 1.6691186852284927, "num_chars": 3}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 982, "native_id": "Mercury_7140333", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -26.49359893798828, "logits_per_token_corr": -2.408508994362571, "logits_per_char_corr": -0.45678618858600484, "bits_per_byte_corr": 0.6590031690200735}, "model_output": [{"sum_logits": -20.337383270263672, "num_tokens": 8, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -38.10108184814453, "logits_per_token": -2.542172908782959, "logits_per_char": -0.44211702761442767, "bits_per_byte": 0.637840043232362, "num_chars": 46}, {"sum_logits": -28.360591888427734, "num_tokens": 9, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -42.08142852783203, "logits_per_token": -3.1511768764919705, "logits_per_char": -0.5672118377685547, "bits_per_byte": 0.8183137054827969, "num_chars": 50}, {"sum_logits": -28.91169548034668, "num_tokens": 11, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -46.92832946777344, "logits_per_token": -2.628335952758789, "logits_per_char": -0.5782339096069335, "bits_per_byte": 0.8342151938643595, "num_chars": 50}, {"sum_logits": -26.49359893798828, "num_tokens": 11, "num_tokens_all": 226, "is_greedy": false, "sum_logits_uncond": -49.632362365722656, "logits_per_token": -2.408508994362571, "logits_per_char": -0.45678618858600484, "bits_per_byte": 0.6590031690200735, "num_chars": 58}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 983, "native_id": "Mercury_SC_LBS10664", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.277547359466553, "logits_per_token_corr": -7.277547359466553, "logits_per_char_corr": -0.9096934199333191, "bits_per_byte_corr": 1.3124101856680637}, "model_output": [{"sum_logits": -6.340050220489502, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -13.445572853088379, "logits_per_token": -6.340050220489502, "logits_per_char": -0.7044500244988335, "bits_per_byte": 1.0163065568993062, "num_chars": 9}, {"sum_logits": -3.7661986351013184, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -13.711965560913086, "logits_per_token": -3.7661986351013184, "logits_per_char": -0.5380283764430455, "bits_per_byte": 0.7762108705524797, "num_chars": 7}, {"sum_logits": -7.277547359466553, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -12.900300025939941, "logits_per_token": -7.277547359466553, "logits_per_char": -0.9096934199333191, "bits_per_byte": 1.3124101856680637, "num_chars": 8}, {"sum_logits": -5.833006381988525, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -5.833006381988525, "logits_per_char": -0.9721677303314209, "bits_per_byte": 1.4025415634624276, "num_chars": 6}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 984, "native_id": "Mercury_7171430", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.151702404022217, "logits_per_token_corr": -2.0758512020111084, "logits_per_char_corr": -0.5931003434317452, "bits_per_byte_corr": 0.8556629242191347}, "model_output": [{"sum_logits": -10.198708534240723, "num_tokens": 2, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -17.94623374938965, "logits_per_token": -5.099354267120361, "logits_per_char": -1.2748385667800903, "bits_per_byte": 1.8392032782289514, "num_chars": 8}, {"sum_logits": -4.151702404022217, "num_tokens": 2, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -16.849191665649414, "logits_per_token": -2.0758512020111084, "logits_per_char": -0.5931003434317452, "bits_per_byte": 0.8556629242191347, "num_chars": 7}, {"sum_logits": -6.2163190841674805, "num_tokens": 2, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -15.65728759765625, "logits_per_token": -3.1081595420837402, "logits_per_char": -1.03605318069458, "bits_per_byte": 1.4947087858863817, "num_chars": 6}, {"sum_logits": -2.4699084758758545, "num_tokens": 2, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -16.110065460205078, "logits_per_token": -1.2349542379379272, "logits_per_char": -0.41165141264597577, "bits_per_byte": 0.5938874515997122, "num_chars": 6}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 985, "native_id": "Mercury_SC_407572", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.817506313323975, "logits_per_token_corr": -1.6058354377746582, "logits_per_char_corr": -0.28338272431317496, "bits_per_byte_corr": 0.40883485104051553}, "model_output": [{"sum_logits": -13.261527061462402, "num_tokens": 4, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -19.989303588867188, "logits_per_token": -3.3153817653656006, "logits_per_char": -0.4736259664808001, "bits_per_byte": 0.6832978330785836, "num_chars": 28}, {"sum_logits": -8.104302406311035, "num_tokens": 4, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -24.606351852416992, "logits_per_token": -2.026075601577759, "logits_per_char": -0.3241720962524414, "bits_per_byte": 0.467681475658313, "num_chars": 25}, {"sum_logits": -9.127943992614746, "num_tokens": 3, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -17.415998458862305, "logits_per_token": -3.0426479975382485, "logits_per_char": -0.4563971996307373, "bits_per_byte": 0.658441976583348, "num_chars": 20}, {"sum_logits": -4.817506313323975, "num_tokens": 3, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -23.744022369384766, "logits_per_token": -1.6058354377746582, "logits_per_char": -0.28338272431317496, "bits_per_byte": 0.40883485104051553, "num_chars": 17}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 986, "native_id": "VASoL_2009_3_2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.235770225524902, "logits_per_token_corr": -5.078590075174968, "logits_per_char_corr": -1.1719823250403771, "bits_per_byte_corr": 1.690813088346484}, "model_output": [{"sum_logits": -9.643346786499023, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -15.790197372436523, "logits_per_token": -3.214448928833008, "logits_per_char": -0.803612232208252, "bits_per_byte": 1.159367382205388, "num_chars": 12}, {"sum_logits": -16.323143005371094, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -24.844575881958008, "logits_per_token": -5.441047668457031, "logits_per_char": -1.2556263850285456, "bits_per_byte": 1.8114859588913204, "num_chars": 13}, {"sum_logits": -12.347143173217773, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -18.876319885253906, "logits_per_token": -4.115714391072591, "logits_per_char": -0.9497802440936749, "bits_per_byte": 1.3702432480892384, "num_chars": 13}, {"sum_logits": -15.235770225524902, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -17.519542694091797, "logits_per_token": -5.078590075174968, "logits_per_char": -1.1719823250403771, "bits_per_byte": 1.690813088346484, "num_chars": 13}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 987, "native_id": "Mercury_SC_407383", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -37.718482971191406, "logits_per_token_corr": -4.190942552354601, "logits_per_char_corr": -0.9671405890049078, "bits_per_byte_corr": 1.3952889316008141}, "model_output": [{"sum_logits": -23.81495475769043, "num_tokens": 11, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -35.46480178833008, "logits_per_token": -2.164995887062766, "logits_per_char": -0.6615265210469564, "bits_per_byte": 0.9543810313316582, "num_chars": 36}, {"sum_logits": -27.489524841308594, "num_tokens": 11, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -32.389156341552734, "logits_per_token": -2.499047712846236, "logits_per_char": -0.584883507261885, "bits_per_byte": 0.8438085354250717, "num_chars": 47}, {"sum_logits": -31.089134216308594, "num_tokens": 11, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -37.235694885253906, "logits_per_token": -2.8262849287553267, "logits_per_char": -0.7402174813406808, "bits_per_byte": 1.0679080895102862, "num_chars": 42}, {"sum_logits": -37.718482971191406, "num_tokens": 9, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -44.735595703125, "logits_per_token": -4.190942552354601, "logits_per_char": -0.9671405890049078, "bits_per_byte": 1.3952889316008141, "num_chars": 39}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 988, "native_id": "Mercury_7218400", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -29.66120719909668, "logits_per_token_corr": -3.29568968878852, "logits_per_char_corr": -0.5203720561245031, "bits_per_byte_corr": 0.7507381847885534}, "model_output": [{"sum_logits": -24.56521987915039, "num_tokens": 10, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -42.183189392089844, "logits_per_token": -2.456521987915039, "logits_per_char": -0.3962132238572644, "bits_per_byte": 0.5716148531939147, "num_chars": 62}, {"sum_logits": -29.66120719909668, "num_tokens": 9, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -50.07197570800781, "logits_per_token": -3.29568968878852, "logits_per_char": -0.5203720561245031, "bits_per_byte": 0.7507381847885534, "num_chars": 57}, {"sum_logits": -23.985349655151367, "num_tokens": 6, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -30.00920295715332, "logits_per_token": -3.997558275858561, "logits_per_char": -0.7737209566177861, "bits_per_byte": 1.1162433871451467, "num_chars": 31}, {"sum_logits": -15.358268737792969, "num_tokens": 6, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -32.054542541503906, "logits_per_token": -2.559711456298828, "logits_per_char": -0.5119422912597656, "bits_per_byte": 0.7385766048223278, "num_chars": 30}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 989, "native_id": "Mercury_184818", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.325571060180664, "logits_per_token_corr": -4.331392765045166, "logits_per_char_corr": -0.8662785530090332, "bits_per_byte_corr": 1.2497757724554972}, "model_output": [{"sum_logits": -14.071090698242188, "num_tokens": 4, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -26.945877075195312, "logits_per_token": -3.517772674560547, "logits_per_char": -0.8277112175436581, "bits_per_byte": 1.1941348688392595, "num_chars": 17}, {"sum_logits": -17.306819915771484, "num_tokens": 4, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -26.919479370117188, "logits_per_token": -4.326704978942871, "logits_per_char": -0.910885258724815, "bits_per_byte": 1.314129645582095, "num_chars": 19}, {"sum_logits": -16.667476654052734, "num_tokens": 4, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -27.85973358154297, "logits_per_token": -4.166869163513184, "logits_per_char": -0.9259709252251519, "bits_per_byte": 1.3358936618306516, "num_chars": 18}, {"sum_logits": -17.325571060180664, "num_tokens": 4, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -27.953367233276367, "logits_per_token": -4.331392765045166, "logits_per_char": -0.8662785530090332, "bits_per_byte": 1.2497757724554972, "num_chars": 20}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 990, "native_id": "Mercury_SC_405931", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -31.258886337280273, "logits_per_token_corr": -3.4732095930311413, "logits_per_char_corr": -0.5683433879505504, "bits_per_byte_corr": 0.8199461873188804}, "model_output": [{"sum_logits": -21.970699310302734, "num_tokens": 7, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -32.53420639038086, "logits_per_token": -3.138671330043248, "logits_per_char": -0.4993340752341531, "bits_per_byte": 0.7203867940877068, "num_chars": 44}, {"sum_logits": -26.607810974121094, "num_tokens": 8, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -39.59585189819336, "logits_per_token": -3.3259763717651367, "logits_per_char": -0.5430165504922673, "bits_per_byte": 0.7834072845163882, "num_chars": 49}, {"sum_logits": -35.67406463623047, "num_tokens": 9, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -41.562294006347656, "logits_per_token": -3.9637849595811634, "logits_per_char": -0.7432096799214681, "bits_per_byte": 1.0722249195641462, "num_chars": 48}, {"sum_logits": -31.258886337280273, "num_tokens": 9, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -41.16156768798828, "logits_per_token": -3.4732095930311413, "logits_per_char": -0.5683433879505504, "bits_per_byte": 0.8199461873188804, "num_chars": 55}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 991, "native_id": "Mercury_SC_416177", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.371260643005371, "logits_per_token_corr": -2.5618767738342285, "logits_per_char_corr": -0.6148504257202149, "bits_per_byte_corr": 0.8870416600756592}, "model_output": [{"sum_logits": -21.894662857055664, "num_tokens": 6, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -37.39407730102539, "logits_per_token": -3.649110476175944, "logits_per_char": -0.7298220952351888, "bits_per_byte": 1.0529107175277561, "num_chars": 30}, {"sum_logits": -14.212923049926758, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -30.20496368408203, "logits_per_token": -2.8425846099853516, "logits_per_char": -0.5264045574046947, "bits_per_byte": 0.7594412444696483, "num_chars": 27}, {"sum_logits": -15.371260643005371, "num_tokens": 6, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -30.638328552246094, "logits_per_token": -2.5618767738342285, "logits_per_char": -0.6148504257202149, "bits_per_byte": 0.8870416600756592, "num_chars": 25}, {"sum_logits": -12.662862777709961, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -31.711816787719727, "logits_per_token": -2.532572555541992, "logits_per_char": -0.5505592512047809, "bits_per_byte": 0.7942891014292491, "num_chars": 23}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 992, "native_id": "Mercury_SC_406625", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.5519962310791, "logits_per_token_corr": -3.2217137472970143, "logits_per_char_corr": -0.7517332077026367, "bits_per_byte_corr": 1.0845217708249264}, "model_output": [{"sum_logits": -18.300588607788086, "num_tokens": 6, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -32.121437072753906, "logits_per_token": -3.050098101298014, "logits_per_char": -0.6535924502781459, "bits_per_byte": 0.942934586779425, "num_chars": 28}, {"sum_logits": -22.5519962310791, "num_tokens": 7, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -27.161502838134766, "logits_per_token": -3.2217137472970143, "logits_per_char": -0.7517332077026367, "bits_per_byte": 1.0845217708249264, "num_chars": 30}, {"sum_logits": -21.918325424194336, "num_tokens": 6, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -29.947996139526367, "logits_per_token": -3.6530542373657227, "logits_per_char": -0.8767330169677734, "bits_per_byte": 1.264858375763935, "num_chars": 25}, {"sum_logits": -24.004638671875, "num_tokens": 5, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -26.965728759765625, "logits_per_token": -4.800927734375, "logits_per_char": -1.0911199396306819, "bits_per_byte": 1.574153325921381, "num_chars": 22}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 993, "native_id": "MCAS_2014_8_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.09274673461914, "logits_per_token_corr": -1.1902497031471946, "logits_per_char_corr": -0.1870392390659877, "bits_per_byte_corr": 0.2698405826523396}, "model_output": [{"sum_logits": -15.395910263061523, "num_tokens": 11, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -36.232940673828125, "logits_per_token": -1.3996282057328657, "logits_per_char": -0.21994157518659319, "bits_per_byte": 0.317308619807233, "num_chars": 70}, {"sum_logits": -13.692248344421387, "num_tokens": 11, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -37.73649597167969, "logits_per_token": -1.2447498494928533, "logits_per_char": -0.19560354777744837, "bits_per_byte": 0.2821962683590149, "num_chars": 70}, {"sum_logits": -14.745323181152344, "num_tokens": 11, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -36.47220230102539, "logits_per_token": -1.340483925559304, "logits_per_char": -0.21064747401646206, "bits_per_byte": 0.3039000661395549, "num_chars": 70}, {"sum_logits": -13.09274673461914, "num_tokens": 11, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -37.84535217285156, "logits_per_token": -1.1902497031471946, "logits_per_char": -0.1870392390659877, "bits_per_byte": 0.2698405826523396, "num_chars": 70}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 994, "native_id": "Mercury_7138460", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.336034774780273, "logits_per_token_corr": -7.336034774780273, "logits_per_char_corr": -0.9170043468475342, "bits_per_byte_corr": 1.322957623671511}, "model_output": [{"sum_logits": -3.0764992237091064, "num_tokens": 1, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -12.887895584106445, "logits_per_token": -3.0764992237091064, "logits_per_char": -0.3418332470787896, "bits_per_byte": 0.4931611303718958, "num_chars": 9}, {"sum_logits": -6.8152666091918945, "num_tokens": 1, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -13.95900821685791, "logits_per_token": -6.8152666091918945, "logits_per_char": -0.6195696917447177, "bits_per_byte": 0.8938501217658501, "num_chars": 11}, {"sum_logits": -7.336034774780273, "num_tokens": 1, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -15.983927726745605, "logits_per_token": -7.336034774780273, "logits_per_char": -0.9170043468475342, "bits_per_byte": 1.322957623671511, "num_chars": 8}, {"sum_logits": -5.49522590637207, "num_tokens": 1, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -13.47834587097168, "logits_per_token": -5.49522590637207, "logits_per_char": -0.6869032382965088, "bits_per_byte": 0.9909918954616551, "num_chars": 8}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 995, "native_id": "Mercury_7129640", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.125324249267578, "logits_per_token_corr": -2.732189178466797, "logits_per_char_corr": -0.49039292946839946, "bits_per_byte_corr": 0.7074874474315794}, "model_output": [{"sum_logits": -28.764284133911133, "num_tokens": 9, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -48.690773010253906, "logits_per_token": -3.1960315704345703, "logits_per_char": -0.6537337303161621, "bits_per_byte": 0.9431384107896477, "num_chars": 44}, {"sum_logits": -19.125324249267578, "num_tokens": 7, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -34.26434326171875, "logits_per_token": -2.732189178466797, "logits_per_char": -0.49039292946839946, "bits_per_byte": 0.7074874474315794, "num_chars": 39}, {"sum_logits": -14.83414363861084, "num_tokens": 7, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -30.787071228027344, "logits_per_token": -2.119163376944406, "logits_per_char": -0.4009228010435362, "bits_per_byte": 0.5784093368452378, "num_chars": 37}, {"sum_logits": -18.59902000427246, "num_tokens": 7, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -28.57839012145996, "logits_per_token": -2.6570028577532088, "logits_per_char": -0.5026762163316881, "bits_per_byte": 0.7252084844750752, "num_chars": 37}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 996, "native_id": "Mercury_7024290", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.959132194519043, "logits_per_token_corr": -6.959132194519043, "logits_per_char_corr": -0.8698915243148804, "bits_per_byte_corr": 1.2549881882413207}, "model_output": [{"sum_logits": -4.8673295974731445, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -14.843673706054688, "logits_per_token": -4.8673295974731445, "logits_per_char": -0.6953327996390206, "bits_per_byte": 1.003153181807375, "num_chars": 7}, {"sum_logits": -6.959132194519043, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -17.193592071533203, "logits_per_token": -6.959132194519043, "logits_per_char": -0.8698915243148804, "bits_per_byte": 1.2549881882413207, "num_chars": 8}, {"sum_logits": -3.2741034030914307, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -16.651010513305664, "logits_per_token": -3.2741034030914307, "logits_per_char": -0.40926292538642883, "bits_per_byte": 0.5904415928751349, "num_chars": 8}, {"sum_logits": -3.0287210941314697, "num_tokens": 1, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -14.433614730834961, "logits_per_token": -3.0287210941314697, "logits_per_char": -0.33652456601460773, "bits_per_byte": 0.485502322526934, "num_chars": 9}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 997, "native_id": "NYSEDREGENTS_2008_4_28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -29.35724449157715, "logits_per_token_corr": -2.935724449157715, "logits_per_char_corr": -0.7339311122894288, "bits_per_byte_corr": 1.0588387760548406}, "model_output": [{"sum_logits": -22.040454864501953, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -27.11540412902832, "logits_per_token": -2.755056858062744, "logits_per_char": -0.6482486724853516, "bits_per_byte": 0.9352251450581425, "num_chars": 34}, {"sum_logits": -30.28342056274414, "num_tokens": 11, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -38.15290832519531, "logits_per_token": -2.75303823297674, "logits_per_char": -0.7969321200722143, "bits_per_byte": 1.1497300175541374, "num_chars": 38}, {"sum_logits": -29.35724449157715, "num_tokens": 10, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -38.814083099365234, "logits_per_token": -2.935724449157715, "logits_per_char": -0.7339311122894288, "bits_per_byte": 1.0588387760548406, "num_chars": 40}, {"sum_logits": -25.616119384765625, "num_tokens": 9, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -31.9970645904541, "logits_per_token": -2.8462354871961804, "logits_per_char": -0.7762460419625947, "bits_per_byte": 1.119886315249926, "num_chars": 33}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 998, "native_id": "Mercury_SC_414339", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.573966979980469, "logits_per_token_corr": -1.2339969981800427, "logits_per_char_corr": -0.30164371066623263, "bits_per_byte_corr": 0.4351798854938318}, "model_output": [{"sum_logits": -17.84386444091797, "num_tokens": 11, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -33.61572265625, "logits_per_token": -1.6221694946289062, "logits_per_char": -0.3796566902322972, "bits_per_byte": 0.5477288242388461, "num_chars": 47}, {"sum_logits": -15.566631317138672, "num_tokens": 11, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -33.617401123046875, "logits_per_token": -1.415148301558061, "logits_per_char": -0.3312049216412483, "bits_per_byte": 0.4778276979701899, "num_chars": 47}, {"sum_logits": -13.573966979980469, "num_tokens": 11, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -32.545162200927734, "logits_per_token": -1.2339969981800427, "logits_per_char": -0.30164371066623263, "bits_per_byte": 0.4351798854938318, "num_chars": 45}, {"sum_logits": -16.783294677734375, "num_tokens": 11, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -35.98341751098633, "logits_per_token": -1.5257540616122158, "logits_per_char": -0.3729621039496528, "bits_per_byte": 0.5380705778080648, "num_chars": 45}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 999, "native_id": "LEAP_2000_8_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -29.27553367614746, "logits_per_token_corr": -1.9517022450764974, "logits_per_char_corr": -0.6099069515864054, "bits_per_byte_corr": 0.8799097344580443}, "model_output": [{"sum_logits": -29.27553367614746, "num_tokens": 15, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -59.048763275146484, "logits_per_token": -1.9517022450764974, "logits_per_char": -0.6099069515864054, "bits_per_byte": 0.8799097344580443, "num_chars": 48}, {"sum_logits": -22.6182861328125, "num_tokens": 11, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -52.65888214111328, "logits_per_token": -2.0562078302556817, "logits_per_char": -0.5654571533203125, "bits_per_byte": 0.8157822309309912, "num_chars": 40}, {"sum_logits": -28.975894927978516, "num_tokens": 13, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -62.22792053222656, "logits_per_token": -2.2289149944598856, "logits_per_char": -0.49111686318607656, "bits_per_byte": 0.7085318630160052, "num_chars": 59}, {"sum_logits": -33.97795867919922, "num_tokens": 15, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -60.828346252441406, "logits_per_token": -2.265197245279948, "logits_per_char": -0.6795591735839843, "bits_per_byte": 0.9803966497209208, "num_chars": 50}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1000, "native_id": "Mercury_7172270", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.890520095825195, "logits_per_token_corr": -2.2100577884250217, "logits_per_char_corr": -0.40592898154745294, "bits_per_byte_corr": 0.5856317286320386}, "model_output": [{"sum_logits": -19.890520095825195, "num_tokens": 9, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -38.91203308105469, "logits_per_token": -2.2100577884250217, "logits_per_char": -0.40592898154745294, "bits_per_byte": 0.5856317286320386, "num_chars": 49}, {"sum_logits": -30.704303741455078, "num_tokens": 10, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -55.42151641845703, "logits_per_token": -3.070430374145508, "logits_per_char": -0.5793264856878316, "bits_per_byte": 0.8357914479580661, "num_chars": 53}, {"sum_logits": -32.009429931640625, "num_tokens": 9, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -58.56476593017578, "logits_per_token": -3.5566033257378473, "logits_per_char": -0.5615689461691338, "bits_per_byte": 0.8101727337560326, "num_chars": 57}, {"sum_logits": -41.805267333984375, "num_tokens": 9, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -59.0214958190918, "logits_per_token": -4.645029703776042, "logits_per_char": -0.7207804712755926, "bits_per_byte": 1.0398664114796545, "num_chars": 58}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1001, "native_id": "Mercury_184205", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.900592803955078, "logits_per_token_corr": -2.4900592803955077, "logits_per_char_corr": -0.4368525053325452, "bits_per_byte_corr": 0.6302449430436352}, "model_output": [{"sum_logits": -9.172891616821289, "num_tokens": 9, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -26.624494552612305, "logits_per_token": -1.0192101796468098, "logits_per_char": -0.2413918846531918, "bits_per_byte": 0.3482548749002507, "num_chars": 38}, {"sum_logits": -24.203689575195312, "num_tokens": 12, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -42.90519714355469, "logits_per_token": -2.016974131266276, "logits_per_char": -0.4566733882112323, "bits_per_byte": 0.6588404324787787, "num_chars": 53}, {"sum_logits": -24.900592803955078, "num_tokens": 10, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -44.09464645385742, "logits_per_token": -2.4900592803955077, "logits_per_char": -0.4368525053325452, "bits_per_byte": 0.6302449430436352, "num_chars": 57}, {"sum_logits": -22.367752075195312, "num_tokens": 13, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -41.683509826660156, "logits_per_token": -1.7205963134765625, "logits_per_char": -0.392416703073602, "bits_per_byte": 0.5661376314866892, "num_chars": 57}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1002, "native_id": "Mercury_SC_400683", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.937210083007812, "logits_per_token_corr": -2.133887154715402, "logits_per_char_corr": -0.3830053867437901, "bits_per_byte_corr": 0.5525599720894224}, "model_output": [{"sum_logits": -14.937210083007812, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -31.301197052001953, "logits_per_token": -2.133887154715402, "logits_per_char": -0.3830053867437901, "bits_per_byte": 0.5525599720894224, "num_chars": 39}, {"sum_logits": -18.40644073486328, "num_tokens": 8, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -27.620712280273438, "logits_per_token": -2.30080509185791, "logits_per_char": -0.4489375788991044, "bits_per_byte": 0.647680018746901, "num_chars": 41}, {"sum_logits": -11.731008529663086, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -23.82837677001953, "logits_per_token": -1.6758583613804408, "logits_per_char": -0.45119263575627255, "bits_per_byte": 0.6509333780916624, "num_chars": 26}, {"sum_logits": -11.993249893188477, "num_tokens": 6, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -23.769269943237305, "logits_per_token": -1.9988749821980794, "logits_per_char": -0.49971874554951984, "bits_per_byte": 0.720941756044064, "num_chars": 24}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1003, "native_id": "Mercury_7182210", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.290367126464844, "logits_per_token_corr": -2.5290367126464846, "logits_per_char_corr": -0.4215061187744141, "bits_per_byte_corr": 0.6081047872606384}, "model_output": [{"sum_logits": -25.89529037475586, "num_tokens": 12, "num_tokens_all": 249, "is_greedy": false, "sum_logits_uncond": -44.63959503173828, "logits_per_token": -2.1579408645629883, "logits_per_char": -0.43158817291259766, "bits_per_byte": 0.6226501167677804, "num_chars": 60}, {"sum_logits": -25.290367126464844, "num_tokens": 10, "num_tokens_all": 247, "is_greedy": false, "sum_logits_uncond": -41.491207122802734, "logits_per_token": -2.5290367126464846, "logits_per_char": -0.4215061187744141, "bits_per_byte": 0.6081047872606384, "num_chars": 60}, {"sum_logits": -33.31144332885742, "num_tokens": 11, "num_tokens_all": 248, "is_greedy": false, "sum_logits_uncond": -52.437530517578125, "logits_per_token": -3.0283130298961294, "logits_per_char": -0.5124837435208834, "bits_per_byte": 0.7393577553143211, "num_chars": 65}, {"sum_logits": -27.31588363647461, "num_tokens": 11, "num_tokens_all": 248, "is_greedy": false, "sum_logits_uncond": -52.511104583740234, "logits_per_token": -2.483262148770419, "logits_per_char": -0.4405787683302356, "bits_per_byte": 0.6356208041914551, "num_chars": 62}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1004, "native_id": "Mercury_7238945", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.840365409851074, "logits_per_token_corr": -1.920182704925537, "logits_per_char_corr": -1.2801218032836914, "bits_per_byte_corr": 1.8468253773325456}, "model_output": [{"sum_logits": -2.836266040802002, "num_tokens": 1, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -6.817997932434082, "logits_per_token": -2.836266040802002, "logits_per_char": -1.418133020401001, "bits_per_byte": 2.045933475854881, "num_chars": 2}, {"sum_logits": -1.45562744140625, "num_tokens": 2, "num_tokens_all": 239, "is_greedy": true, "sum_logits_uncond": -14.43525218963623, "logits_per_token": -0.727813720703125, "logits_per_char": -0.4852091471354167, "bits_per_byte": 0.7000088303667319, "num_chars": 3}, {"sum_logits": -3.840365409851074, "num_tokens": 2, "num_tokens_all": 239, "is_greedy": false, "sum_logits_uncond": -15.671220779418945, "logits_per_token": -1.920182704925537, "logits_per_char": -1.2801218032836914, "bits_per_byte": 1.8468253773325456, "num_chars": 3}, {"sum_logits": -3.3268322944641113, "num_tokens": 1, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -12.189156532287598, "logits_per_token": -3.3268322944641113, "logits_per_char": -1.1089440981547039, "bits_per_byte": 1.5998681510320245, "num_chars": 3}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1005, "native_id": "Mercury_SC_408748", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.660625457763672, "logits_per_token_corr": -2.2956250508626304, "logits_per_char_corr": -0.4695596694946289, "bits_per_byte_corr": 0.6774314065818485}, "model_output": [{"sum_logits": -16.540359497070312, "num_tokens": 7, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -27.879924774169922, "logits_per_token": -2.362908499581473, "logits_per_char": -0.4864811616785386, "bits_per_byte": 0.7018439594400339, "num_chars": 34}, {"sum_logits": -17.653348922729492, "num_tokens": 7, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -28.891019821166992, "logits_per_token": -2.521906988961356, "logits_per_char": -0.4526499723776793, "bits_per_byte": 0.6530358704082734, "num_chars": 39}, {"sum_logits": -16.473419189453125, "num_tokens": 7, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -28.448030471801758, "logits_per_token": -2.3533455984933034, "logits_per_char": -0.4017907119378811, "bits_per_byte": 0.5796614675884435, "num_chars": 41}, {"sum_logits": -20.660625457763672, "num_tokens": 9, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -32.02928161621094, "logits_per_token": -2.2956250508626304, "logits_per_char": -0.4695596694946289, "bits_per_byte": 0.6774314065818485, "num_chars": 44}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1006, "native_id": "MEA_2016_5_4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.64984893798828, "logits_per_token_corr": -1.5136226307262073, "logits_per_char_corr": -0.47570996965680806, "bits_per_byte_corr": 0.6863044141258093}, "model_output": [{"sum_logits": -12.894044876098633, "num_tokens": 9, "num_tokens_all": 250, "is_greedy": false, "sum_logits_uncond": -29.539169311523438, "logits_per_token": -1.432671652899848, "logits_per_char": -0.4029389023780823, "bits_per_byte": 0.581317956242519, "num_chars": 32}, {"sum_logits": -19.9647216796875, "num_tokens": 9, "num_tokens_all": 250, "is_greedy": false, "sum_logits_uncond": -33.24340057373047, "logits_per_token": -2.2183024088541665, "logits_per_char": -0.6238975524902344, "bits_per_byte": 0.9000939050010696, "num_chars": 32}, {"sum_logits": -18.017528533935547, "num_tokens": 10, "num_tokens_all": 251, "is_greedy": false, "sum_logits_uncond": -36.33844757080078, "logits_per_token": -1.8017528533935547, "logits_per_char": -0.5004869037204318, "bits_per_byte": 0.7220499740278578, "num_chars": 36}, {"sum_logits": -16.64984893798828, "num_tokens": 11, "num_tokens_all": 252, "is_greedy": false, "sum_logits_uncond": -32.2662353515625, "logits_per_token": -1.5136226307262073, "logits_per_char": -0.47570996965680806, "bits_per_byte": 0.6863044141258093, "num_chars": 35}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1007, "native_id": "Mercury_7271513", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.251323699951172, "logits_per_token_corr": -2.1390359666612415, "logits_per_char_corr": -0.39288415714186065, "bits_per_byte_corr": 0.5668120251528098}, "model_output": [{"sum_logits": -13.561691284179688, "num_tokens": 7, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -27.758037567138672, "logits_per_token": -1.9373844691685267, "logits_per_char": -0.39887327306410847, "bits_per_byte": 0.575452492993152, "num_chars": 34}, {"sum_logits": -33.69116973876953, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -44.228973388671875, "logits_per_token": -3.743463304307726, "logits_per_char": -0.8021707080659413, "bits_per_byte": 1.1572877024739534, "num_chars": 42}, {"sum_logits": -27.36318016052246, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -43.99006652832031, "logits_per_token": -3.4203975200653076, "logits_per_char": -0.5700662533442179, "bits_per_byte": 0.8224317566784455, "num_chars": 48}, {"sum_logits": -19.251323699951172, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -38.84844970703125, "logits_per_token": -2.1390359666612415, "logits_per_char": -0.39288415714186065, "bits_per_byte": 0.5668120251528098, "num_chars": 49}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1008, "native_id": "Mercury_7189000", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.338811874389648, "logits_per_token_corr": -3.834702968597412, "logits_per_char_corr": -0.5289245473927465, "bits_per_byte_corr": 0.7630768215285031}, "model_output": [{"sum_logits": -15.338811874389648, "num_tokens": 4, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -30.091590881347656, "logits_per_token": -3.834702968597412, "logits_per_char": -0.5289245473927465, "bits_per_byte": 0.7630768215285031, "num_chars": 29}, {"sum_logits": -8.869338035583496, "num_tokens": 6, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -29.416656494140625, "logits_per_token": -1.4782230059305828, "logits_per_char": -0.30583924260632744, "bits_per_byte": 0.44123275861770217, "num_chars": 29}, {"sum_logits": -13.925209045410156, "num_tokens": 4, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -26.59587860107422, "logits_per_token": -3.481302261352539, "logits_per_char": -0.6631051926385789, "bits_per_byte": 0.9566585730080859, "num_chars": 21}, {"sum_logits": -18.06743812561035, "num_tokens": 5, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -31.22727394104004, "logits_per_token": -3.6134876251220702, "logits_per_char": -0.8603541964576358, "bits_per_byte": 1.2412287326383318, "num_chars": 21}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1009, "native_id": "Mercury_SC_401585", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.7101354598999023, "logits_per_token_corr": -3.7101354598999023, "logits_per_char_corr": -0.5300193514142718, "bits_per_byte_corr": 0.7646562898611041}, "model_output": [{"sum_logits": -3.7101354598999023, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -12.981000900268555, "logits_per_token": -3.7101354598999023, "logits_per_char": -0.5300193514142718, "bits_per_byte": 0.7646562898611041, "num_chars": 7}, {"sum_logits": -5.796623229980469, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -13.24683666229248, "logits_per_token": -5.796623229980469, "logits_per_char": -1.1593246459960938, "bits_per_byte": 1.6725519175601191, "num_chars": 5}, {"sum_logits": -4.105415344238281, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -13.102810859680176, "logits_per_token": -4.105415344238281, "logits_per_char": -0.5864879063197544, "bits_per_byte": 0.8461231939894686, "num_chars": 7}, {"sum_logits": -5.264886856079102, "num_tokens": 1, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -13.08686351776123, "logits_per_token": -5.264886856079102, "logits_per_char": -0.7521266937255859, "bits_per_byte": 1.0850894511588947, "num_chars": 7}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1010, "native_id": "Mercury_188528", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.675267219543457, "logits_per_token_corr": -3.2250890731811523, "logits_per_char_corr": -0.46072701045445036, "bits_per_byte_corr": 0.6646885731867107}, "model_output": [{"sum_logits": -4.872868061065674, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -14.845972061157227, "logits_per_token": -2.436434030532837, "logits_per_char": -0.4872868061065674, "bits_per_byte": 0.7030062586610717, "num_chars": 10}, {"sum_logits": -5.036016464233398, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -15.965056419372559, "logits_per_token": -5.036016464233398, "logits_per_char": -0.4578196785666726, "bits_per_byte": 0.6604941798899924, "num_chars": 11}, {"sum_logits": -5.430398464202881, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -20.409318923950195, "logits_per_token": -2.7151992321014404, "logits_per_char": -0.45253320535024005, "bits_per_byte": 0.6528674111968473, "num_chars": 12}, {"sum_logits": -9.675267219543457, "num_tokens": 3, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -18.58736801147461, "logits_per_token": -3.2250890731811523, "logits_per_char": -0.46072701045445036, "bits_per_byte": 0.6646885731867107, "num_chars": 21}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1011, "native_id": "Mercury_SC_415719", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.089912414550781, "logits_per_token_corr": -1.408991241455078, "logits_per_char_corr": -0.33547410510835196, "bits_per_byte_corr": 0.48398682778682994}, "model_output": [{"sum_logits": -14.089912414550781, "num_tokens": 10, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -46.82859420776367, "logits_per_token": -1.408991241455078, "logits_per_char": -0.33547410510835196, "bits_per_byte": 0.48398682778682994, "num_chars": 42}, {"sum_logits": -20.901647567749023, "num_tokens": 10, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -50.240928649902344, "logits_per_token": -2.0901647567749024, "logits_per_char": -0.4976582754225958, "bits_per_byte": 0.7179691260100486, "num_chars": 42}, {"sum_logits": -14.964449882507324, "num_tokens": 10, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -48.057716369628906, "logits_per_token": -1.4964449882507325, "logits_per_char": -0.3562964257739839, "bits_per_byte": 0.5140270865509585, "num_chars": 42}, {"sum_logits": -23.797338485717773, "num_tokens": 10, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -49.398094177246094, "logits_per_token": -2.379733848571777, "logits_per_char": -0.5666032972789946, "bits_per_byte": 0.8174357671363279, "num_chars": 42}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1012, "native_id": "Mercury_SC_407072", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.371364593505859, "logits_per_token_corr": -1.2285607655843098, "logits_per_char_corr": -0.24571215311686198, "bits_per_byte_corr": 0.3544877047881011}, "model_output": [{"sum_logits": -7.371364593505859, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -32.09319305419922, "logits_per_token": -1.2285607655843098, "logits_per_char": -0.24571215311686198, "bits_per_byte": 0.3544877047881011, "num_chars": 30}, {"sum_logits": -13.042695999145508, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -37.024986267089844, "logits_per_token": -2.1737826665242515, "logits_per_char": -0.4207321290046938, "bits_per_byte": 0.6069881560581635, "num_chars": 31}, {"sum_logits": -15.786290168762207, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -31.525222778320312, "logits_per_token": -2.6310483614603677, "logits_per_char": -0.5262096722920736, "bits_per_byte": 0.7591600846841265, "num_chars": 30}, {"sum_logits": -14.461710929870605, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -38.951416015625, "logits_per_token": -2.410285154978434, "logits_per_char": -0.4382336645415335, "bits_per_byte": 0.6322375345851222, "num_chars": 33}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1013, "native_id": "Mercury_7091823", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.386764526367188, "logits_per_token_corr": -4.193382263183594, "logits_per_char_corr": -0.7624331387606534, "bits_per_byte_corr": 1.099958508300192}, "model_output": [{"sum_logits": -8.386764526367188, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -17.642181396484375, "logits_per_token": -4.193382263183594, "logits_per_char": -0.7624331387606534, "bits_per_byte": 1.099958508300192, "num_chars": 11}, {"sum_logits": -11.170223236083984, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -16.89922332763672, "logits_per_token": -3.723407745361328, "logits_per_char": -0.7446815490722656, "bits_per_byte": 1.0743483778888407, "num_chars": 15}, {"sum_logits": -6.758931636810303, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -18.14154815673828, "logits_per_token": -3.3794658184051514, "logits_per_char": -0.4827808312007359, "bits_per_byte": 0.6965055110100538, "num_chars": 14}, {"sum_logits": -10.877452850341797, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -19.935270309448242, "logits_per_token": -3.625817616780599, "logits_per_char": -0.4944296750155362, "bits_per_byte": 0.7133112402137683, "num_chars": 22}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1014, "native_id": "Mercury_7040985", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.497817993164062, "logits_per_token_corr": -2.0711168561662947, "logits_per_char_corr": -0.45305681228637695, "bits_per_byte_corr": 0.6536228163269876}, "model_output": [{"sum_logits": -13.129133224487305, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -26.86703872680664, "logits_per_token": -2.6258266448974608, "logits_per_char": -0.5470472176869711, "bits_per_byte": 0.7892223080896654, "num_chars": 24}, {"sum_logits": -14.497817993164062, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -39.178871154785156, "logits_per_token": -2.0711168561662947, "logits_per_char": -0.45305681228637695, "bits_per_byte": 0.6536228163269876, "num_chars": 32}, {"sum_logits": -24.413408279418945, "num_tokens": 9, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -36.826507568359375, "logits_per_token": -2.712600919935438, "logits_per_char": -0.7180414199829102, "bits_per_byte": 1.0359147957629582, "num_chars": 34}, {"sum_logits": -21.819442749023438, "num_tokens": 9, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -43.1629638671875, "logits_per_token": -2.424382527669271, "logits_per_char": -0.6417483161477482, "bits_per_byte": 0.9258471132058641, "num_chars": 34}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1015, "native_id": "Mercury_SC_409383", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -18.05353355407715, "logits_per_token_corr": -2.2566916942596436, "logits_per_char_corr": -0.40118963453504775, "bits_per_byte_corr": 0.5787942962001849}, "model_output": [{"sum_logits": -17.748088836669922, "num_tokens": 6, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -28.974246978759766, "logits_per_token": -2.958014806111654, "logits_per_char": -0.6573366235803675, "bits_per_byte": 0.9483362870347727, "num_chars": 27}, {"sum_logits": -15.677392959594727, "num_tokens": 6, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -26.804786682128906, "logits_per_token": -2.612898826599121, "logits_per_char": -0.4750725139271129, "bits_per_byte": 0.6853847599057912, "num_chars": 33}, {"sum_logits": -19.381824493408203, "num_tokens": 7, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -29.325700759887695, "logits_per_token": -2.7688320704868863, "logits_per_char": -0.5383840137057834, "bits_per_byte": 0.7767239466677874, "num_chars": 36}, {"sum_logits": -18.05353355407715, "num_tokens": 8, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -30.04388427734375, "logits_per_token": -2.2566916942596436, "logits_per_char": -0.40118963453504775, "bits_per_byte": 0.5787942962001849, "num_chars": 45}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1016, "native_id": "Mercury_SC_407080", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.8646183013916, "logits_per_token_corr": -5.17292366027832, "logits_per_char_corr": -1.0776924292246501, "bits_per_byte_corr": 1.5547815232470998}, "model_output": [{"sum_logits": -23.758859634399414, "num_tokens": 4, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -28.623432159423828, "logits_per_token": -5.9397149085998535, "logits_per_char": -1.0799481651999734, "bits_per_byte": 1.5580358623522559, "num_chars": 22}, {"sum_logits": -20.87525177001953, "num_tokens": 4, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -27.785167694091797, "logits_per_token": -5.218812942504883, "logits_per_char": -0.802894298846905, "bits_per_byte": 1.1583316233052834, "num_chars": 26}, {"sum_logits": -19.128108978271484, "num_tokens": 3, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -22.910419464111328, "logits_per_token": -6.376036326090495, "logits_per_char": -1.0067425778037624, "bits_per_byte": 1.4524225244503028, "num_chars": 19}, {"sum_logits": -25.8646183013916, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -25.196491241455078, "logits_per_token": -5.17292366027832, "logits_per_char": -1.0776924292246501, "bits_per_byte": 1.5547815232470998, "num_chars": 24}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1017, "native_id": "MCAS_2000_4_34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.514358520507812, "logits_per_token_corr": -2.1514358520507812, "logits_per_char_corr": -0.4677034460979959, "bits_per_byte_corr": 0.674753442292742}, "model_output": [{"sum_logits": -21.514358520507812, "num_tokens": 10, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -39.03128433227539, "logits_per_token": -2.1514358520507812, "logits_per_char": -0.4677034460979959, "bits_per_byte": 0.674753442292742, "num_chars": 46}, {"sum_logits": -14.74583625793457, "num_tokens": 9, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -36.20461654663086, "logits_per_token": -1.638426250881619, "logits_per_char": -0.35965454287645293, "bits_per_byte": 0.5188718254414185, "num_chars": 41}, {"sum_logits": -34.709999084472656, "num_tokens": 14, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -50.209861755371094, "logits_per_token": -2.479285648890904, "logits_per_char": -0.46905404168206294, "bits_per_byte": 0.6767019398441235, "num_chars": 74}, {"sum_logits": -17.810949325561523, "num_tokens": 9, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -42.830238342285156, "logits_per_token": -1.978994369506836, "logits_per_char": -0.40479430285367096, "bits_per_byte": 0.5839947333075158, "num_chars": 44}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1018, "native_id": "Mercury_7032498", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.284587860107422, "logits_per_token_corr": -4.3211469650268555, "logits_per_char_corr": -0.8230756123860677, "bits_per_byte_corr": 1.1874471042668797}, "model_output": [{"sum_logits": -40.9549446105957, "num_tokens": 8, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -42.42774200439453, "logits_per_token": -5.119368076324463, "logits_per_char": -0.9524405723394349, "bits_per_byte": 1.374081290456536, "num_chars": 43}, {"sum_logits": -21.78205108642578, "num_tokens": 7, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -28.836740493774414, "logits_per_token": -3.1117215837751115, "logits_per_char": -0.8377711956317608, "bits_per_byte": 1.2086483493384272, "num_chars": 26}, {"sum_logits": -12.829421997070312, "num_tokens": 5, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -23.209001541137695, "logits_per_token": -2.5658843994140623, "logits_per_char": -0.4934393075796274, "bits_per_byte": 0.7118824420253238, "num_chars": 26}, {"sum_logits": -17.284587860107422, "num_tokens": 4, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -24.140148162841797, "logits_per_token": -4.3211469650268555, "logits_per_char": -0.8230756123860677, "bits_per_byte": 1.1874471042668797, "num_chars": 21}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1019, "native_id": "TAKS_2009_5_30", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.229310035705566, "logits_per_token_corr": -2.8073275089263916, "logits_per_char_corr": -0.8637930796696589, "bits_per_byte_corr": 1.2461899923945174}, "model_output": [{"sum_logits": -11.229310035705566, "num_tokens": 4, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -16.545066833496094, "logits_per_token": -2.8073275089263916, "logits_per_char": -0.8637930796696589, "bits_per_byte": 1.2461899923945174, "num_chars": 13}, {"sum_logits": -8.66595458984375, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -14.052603721618652, "logits_per_token": -4.332977294921875, "logits_per_char": -1.0832443237304688, "bits_per_byte": 1.562791213918189, "num_chars": 8}, {"sum_logits": -11.553226470947266, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -15.146127700805664, "logits_per_token": -5.776613235473633, "logits_per_char": -1.650460924421038, "bits_per_byte": 2.3811117908449564, "num_chars": 7}, {"sum_logits": -11.269815444946289, "num_tokens": 2, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -14.154871940612793, "logits_per_token": -5.6349077224731445, "logits_per_char": -1.2522017161051433, "bits_per_byte": 1.8065452060188378, "num_chars": 9}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1020, "native_id": "Mercury_SC_415761", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.455196380615234, "logits_per_token_corr": -1.586836034601385, "logits_per_char_corr": -0.3293433279361365, "bits_per_byte_corr": 0.47514198596367313}, "model_output": [{"sum_logits": -6.995355129241943, "num_tokens": 4, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -22.03966522216797, "logits_per_token": -1.7488387823104858, "logits_per_char": -0.41149147819070253, "bits_per_byte": 0.5936567149542221, "num_chars": 17}, {"sum_logits": -10.934690475463867, "num_tokens": 4, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -22.356760025024414, "logits_per_token": -2.733672618865967, "logits_per_char": -0.6432170867919922, "bits_per_byte": 0.9279661013305198, "num_chars": 17}, {"sum_logits": -17.455196380615234, "num_tokens": 11, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -37.083984375, "logits_per_token": -1.586836034601385, "logits_per_char": -0.3293433279361365, "bits_per_byte": 0.47514198596367313, "num_chars": 53}, {"sum_logits": -19.90057373046875, "num_tokens": 10, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -36.53546142578125, "logits_per_token": -1.990057373046875, "logits_per_char": -0.4061341577646684, "bits_per_byte": 0.5859277353431239, "num_chars": 49}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1021, "native_id": "ACTAAP_2008_5_10", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.948261260986328, "logits_per_token_corr": -2.493532657623291, "logits_per_char_corr": -0.5249542437101665, "bits_per_byte_corr": 0.7573488840948177}, "model_output": [{"sum_logits": -12.519342422485352, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -28.73300552368164, "logits_per_token": -2.086557070414225, "logits_per_char": -0.3793740128025864, "bits_per_byte": 0.5473210069128308, "num_chars": 33}, {"sum_logits": -13.683845520019531, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -28.34706687927246, "logits_per_token": -2.2806409200032554, "logits_per_char": -0.4146619854551373, "bits_per_byte": 0.5982307900617279, "num_chars": 33}, {"sum_logits": -25.29039764404297, "num_tokens": 8, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -33.4056282043457, "logits_per_token": -3.161299705505371, "logits_per_char": -0.7225827898297991, "bits_per_byte": 1.0424666075199123, "num_chars": 35}, {"sum_logits": -19.948261260986328, "num_tokens": 8, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -29.993846893310547, "logits_per_token": -2.493532657623291, "logits_per_char": -0.5249542437101665, "bits_per_byte": 0.7573488840948177, "num_chars": 38}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1022, "native_id": "Mercury_416671", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.113191604614258, "logits_per_token_corr": -1.3891489505767822, "logits_per_char_corr": -0.28495363088754505, "bits_per_byte_corr": 0.41110119016506075}, "model_output": [{"sum_logits": -11.193604469299316, "num_tokens": 4, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -22.741559982299805, "logits_per_token": -2.798401117324829, "logits_per_char": -0.5330287842523485, "bits_per_byte": 0.7689979836924887, "num_chars": 21}, {"sum_logits": -9.115947723388672, "num_tokens": 5, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -22.640872955322266, "logits_per_token": -1.8231895446777344, "logits_per_char": -0.3506133739764874, "bits_per_byte": 0.5058281759055893, "num_chars": 26}, {"sum_logits": -9.454821586608887, "num_tokens": 4, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -20.12301254272461, "logits_per_token": -2.3637053966522217, "logits_per_char": -0.3049942447293189, "bits_per_byte": 0.4400136843709794, "num_chars": 31}, {"sum_logits": -11.113191604614258, "num_tokens": 8, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -24.067485809326172, "logits_per_token": -1.3891489505767822, "logits_per_char": -0.28495363088754505, "bits_per_byte": 0.41110119016506075, "num_chars": 39}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1023, "native_id": "Mercury_400803", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.641500473022461, "logits_per_token_corr": -4.641500473022461, "logits_per_char_corr": -0.46415004730224607, "bits_per_byte_corr": 0.6696269714718093}, "model_output": [{"sum_logits": -5.289151191711426, "num_tokens": 1, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -15.41556453704834, "logits_per_token": -5.289151191711426, "logits_per_char": -0.5289151191711425, "bits_per_byte": 0.7630632194799506, "num_chars": 10}, {"sum_logits": -6.244318962097168, "num_tokens": 1, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -15.51697063446045, "logits_per_token": -6.244318962097168, "logits_per_char": -0.6244318962097168, "bits_per_byte": 0.9008648000352976, "num_chars": 10}, {"sum_logits": -4.906257629394531, "num_tokens": 1, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -15.883553504943848, "logits_per_token": -4.906257629394531, "logits_per_char": -0.49062576293945315, "bits_per_byte": 0.7078233551256218, "num_chars": 10}, {"sum_logits": -4.641500473022461, "num_tokens": 1, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -15.273592948913574, "logits_per_token": -4.641500473022461, "logits_per_char": -0.46415004730224607, "bits_per_byte": 0.6696269714718093, "num_chars": 10}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1024, "native_id": "Mercury_7005880", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -33.59912109375, "logits_per_token_corr": -2.23994140625, "logits_per_char_corr": -0.4732270576584507, "bits_per_byte_corr": 0.6827223292988128}, "model_output": [{"sum_logits": -30.84048080444336, "num_tokens": 10, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -38.9765625, "logits_per_token": -3.0840480804443358, "logits_per_char": -0.656180442647731, "bits_per_byte": 0.9466682705368865, "num_chars": 47}, {"sum_logits": -27.9874267578125, "num_tokens": 11, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -31.950227737426758, "logits_per_token": -2.5443115234375, "logits_per_char": -0.5182856807002315, "bits_per_byte": 0.7477281813105219, "num_chars": 54}, {"sum_logits": -33.59912109375, "num_tokens": 15, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -42.187965393066406, "logits_per_token": -2.23994140625, "logits_per_char": -0.4732270576584507, "bits_per_byte": 0.6827223292988128, "num_chars": 71}, {"sum_logits": -20.719480514526367, "num_tokens": 11, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -35.850830078125, "logits_per_token": -1.883589137684215, "logits_per_char": -0.38369408360234014, "bits_per_byte": 0.5535535516319291, "num_chars": 54}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1025, "native_id": "Mercury_7210508", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -40.96455383300781, "logits_per_token_corr": -3.413712819417318, "logits_per_char_corr": -0.620675058075876, "bits_per_byte_corr": 0.8954448282901789}, "model_output": [{"sum_logits": -23.18425750732422, "num_tokens": 7, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -35.195518493652344, "logits_per_token": -3.312036786760603, "logits_per_char": -0.7994571554249731, "bits_per_byte": 1.1533728735356346, "num_chars": 29}, {"sum_logits": -25.42577362060547, "num_tokens": 9, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -37.110084533691406, "logits_per_token": -2.8250859578450522, "logits_per_char": -0.6201408200147676, "bits_per_byte": 0.8946740856887632, "num_chars": 41}, {"sum_logits": -40.96455383300781, "num_tokens": 12, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -58.05537033081055, "logits_per_token": -3.413712819417318, "logits_per_char": -0.620675058075876, "bits_per_byte": 0.8954448282901789, "num_chars": 66}, {"sum_logits": -31.7356014251709, "num_tokens": 13, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -47.955322265625, "logits_per_token": -2.4412001096285305, "logits_per_char": -0.48824002192570615, "bits_per_byte": 0.7043814583962411, "num_chars": 65}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1026, "native_id": "NYSEDREGENTS_2013_4_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.937265396118164, "logits_per_token_corr": -3.419609342302595, "logits_per_char_corr": -0.8865653850414135, "bits_per_byte_corr": 1.2790434844239804}, "model_output": [{"sum_logits": -22.021846771240234, "num_tokens": 5, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -24.718414306640625, "logits_per_token": -4.404369354248047, "logits_per_char": -0.7340615590413412, "bits_per_byte": 1.0590269709369249, "num_chars": 30}, {"sum_logits": -18.365097045898438, "num_tokens": 7, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -26.764625549316406, "logits_per_token": -2.6235852922712053, "logits_per_char": -0.6801887794777199, "bits_per_byte": 0.9813049790215282, "num_chars": 27}, {"sum_logits": -23.937265396118164, "num_tokens": 7, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -29.526405334472656, "logits_per_token": -3.419609342302595, "logits_per_char": -0.8865653850414135, "bits_per_byte": 1.2790434844239804, "num_chars": 27}, {"sum_logits": -28.89236831665039, "num_tokens": 7, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -36.59235382080078, "logits_per_token": -4.127481188092913, "logits_per_char": -1.0318702970232283, "bits_per_byte": 1.4886741603571028, "num_chars": 28}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1027, "native_id": "NYSEDREGENTS_2008_4_12", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.551379203796387, "logits_per_token_corr": -3.1102758407592774, "logits_per_char_corr": -0.6761469219041907, "bits_per_byte_corr": 0.9754738111442139}, "model_output": [{"sum_logits": -6.595647811889648, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -25.88051986694336, "logits_per_token": -1.648911952972412, "logits_per_char": -0.3471393585205078, "bits_per_byte": 0.5008162310352724, "num_chars": 19}, {"sum_logits": -9.0072021484375, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -25.594755172729492, "logits_per_token": -3.0024007161458335, "logits_per_char": -0.5629501342773438, "bits_per_byte": 0.8121653669902834, "num_chars": 16}, {"sum_logits": -15.551379203796387, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -36.178401947021484, "logits_per_token": -3.1102758407592774, "logits_per_char": -0.6761469219041907, "bits_per_byte": 0.9754738111442139, "num_chars": 23}, {"sum_logits": -19.814350128173828, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -37.724891662597656, "logits_per_token": -3.302391688028971, "logits_per_char": -0.8255979220072428, "bits_per_byte": 1.1910860278489381, "num_chars": 24}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1028, "native_id": "Mercury_400091", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.235921859741211, "logits_per_token_corr": -2.1179609298706055, "logits_per_char_corr": -1.0589804649353027, "bits_per_byte_corr": 1.5277858651615477}, "model_output": [{"sum_logits": -3.9990317821502686, "num_tokens": 2, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -10.419174194335938, "logits_per_token": -1.9995158910751343, "logits_per_char": -1.3330105940500896, "bits_per_byte": 1.923127773489897, "num_chars": 3}, {"sum_logits": -4.235921859741211, "num_tokens": 2, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -10.692663192749023, "logits_per_token": -2.1179609298706055, "logits_per_char": -1.0589804649353027, "bits_per_byte": 1.5277858651615477, "num_chars": 4}, {"sum_logits": -3.2091732025146484, "num_tokens": 2, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -9.797324180603027, "logits_per_token": -1.6045866012573242, "logits_per_char": -0.8022933006286621, "bits_per_byte": 1.1574645661562406, "num_chars": 4}, {"sum_logits": -5.595561504364014, "num_tokens": 2, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -10.62488842010498, "logits_per_token": -2.797780752182007, "logits_per_char": -1.3988903760910034, "bits_per_byte": 2.0181722083352374, "num_chars": 4}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1029, "native_id": "Mercury_SC_402257", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.023646354675293, "logits_per_token_corr": -4.023646354675293, "logits_per_char_corr": -0.8047292709350586, "bits_per_byte_corr": 1.1609789284370342}, "model_output": [{"sum_logits": -1.5167057514190674, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": true, "sum_logits_uncond": -13.13008975982666, "logits_per_token": -1.5167057514190674, "logits_per_char": -0.2527842919031779, "bits_per_byte": 0.36469064434360493, "num_chars": 6}, {"sum_logits": -3.387244462966919, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -13.611979484558105, "logits_per_token": -3.387244462966919, "logits_per_char": -0.5645407438278198, "bits_per_byte": 0.8144601315007475, "num_chars": 6}, {"sum_logits": -4.057814598083496, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -4.057814598083496, "logits_per_char": -0.676302433013916, "bits_per_byte": 0.975698166251018, "num_chars": 6}, {"sum_logits": -4.023646354675293, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -12.139776229858398, "logits_per_token": -4.023646354675293, "logits_per_char": -0.8047292709350586, "bits_per_byte": 1.1609789284370342, "num_chars": 5}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1030, "native_id": "Mercury_7227815", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -32.23647689819336, "logits_per_token_corr": -4.02955961227417, "logits_per_char_corr": -0.6858824871956034, "bits_per_byte_corr": 0.9895192629103959}, "model_output": [{"sum_logits": -35.16320037841797, "num_tokens": 10, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -41.965782165527344, "logits_per_token": -3.5163200378417967, "logits_per_char": -0.7991636449640448, "bits_per_byte": 1.152949427449204, "num_chars": 44}, {"sum_logits": -23.456859588623047, "num_tokens": 10, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -35.77838897705078, "logits_per_token": -2.3456859588623047, "logits_per_char": -0.4510934536273663, "bits_per_byte": 0.6507902885261445, "num_chars": 52}, {"sum_logits": -23.127702713012695, "num_tokens": 10, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -32.32225799560547, "logits_per_token": -2.3127702713012694, "logits_per_char": -0.4920787811279297, "bits_per_byte": 0.7099196172604598, "num_chars": 47}, {"sum_logits": -32.23647689819336, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -40.6263542175293, "logits_per_token": -4.02955961227417, "logits_per_char": -0.6858824871956034, "bits_per_byte": 0.9895192629103959, "num_chars": 47}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1031, "native_id": "ACTAAP_2010_7_3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.867351531982422, "logits_per_token_corr": -2.4834189414978027, "logits_per_char_corr": -0.5843338685877183, "bits_per_byte_corr": 0.8430155744355701}, "model_output": [{"sum_logits": -19.867351531982422, "num_tokens": 8, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -33.41926574707031, "logits_per_token": -2.4834189414978027, "logits_per_char": -0.5843338685877183, "bits_per_byte": 0.8430155744355701, "num_chars": 34}, {"sum_logits": -14.911681175231934, "num_tokens": 8, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -26.820980072021484, "logits_per_token": -1.8639601469039917, "logits_per_char": -0.3823507993649214, "bits_per_byte": 0.5516156021240994, "num_chars": 39}, {"sum_logits": -31.887794494628906, "num_tokens": 10, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -38.835575103759766, "logits_per_token": -3.1887794494628907, "logits_per_char": -0.6507713162169164, "bits_per_byte": 0.9388645506596033, "num_chars": 49}, {"sum_logits": -32.30449295043945, "num_tokens": 10, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -40.36298751831055, "logits_per_token": -3.2304492950439454, "logits_per_char": -0.646089859008789, "bits_per_byte": 0.9321106355612991, "num_chars": 50}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1032, "native_id": "Mercury_SC_410905", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.782146453857422, "logits_per_token_corr": -2.8260209219796315, "logits_per_char_corr": -0.5652041843959263, "bits_per_byte_corr": 0.81541727391828}, "model_output": [{"sum_logits": -26.22585105895996, "num_tokens": 7, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -32.98655700683594, "logits_per_token": -3.7465501512799944, "logits_per_char": -0.6244250252133324, "bits_per_byte": 0.9008548872828879, "num_chars": 42}, {"sum_logits": -31.37005615234375, "num_tokens": 6, "num_tokens_all": 227, "is_greedy": false, "sum_logits_uncond": -35.35418701171875, "logits_per_token": -5.228342692057292, "logits_per_char": -0.7842514038085937, "bits_per_byte": 1.131435611085679, "num_chars": 40}, {"sum_logits": -23.845516204833984, "num_tokens": 7, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -34.20245361328125, "logits_per_token": -3.4065023149762834, "logits_per_char": -0.6114234924316406, "bits_per_byte": 0.8820976404147723, "num_chars": 39}, {"sum_logits": -19.782146453857422, "num_tokens": 7, "num_tokens_all": 228, "is_greedy": false, "sum_logits_uncond": -26.984947204589844, "logits_per_token": -2.8260209219796315, "logits_per_char": -0.5652041843959263, "bits_per_byte": 0.81541727391828, "num_chars": 35}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1033, "native_id": "OHAT_2010_5_18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.976444244384766, "logits_per_token_corr": -2.1394920349121094, "logits_per_char_corr": -0.45383164376923535, "bits_per_byte_corr": 0.6547406618648328}, "model_output": [{"sum_logits": -8.877305030822754, "num_tokens": 6, "num_tokens_all": 235, "is_greedy": false, "sum_logits_uncond": -29.016019821166992, "logits_per_token": -1.479550838470459, "logits_per_char": -0.3414348088777982, "bits_per_byte": 0.49258630555522437, "num_chars": 26}, {"sum_logits": -8.620991706848145, "num_tokens": 6, "num_tokens_all": 235, "is_greedy": false, "sum_logits_uncond": -23.376609802246094, "logits_per_token": -1.4368319511413574, "logits_per_char": -0.29727557609821187, "bits_per_byte": 0.428877999414608, "num_chars": 29}, {"sum_logits": -11.615018844604492, "num_tokens": 7, "num_tokens_all": 236, "is_greedy": false, "sum_logits_uncond": -32.77757263183594, "logits_per_token": -1.6592884063720703, "logits_per_char": -0.3746780272453062, "bits_per_byte": 0.5405461318372515, "num_chars": 31}, {"sum_logits": -14.976444244384766, "num_tokens": 7, "num_tokens_all": 236, "is_greedy": false, "sum_logits_uncond": -37.41425704956055, "logits_per_token": -2.1394920349121094, "logits_per_char": -0.45383164376923535, "bits_per_byte": 0.6547406618648328, "num_chars": 33}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1034, "native_id": "NAEP_2000_8_S11+10", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.2755749225616455, "logits_per_token_corr": -1.6377874612808228, "logits_per_char_corr": -0.36395276917351616, "bits_per_byte_corr": 0.5250728552048146}, "model_output": [{"sum_logits": -3.2755749225616455, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": true, "sum_logits_uncond": -15.452784538269043, "logits_per_token": -1.6377874612808228, "logits_per_char": -0.36395276917351616, "bits_per_byte": 0.5250728552048146, "num_chars": 9}, {"sum_logits": -11.796648025512695, "num_tokens": 3, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -19.261043548583984, "logits_per_token": -3.932216008504232, "logits_per_char": -0.983054002126058, "bits_per_byte": 1.4182471337943312, "num_chars": 12}, {"sum_logits": -17.256582260131836, "num_tokens": 5, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -32.60350036621094, "logits_per_token": -3.4513164520263673, "logits_per_char": -0.663714702312763, "bits_per_byte": 0.9575379095924057, "num_chars": 26}, {"sum_logits": -3.7980592250823975, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -3.7980592250823975, "logits_per_char": -0.6330098708470663, "bits_per_byte": 0.9132402015054818, "num_chars": 6}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1035, "native_id": "MCAS_2003_8_29", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.108478546142578, "logits_per_token_corr": -4.01807975769043, "logits_per_char_corr": -0.803615951538086, "bits_per_byte_corr": 1.1593727480640952}, "model_output": [{"sum_logits": -21.971969604492188, "num_tokens": 6, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -36.69205093383789, "logits_per_token": -3.6619949340820312, "logits_per_char": -0.7847132001604352, "bits_per_byte": 1.1321018423923817, "num_chars": 28}, {"sum_logits": -24.108478546142578, "num_tokens": 6, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -37.804222106933594, "logits_per_token": -4.01807975769043, "logits_per_char": -0.803615951538086, "bits_per_byte": 1.1593727480640952, "num_chars": 30}, {"sum_logits": -13.97181224822998, "num_tokens": 5, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -34.41872787475586, "logits_per_token": -2.794362449645996, "logits_per_char": -0.5373773941626916, "bits_per_byte": 0.7752717016449059, "num_chars": 26}, {"sum_logits": -5.00625467300415, "num_tokens": 5, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -24.35697364807129, "logits_per_token": -1.00125093460083, "logits_per_char": -0.22755703059109775, "bits_per_byte": 0.32829539955343073, "num_chars": 22}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1036, "native_id": "Mercury_401433", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.327658653259277, "logits_per_token_corr": -2.2655317306518556, "logits_per_char_corr": -1.258628739251031, "bits_per_byte_corr": 1.815817440439095}, "model_output": [{"sum_logits": -14.456085205078125, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -25.920682907104492, "logits_per_token": -3.6140213012695312, "logits_per_char": -2.065155029296875, "bits_per_byte": 2.979388919435644, "num_chars": 7}, {"sum_logits": -8.606240272521973, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -21.434499740600586, "logits_per_token": -4.303120136260986, "logits_per_char": -2.151560068130493, "bits_per_byte": 3.104045040468813, "num_chars": 4}, {"sum_logits": -2.0269200801849365, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": true, "sum_logits_uncond": -17.833528518676758, "logits_per_token": -1.0134600400924683, "logits_per_char": -0.4053840160369873, "bits_per_byte": 0.5848455095926338, "num_chars": 5}, {"sum_logits": -11.327658653259277, "num_tokens": 5, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -27.477615356445312, "logits_per_token": -2.2655317306518556, "logits_per_char": -1.258628739251031, "bits_per_byte": 1.815817440439095, "num_chars": 9}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1037, "native_id": "TIMSS_1995_8_N4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -5.229134559631348, "logits_per_token_corr": -5.229134559631348, "logits_per_char_corr": -0.5810149510701498, "bits_per_byte_corr": 0.838227388591851}, "model_output": [{"sum_logits": -5.387699127197266, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -12.709002494812012, "logits_per_token": -5.387699127197266, "logits_per_char": -0.7696713038853237, "bits_per_byte": 1.1104009732306965, "num_chars": 7}, {"sum_logits": -5.229134559631348, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -14.830971717834473, "logits_per_token": -5.229134559631348, "logits_per_char": -0.5810149510701498, "bits_per_byte": 0.838227388591851, "num_chars": 9}, {"sum_logits": -5.507319450378418, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -11.888246536254883, "logits_per_token": -5.507319450378418, "logits_per_char": -0.6884149312973022, "bits_per_byte": 0.9931728074572479, "num_chars": 8}, {"sum_logits": -5.213201522827148, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -12.696199417114258, "logits_per_token": -5.213201522827148, "logits_per_char": -0.7447430746895927, "bits_per_byte": 1.074437140591846, "num_chars": 7}, {"sum_logits": -5.823711395263672, "num_tokens": 1, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -5.823711395263672, "logits_per_char": -0.9706185658772787, "bits_per_byte": 1.4003065915869137, "num_chars": 6}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1038, "native_id": "Mercury_SC_405885", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.092299461364746, "logits_per_token_corr": -1.8487165768941243, "logits_per_char_corr": -0.3578161116569273, "bits_per_byte_corr": 0.5162195298379915}, "model_output": [{"sum_logits": -13.262258529663086, "num_tokens": 6, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -29.782176971435547, "logits_per_token": -2.210376421610514, "logits_per_char": -0.5100868665255033, "bits_per_byte": 0.7358997927594628, "num_chars": 26}, {"sum_logits": -14.693920135498047, "num_tokens": 4, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -28.548290252685547, "logits_per_token": -3.6734800338745117, "logits_per_char": -0.5066869012240706, "bits_per_byte": 0.7309946796798878, "num_chars": 29}, {"sum_logits": -11.092299461364746, "num_tokens": 6, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -31.738727569580078, "logits_per_token": -1.8487165768941243, "logits_per_char": -0.3578161116569273, "bits_per_byte": 0.5162195298379915, "num_chars": 31}, {"sum_logits": -14.237655639648438, "num_tokens": 5, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -31.022541046142578, "logits_per_token": -2.8475311279296873, "logits_per_char": -0.5084877014160156, "bits_per_byte": 0.7335926851864407, "num_chars": 28}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1039, "native_id": "Mercury_7263638", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.959962844848633, "logits_per_token_corr": -2.659993807474772, "logits_per_char_corr": -0.4987488389015198, "bits_per_byte_corr": 0.7195424765328681}, "model_output": [{"sum_logits": -18.797027587890625, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -33.90058517456055, "logits_per_token": -3.132837931315104, "logits_per_char": -0.6063557286416331, "bits_per_byte": 0.8747864027265265, "num_chars": 31}, {"sum_logits": -15.959962844848633, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -29.916004180908203, "logits_per_token": -2.659993807474772, "logits_per_char": -0.4987488389015198, "bits_per_byte": 0.7195424765328681, "num_chars": 32}, {"sum_logits": -16.595943450927734, "num_tokens": 5, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -27.364273071289062, "logits_per_token": -3.319188690185547, "logits_per_char": -0.46099842919243705, "bits_per_byte": 0.6650801476540087, "num_chars": 36}, {"sum_logits": -24.925193786621094, "num_tokens": 7, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -34.30743408203125, "logits_per_token": -3.560741969517299, "logits_per_char": -0.6559261522795025, "bits_per_byte": 0.9463014070836971, "num_chars": 38}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1040, "native_id": "Mercury_401428", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -41.0060920715332, "logits_per_token_corr": -3.727826551957564, "logits_per_char_corr": -0.8040410210104549, "bits_per_byte_corr": 1.1599859936839156}, "model_output": [{"sum_logits": -35.074520111083984, "num_tokens": 10, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -45.629493713378906, "logits_per_token": -3.5074520111083984, "logits_per_char": -0.715806532879265, "bits_per_byte": 1.0326905352215803, "num_chars": 49}, {"sum_logits": -41.0060920715332, "num_tokens": 11, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -51.14247131347656, "logits_per_token": -3.727826551957564, "logits_per_char": -0.8040410210104549, "bits_per_byte": 1.1599859936839156, "num_chars": 51}, {"sum_logits": -28.311620712280273, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -37.74947738647461, "logits_per_token": -4.044517244611468, "logits_per_char": -0.5777881778016383, "bits_per_byte": 0.833572138799293, "num_chars": 49}, {"sum_logits": -48.03446960449219, "num_tokens": 11, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -59.30849075317383, "logits_per_token": -4.366769964044744, "logits_per_char": -0.9606893920898437, "bits_per_byte": 1.3859818218036462, "num_chars": 50}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1041, "native_id": "Mercury_SC_402121", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.712015151977539, "logits_per_token_corr": -1.8160021645682198, "logits_per_char_corr": -0.30266702742803664, "bits_per_byte_corr": 0.43665621951134603}, "model_output": [{"sum_logits": -13.090987205505371, "num_tokens": 8, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -21.412784576416016, "logits_per_token": -1.6363734006881714, "logits_per_char": -0.3116901715596517, "bits_per_byte": 0.44967386480326277, "num_chars": 42}, {"sum_logits": -12.712015151977539, "num_tokens": 7, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -27.427942276000977, "logits_per_token": -1.8160021645682198, "logits_per_char": -0.30266702742803664, "bits_per_byte": 0.43665621951134603, "num_chars": 42}, {"sum_logits": -19.955841064453125, "num_tokens": 7, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -41.497928619384766, "logits_per_token": -2.850834437779018, "logits_per_char": -0.3991168212890625, "bits_per_byte": 0.5758038588095108, "num_chars": 50}, {"sum_logits": -22.364826202392578, "num_tokens": 7, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -42.00684356689453, "logits_per_token": -3.1949751717703685, "logits_per_char": -0.4385260039684819, "bits_per_byte": 0.6326592912266372, "num_chars": 51}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1042, "native_id": "NYSEDREGENTS_2015_4_7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -1.6058595180511475, "logits_per_token_corr": -1.6058595180511475, "logits_per_char_corr": -0.17842883533901638, "bits_per_byte_corr": 0.2574183958953773}, "model_output": [{"sum_logits": -1.6058595180511475, "num_tokens": 1, "num_tokens_all": 193, "is_greedy": true, "sum_logits_uncond": -13.385931015014648, "logits_per_token": -1.6058595180511475, "logits_per_char": -0.17842883533901638, "bits_per_byte": 0.2574183958953773, "num_chars": 9}, {"sum_logits": -2.416635274887085, "num_tokens": 1, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -11.988180160522461, "logits_per_token": -2.416635274887085, "logits_per_char": -0.26851503054300946, "bits_per_byte": 0.3873853029688266, "num_chars": 9}, {"sum_logits": -4.8573527336120605, "num_tokens": 3, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -18.153175354003906, "logits_per_token": -1.6191175778706868, "logits_per_char": -0.3035845458507538, "bits_per_byte": 0.43797991878972525, "num_chars": 16}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1043, "native_id": "MCAS_2012_5_23614", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.597320556640625, "logits_per_token_corr": -1.2662200927734375, "logits_per_char_corr": -0.23741626739501953, "bits_per_byte_corr": 0.34251927159740886}, "model_output": [{"sum_logits": -19.483139038085938, "num_tokens": 7, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -31.88022232055664, "logits_per_token": -2.7833055768694197, "logits_per_char": -0.5411983066134982, "bits_per_byte": 0.7807841130893595, "num_chars": 36}, {"sum_logits": -18.265920639038086, "num_tokens": 6, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -34.853553771972656, "logits_per_token": -3.0443201065063477, "logits_per_char": -0.5218834468296596, "bits_per_byte": 0.7529186606637298, "num_chars": 35}, {"sum_logits": -7.597320556640625, "num_tokens": 6, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -30.242761611938477, "logits_per_token": -1.2662200927734375, "logits_per_char": -0.23741626739501953, "bits_per_byte": 0.34251927159740886, "num_chars": 32}, {"sum_logits": -9.110671043395996, "num_tokens": 6, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -32.700740814208984, "logits_per_token": -1.5184451738993328, "logits_per_char": -0.30368903477986653, "bits_per_byte": 0.43813066444958415, "num_chars": 30}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1044, "native_id": "Mercury_407262", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -27.663970947265625, "logits_per_token_corr": -3.951995849609375, "logits_per_char_corr": -0.8136462043313419, "bits_per_byte_corr": 1.1738433440277987}, "model_output": [{"sum_logits": -27.663970947265625, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -40.71637725830078, "logits_per_token": -3.951995849609375, "logits_per_char": -0.8136462043313419, "bits_per_byte": 1.1738433440277987, "num_chars": 34}, {"sum_logits": -40.06858444213867, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -42.75570297241211, "logits_per_token": -5.724083491734096, "logits_per_char": -1.0017146110534667, "bits_per_byte": 1.4451687017538914, "num_chars": 40}, {"sum_logits": -40.15678405761719, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -51.19489669799805, "logits_per_token": -5.736683436802456, "logits_per_char": -1.1154662238226996, "bits_per_byte": 1.6092775893893032, "num_chars": 36}, {"sum_logits": -33.79597473144531, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -44.38011169433594, "logits_per_token": -5.632662455240886, "logits_per_char": -0.8242920666206174, "bits_per_byte": 1.189202076758534, "num_chars": 41}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1045, "native_id": "MCAS_2014_8_6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -40.73204040527344, "logits_per_token_corr": -2.7154693603515625, "logits_per_char_corr": -0.5903194261633832, "bits_per_byte_corr": 0.8516509086669434}, "model_output": [{"sum_logits": -39.425453186035156, "num_tokens": 12, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -48.83229064941406, "logits_per_token": -3.285454432169596, "logits_per_char": -0.6463189046891009, "bits_per_byte": 0.9324410786284224, "num_chars": 61}, {"sum_logits": -40.73204040527344, "num_tokens": 15, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -55.27117919921875, "logits_per_token": -2.7154693603515625, "logits_per_char": -0.5903194261633832, "bits_per_byte": 0.8516509086669434, "num_chars": 69}, {"sum_logits": -48.137245178222656, "num_tokens": 13, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -58.4825325012207, "logits_per_token": -3.702865013709435, "logits_per_char": -0.7293521996700403, "bits_per_byte": 1.0522328015261802, "num_chars": 66}, {"sum_logits": -39.043792724609375, "num_tokens": 13, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -53.0731201171875, "logits_per_token": -3.003368671123798, "logits_per_char": -0.5276188206028294, "bits_per_byte": 0.7611930559639325, "num_chars": 74}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1046, "native_id": "Mercury_7032515", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.00924301147461, "logits_per_token_corr": -2.201848602294922, "logits_per_char_corr": -0.6880776882171631, "bits_per_byte_corr": 0.9926862685379567}, "model_output": [{"sum_logits": -10.708024978637695, "num_tokens": 2, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -15.99191665649414, "logits_per_token": -5.354012489318848, "logits_per_char": -0.9734568162397905, "bits_per_byte": 1.4044013213097137, "num_chars": 11}, {"sum_logits": -12.04409122467041, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -16.28146743774414, "logits_per_token": -4.014697074890137, "logits_per_char": -1.3382323582967122, "bits_per_byte": 1.9306611868731962, "num_chars": 9}, {"sum_logits": -11.869853019714355, "num_tokens": 3, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -17.68353271484375, "logits_per_token": -3.9566176732381186, "logits_per_char": -0.9891544183095297, "bits_per_byte": 1.427048173969591, "num_chars": 12}, {"sum_logits": -11.00924301147461, "num_tokens": 5, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -21.155513763427734, "logits_per_token": -2.201848602294922, "logits_per_char": -0.6880776882171631, "bits_per_byte": 0.9926862685379567, "num_chars": 16}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1047, "native_id": "Mercury_7270165", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.92844295501709, "logits_per_token_corr": -0.8273702462514242, "logits_per_char_corr": -0.17729362419673375, "bits_per_byte_corr": 0.25578063241004306}, "model_output": [{"sum_logits": -10.422224044799805, "num_tokens": 11, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -26.122886657714844, "logits_per_token": -0.9474749131636186, "logits_per_char": -0.18284603587368078, "bits_per_byte": 0.2637910692013543, "num_chars": 57}, {"sum_logits": -11.505127906799316, "num_tokens": 10, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -29.48956298828125, "logits_per_token": -1.1505127906799317, "logits_per_char": -0.21707788503394937, "bits_per_byte": 0.3131771882253683, "num_chars": 53}, {"sum_logits": -9.92844295501709, "num_tokens": 12, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -27.82645606994629, "logits_per_token": -0.8273702462514242, "logits_per_char": -0.17729362419673375, "bits_per_byte": 0.25578063241004306, "num_chars": 56}, {"sum_logits": -11.625520706176758, "num_tokens": 12, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -29.182376861572266, "logits_per_token": -0.9687933921813965, "logits_per_char": -0.2075985840388707, "bits_per_byte": 0.2995014476886646, "num_chars": 56}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1048, "native_id": "Mercury_7017045", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.54755163192749, "logits_per_token_corr": -1.8491838773091633, "logits_per_char_corr": -0.42673474091749924, "bits_per_byte_corr": 0.6156480944971551}, "model_output": [{"sum_logits": -6.746259689331055, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -20.43441390991211, "logits_per_token": -2.248753229777018, "logits_per_char": -0.5189430530254657, "bits_per_byte": 0.7486765691041557, "num_chars": 13}, {"sum_logits": -5.54755163192749, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -19.068395614624023, "logits_per_token": -1.8491838773091633, "logits_per_char": -0.42673474091749924, "bits_per_byte": 0.6156480944971551, "num_chars": 13}, {"sum_logits": -6.183536529541016, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -22.222347259521484, "logits_per_token": -2.0611788431803384, "logits_per_char": -0.3864710330963135, "bits_per_byte": 0.5575598428956865, "num_chars": 16}, {"sum_logits": -8.588264465332031, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -23.271194458007812, "logits_per_token": -2.8627548217773438, "logits_per_char": -0.45201391922800166, "bits_per_byte": 0.6521182396834909, "num_chars": 19}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1049, "native_id": "Mercury_SC_400386", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.7159481048584, "logits_per_token_corr": -3.2451354435511996, "logits_per_char_corr": -0.8413314112910518, "bits_per_byte_corr": 1.2137846548145854}, "model_output": [{"sum_logits": -22.7159481048584, "num_tokens": 7, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -32.564815521240234, "logits_per_token": -3.2451354435511996, "logits_per_char": -0.8413314112910518, "bits_per_byte": 1.2137846548145854, "num_chars": 27}, {"sum_logits": -13.974828720092773, "num_tokens": 6, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -24.730533599853516, "logits_per_token": -2.3291381200154624, "logits_per_char": -0.6076012486996858, "bits_per_byte": 0.8765833083376082, "num_chars": 23}, {"sum_logits": -19.77191162109375, "num_tokens": 7, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -29.892940521240234, "logits_per_token": -2.824558803013393, "logits_per_char": -0.7322930230034722, "bits_per_byte": 1.056475512765456, "num_chars": 27}, {"sum_logits": -20.625856399536133, "num_tokens": 6, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -29.758657455444336, "logits_per_token": -3.437642733256022, "logits_per_char": -0.7366377285548619, "bits_per_byte": 1.062743597918573, "num_chars": 28}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1050, "native_id": "Mercury_400750", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -3.9382429122924805, "logits_per_token_corr": -3.9382429122924805, "logits_per_char_corr": -1.9691214561462402, "bits_per_byte_corr": 2.8408417596922764}, "model_output": [{"sum_logits": -2.870272636413574, "num_tokens": 1, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -6.875624656677246, "logits_per_token": -2.870272636413574, "logits_per_char": -1.435136318206787, "bits_per_byte": 2.0704640492780646, "num_chars": 2}, {"sum_logits": -3.534609794616699, "num_tokens": 1, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -6.782100677490234, "logits_per_token": -3.534609794616699, "logits_per_char": -1.7673048973083496, "bits_per_byte": 2.5496820110873664, "num_chars": 2}, {"sum_logits": -3.9876813888549805, "num_tokens": 1, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -7.313617706298828, "logits_per_token": -3.9876813888549805, "logits_per_char": -1.9938406944274902, "bits_per_byte": 2.876504082175214, "num_chars": 2}, {"sum_logits": -3.9382429122924805, "num_tokens": 1, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -7.417354583740234, "logits_per_token": -3.9382429122924805, "logits_per_char": -1.9691214561462402, "bits_per_byte": 2.8408417596922764, "num_chars": 2}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1051, "native_id": "MCAS_2006_9_28-v1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -32.370086669921875, "logits_per_token_corr": -2.023130416870117, "logits_per_char_corr": -0.43160115559895834, "bits_per_byte_corr": 0.6226688468250104}, "model_output": [{"sum_logits": -28.875682830810547, "num_tokens": 11, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -46.681114196777344, "logits_per_token": -2.6250620755282315, "logits_per_char": -0.48126138051350914, "bits_per_byte": 0.6943134070387149, "num_chars": 60}, {"sum_logits": -34.085777282714844, "num_tokens": 14, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -50.744686126708984, "logits_per_token": -2.4346983773367747, "logits_per_char": -0.508742944518132, "bits_per_byte": 0.7339609231440853, "num_chars": 67}, {"sum_logits": -32.370086669921875, "num_tokens": 16, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -52.662837982177734, "logits_per_token": -2.023130416870117, "logits_per_char": -0.43160115559895834, "bits_per_byte": 0.6226688468250104, "num_chars": 75}, {"sum_logits": -41.47981643676758, "num_tokens": 15, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -63.010780334472656, "logits_per_token": -2.7653210957845054, "logits_per_char": -0.5386989147632153, "bits_per_byte": 0.7771782528617155, "num_chars": 77}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1052, "native_id": "Mercury_416376", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.7659454345703125, "logits_per_token_corr": -5.7659454345703125, "logits_per_char_corr": -0.6406606038411459, "bits_per_byte_corr": 0.9242778760552139}, "model_output": [{"sum_logits": -2.155336618423462, "num_tokens": 1, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -12.482002258300781, "logits_per_token": -2.155336618423462, "logits_per_char": -0.17961138486862183, "bits_per_byte": 0.25912445423734587, "num_chars": 12}, {"sum_logits": -4.719852924346924, "num_tokens": 2, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -17.17950439453125, "logits_per_token": -2.359926462173462, "logits_per_char": -0.5244281027052138, "bits_per_byte": 0.7565898230761635, "num_chars": 9}, {"sum_logits": -9.730534553527832, "num_tokens": 2, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -16.047266006469727, "logits_per_token": -4.865267276763916, "logits_per_char": -1.081170505947537, "bits_per_byte": 1.5597993272870436, "num_chars": 9}, {"sum_logits": -5.7659454345703125, "num_tokens": 1, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -13.244049072265625, "logits_per_token": -5.7659454345703125, "logits_per_char": -0.6406606038411459, "bits_per_byte": 0.9242778760552139, "num_chars": 9}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1053, "native_id": "Mercury_7086520", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.221258163452148, "logits_per_token_corr": -1.3702096939086914, "logits_per_char_corr": -0.37369355288418854, "bits_per_byte_corr": 0.5391258355585837}, "model_output": [{"sum_logits": -8.221258163452148, "num_tokens": 6, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -26.09626007080078, "logits_per_token": -1.3702096939086914, "logits_per_char": -0.37369355288418854, "bits_per_byte": 0.5391258355585837, "num_chars": 22}, {"sum_logits": -12.309576034545898, "num_tokens": 6, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -29.93598175048828, "logits_per_token": -2.05159600575765, "logits_per_char": -0.5351989580237347, "bits_per_byte": 0.7721288826303373, "num_chars": 23}, {"sum_logits": -15.896358489990234, "num_tokens": 6, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -31.25819969177246, "logits_per_token": -2.649393081665039, "logits_per_char": -0.5887540181477865, "bits_per_byte": 0.8493925022858726, "num_chars": 27}, {"sum_logits": -10.183005332946777, "num_tokens": 7, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -29.234512329101562, "logits_per_token": -1.4547150475638253, "logits_per_char": -0.3085759191802054, "bits_per_byte": 0.44518094833935573, "num_chars": 33}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1054, "native_id": "Mercury_7014333", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.115017890930176, "logits_per_token_corr": -3.3716726303100586, "logits_per_char_corr": -0.9195470809936523, "bits_per_byte_corr": 1.3266260136144172}, "model_output": [{"sum_logits": -10.115017890930176, "num_tokens": 3, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -15.785848617553711, "logits_per_token": -3.3716726303100586, "logits_per_char": -0.9195470809936523, "bits_per_byte": 1.3266260136144172, "num_chars": 11}, {"sum_logits": -10.59998893737793, "num_tokens": 3, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -17.603015899658203, "logits_per_token": -3.533329645792643, "logits_per_char": -0.9636353579434481, "bits_per_byte": 1.3902319521312725, "num_chars": 11}, {"sum_logits": -10.09153938293457, "num_tokens": 3, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -18.103519439697266, "logits_per_token": -3.36384646097819, "logits_per_char": -0.7208242416381836, "bits_per_byte": 1.0399295587647024, "num_chars": 14}, {"sum_logits": -11.375152587890625, "num_tokens": 4, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -19.17098045349121, "logits_per_token": -2.8437881469726562, "logits_per_char": -0.6319529215494791, "bits_per_byte": 0.9117153459953807, "num_chars": 18}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1055, "native_id": "Mercury_SC_406623", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.096776962280273, "logits_per_token_corr": -2.442396708897182, "logits_per_char_corr": -0.4070661181495303, "bits_per_byte_corr": 0.5872722699686701}, "model_output": [{"sum_logits": -17.096776962280273, "num_tokens": 7, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -37.379268646240234, "logits_per_token": -2.442396708897182, "logits_per_char": -0.4070661181495303, "bits_per_byte": 0.5872722699686701, "num_chars": 42}, {"sum_logits": -18.28228759765625, "num_tokens": 8, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -39.256813049316406, "logits_per_token": -2.2852859497070312, "logits_per_char": -0.4352925618489583, "bits_per_byte": 0.6279944203157958, "num_chars": 42}, {"sum_logits": -23.490312576293945, "num_tokens": 7, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -42.31814193725586, "logits_per_token": -3.355758939470564, "logits_per_char": -0.6525086826748319, "bits_per_byte": 0.9413710406326465, "num_chars": 36}, {"sum_logits": -20.578062057495117, "num_tokens": 7, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -35.04814910888672, "logits_per_token": -2.939723151070731, "logits_per_char": -0.5716128349304199, "bits_per_byte": 0.8246630022631909, "num_chars": 36}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1056, "native_id": "Mercury_7042648", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.285826683044434, "logits_per_token_corr": -5.285826683044434, "logits_per_char_corr": -0.6607283353805542, "bits_per_byte_corr": 0.9532294928290302}, "model_output": [{"sum_logits": -5.285826683044434, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -14.858990669250488, "logits_per_token": -5.285826683044434, "logits_per_char": -0.6607283353805542, "bits_per_byte": 0.9532294928290302, "num_chars": 8}, {"sum_logits": -3.3413093090057373, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -15.566609382629395, "logits_per_token": -3.3413093090057373, "logits_per_char": -0.37125658988952637, "bits_per_byte": 0.5356100411313521, "num_chars": 9}, {"sum_logits": -10.888386726379395, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -17.082311630249023, "logits_per_token": -5.444193363189697, "logits_per_char": -0.9073655605316162, "bits_per_byte": 1.3090517944533377, "num_chars": 12}, {"sum_logits": -8.992793083190918, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -14.805025100708008, "logits_per_token": -8.992793083190918, "logits_per_char": -0.7493994235992432, "bits_per_byte": 1.0811548320724524, "num_chars": 12}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1057, "native_id": "MCAS_2004_8_23", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -19.336994171142578, "logits_per_token_corr": -3.8673988342285157, "logits_per_char_corr": -1.7579085610129617, "bits_per_byte_corr": 2.5361259633114757}, "model_output": [{"sum_logits": -19.856842041015625, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -33.27098083496094, "logits_per_token": -3.971368408203125, "logits_per_char": -1.8051674582741477, "bits_per_byte": 2.6043061400281187, "num_chars": 11}, {"sum_logits": -19.336994171142578, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -34.271419525146484, "logits_per_token": -3.8673988342285157, "logits_per_char": -1.7579085610129617, "bits_per_byte": 2.5361259633114757, "num_chars": 11}, {"sum_logits": -17.523178100585938, "num_tokens": 6, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -35.659217834472656, "logits_per_token": -2.9205296834309897, "logits_per_char": -1.593016190962358, "bits_per_byte": 2.298236558758871, "num_chars": 11}, {"sum_logits": -22.0372371673584, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -36.20309066772461, "logits_per_token": -4.407447433471679, "logits_per_char": -1.8364364306132, "bits_per_byte": 2.649417731355396, "num_chars": 12}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1058, "native_id": "MCAS_2013_8_29425", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.779253005981445, "logits_per_token_corr": -1.8643614451090496, "logits_per_char_corr": -0.4302372565636268, "bits_per_byte_corr": 0.620701156450463}, "model_output": [{"sum_logits": -16.779253005981445, "num_tokens": 9, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -30.03644561767578, "logits_per_token": -1.8643614451090496, "logits_per_char": -0.4302372565636268, "bits_per_byte": 0.620701156450463, "num_chars": 39}, {"sum_logits": -21.39910888671875, "num_tokens": 9, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -34.5878791809082, "logits_per_token": -2.3776787651909723, "logits_per_char": -0.5486950996594552, "bits_per_byte": 0.7915996992393403, "num_chars": 39}, {"sum_logits": -14.011429786682129, "num_tokens": 9, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -27.580846786499023, "logits_per_token": -1.55682553185357, "logits_per_char": -0.3417421899190763, "bits_per_byte": 0.4930297626591399, "num_chars": 41}, {"sum_logits": -25.124408721923828, "num_tokens": 9, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -35.02339172363281, "logits_per_token": -2.7916009691026478, "logits_per_char": -0.5583201938205296, "bits_per_byte": 0.8054857748536215, "num_chars": 45}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1059, "native_id": "MEAP_2005_5_15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.45541763305664, "logits_per_token_corr": -1.9504925120960583, "logits_per_char_corr": -0.4048192006237102, "bits_per_byte_corr": 0.5840306531968806}, "model_output": [{"sum_logits": -44.13220977783203, "num_tokens": 11, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -43.62848663330078, "logits_per_token": -4.012019070712003, "logits_per_char": -0.9006573424047354, "bits_per_byte": 1.2993738814284783, "num_chars": 49}, {"sum_logits": -21.45541763305664, "num_tokens": 11, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -25.90245819091797, "logits_per_token": -1.9504925120960583, "logits_per_char": -0.4048192006237102, "bits_per_byte": 0.5840306531968806, "num_chars": 53}, {"sum_logits": -46.61445236206055, "num_tokens": 12, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -51.757049560546875, "logits_per_token": -3.884537696838379, "logits_per_char": -0.8036974545182853, "bits_per_byte": 1.1594903320094465, "num_chars": 58}, {"sum_logits": -38.769683837890625, "num_tokens": 14, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -52.55109405517578, "logits_per_token": -2.769263131277902, "logits_per_char": -0.578651997580457, "bits_per_byte": 0.8348183673104176, "num_chars": 67}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1060, "native_id": "Mercury_7016258", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.603660583496094, "logits_per_token_corr": -1.3719515119280135, "logits_per_char_corr": -0.26676834954155815, "bits_per_byte_corr": 0.384865374950016}, "model_output": [{"sum_logits": -7.788440704345703, "num_tokens": 6, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -28.2486515045166, "logits_per_token": -1.298073450724284, "logits_per_char": -0.24338877201080322, "bits_per_byte": 0.35113577438829263, "num_chars": 32}, {"sum_logits": -9.603660583496094, "num_tokens": 7, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -34.84355926513672, "logits_per_token": -1.3719515119280135, "logits_per_char": -0.26676834954155815, "bits_per_byte": 0.384865374950016, "num_chars": 36}, {"sum_logits": -16.493446350097656, "num_tokens": 6, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -31.13966178894043, "logits_per_token": -2.748907725016276, "logits_per_char": -0.47124132428850446, "bits_per_byte": 0.6798575216134617, "num_chars": 35}, {"sum_logits": -16.910438537597656, "num_tokens": 6, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -28.913164138793945, "logits_per_token": -2.818406422932943, "logits_per_char": -0.46973440382215714, "bits_per_byte": 0.6776834949296467, "num_chars": 36}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1061, "native_id": "NCEOGA_2013_8_5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -28.469572067260742, "logits_per_token_corr": -4.74492867787679, "logits_per_char_corr": -0.6470357288013805, "bits_per_byte_corr": 0.9334752372203985}, "model_output": [{"sum_logits": -28.469572067260742, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -36.64138412475586, "logits_per_token": -4.74492867787679, "logits_per_char": -0.6470357288013805, "bits_per_byte": 0.9334752372203985, "num_chars": 44}, {"sum_logits": -21.965438842773438, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -29.36511993408203, "logits_per_token": -5.491359710693359, "logits_per_char": -0.7844799586704799, "bits_per_byte": 1.1317653460514934, "num_chars": 28}, {"sum_logits": -24.220844268798828, "num_tokens": 4, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -31.791790008544922, "logits_per_token": -6.055211067199707, "logits_per_char": -0.8970683062518084, "bits_per_byte": 1.2941959967690757, "num_chars": 27}, {"sum_logits": -23.95962142944336, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -29.756900787353516, "logits_per_token": -4.791924285888672, "logits_per_char": -0.6845606122698102, "bits_per_byte": 0.9876122005102772, "num_chars": 35}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1062, "native_id": "Mercury_7015540", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.164338111877441, "logits_per_token_corr": -3.0410845279693604, "logits_per_char_corr": -0.5288842657338018, "bits_per_byte_corr": 0.7630187073789046}, "model_output": [{"sum_logits": -11.140400886535645, "num_tokens": 5, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -25.730823516845703, "logits_per_token": -2.228080177307129, "logits_per_char": -0.4843652559363324, "bits_per_byte": 0.6987913527187624, "num_chars": 23}, {"sum_logits": -12.164338111877441, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -19.401714324951172, "logits_per_token": -3.0410845279693604, "logits_per_char": -0.5288842657338018, "bits_per_byte": 0.7630187073789046, "num_chars": 23}, {"sum_logits": -15.457741737365723, "num_tokens": 5, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -28.991100311279297, "logits_per_token": -3.0915483474731444, "logits_per_char": -0.5945285283602201, "bits_per_byte": 0.8577233595329191, "num_chars": 26}, {"sum_logits": -11.35901165008545, "num_tokens": 6, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -29.406982421875, "logits_per_token": -1.893168608347575, "logits_per_char": -0.436885063464825, "bits_per_byte": 0.6302919144996159, "num_chars": 26}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1063, "native_id": "Mercury_SC_414001", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -30.29020881652832, "logits_per_token_corr": -2.0193472544352216, "logits_per_char_corr": -0.4266226593877228, "bits_per_byte_corr": 0.6154863950299713}, "model_output": [{"sum_logits": -30.54840087890625, "num_tokens": 15, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -49.70555877685547, "logits_per_token": -2.03656005859375, "logits_per_char": -0.418471244916524, "bits_per_byte": 0.6037263897961337, "num_chars": 73}, {"sum_logits": -29.9352970123291, "num_tokens": 15, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -48.173973083496094, "logits_per_token": -1.9956864674886068, "logits_per_char": -0.4157680140601264, "bits_per_byte": 0.5998264520452281, "num_chars": 72}, {"sum_logits": -32.54066848754883, "num_tokens": 15, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -50.2745361328125, "logits_per_token": -2.169377899169922, "logits_per_char": -0.4519537289937337, "bits_per_byte": 0.6520314035310025, "num_chars": 72}, {"sum_logits": -30.29020881652832, "num_tokens": 15, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -47.84089660644531, "logits_per_token": -2.0193472544352216, "logits_per_char": -0.4266226593877228, "bits_per_byte": 0.6154863950299713, "num_chars": 71}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1064, "native_id": "Mercury_7017973", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -21.458288192749023, "logits_per_token_corr": -5.364572048187256, "logits_per_char_corr": -0.9329690518586532, "bits_per_byte_corr": 1.345989824420324}, "model_output": [{"sum_logits": -18.655193328857422, "num_tokens": 3, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -24.239627838134766, "logits_per_token": -6.218397776285808, "logits_per_char": -1.0973643134622013, "bits_per_byte": 1.583162053081577, "num_chars": 17}, {"sum_logits": -18.669212341308594, "num_tokens": 3, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -23.018150329589844, "logits_per_token": -6.223070780436198, "logits_per_char": -1.037178463406033, "bits_per_byte": 1.496332225673794, "num_chars": 18}, {"sum_logits": -15.328201293945312, "num_tokens": 2, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -18.59722900390625, "logits_per_token": -7.664100646972656, "logits_per_char": -1.0218800862630208, "bits_per_byte": 1.4742613328359055, "num_chars": 15}, {"sum_logits": -21.458288192749023, "num_tokens": 4, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -27.836977005004883, "logits_per_token": -5.364572048187256, "logits_per_char": -0.9329690518586532, "bits_per_byte": 1.345989824420324, "num_chars": 23}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1065, "native_id": "Mercury_407097", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -30.897159576416016, "logits_per_token_corr": -2.574763298034668, "logits_per_char_corr": -0.5065108127281314, "bits_per_byte_corr": 0.7307406376800386}, "model_output": [{"sum_logits": -14.467706680297852, "num_tokens": 10, "num_tokens_all": 232, "is_greedy": false, "sum_logits_uncond": -32.02306365966797, "logits_per_token": -1.4467706680297852, "logits_per_char": -0.30782354638931597, "bits_per_byte": 0.444095503845039, "num_chars": 47}, {"sum_logits": -33.46293258666992, "num_tokens": 12, "num_tokens_all": 234, "is_greedy": false, "sum_logits_uncond": -51.31782150268555, "logits_per_token": -2.7885777155558267, "logits_per_char": -0.6435179343590369, "bits_per_byte": 0.928400132623559, "num_chars": 52}, {"sum_logits": -30.897159576416016, "num_tokens": 12, "num_tokens_all": 234, "is_greedy": false, "sum_logits_uncond": -45.57774353027344, "logits_per_token": -2.574763298034668, "logits_per_char": -0.5065108127281314, "bits_per_byte": 0.7307406376800386, "num_chars": 61}, {"sum_logits": -31.593923568725586, "num_tokens": 15, "num_tokens_all": 237, "is_greedy": false, "sum_logits_uncond": -45.61478042602539, "logits_per_token": -2.1062615712483725, "logits_per_char": -0.4388044940100776, "bits_per_byte": 0.6330610674285846, "num_chars": 72}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1066, "native_id": "Mercury_SC_406794", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.649153709411621, "logits_per_token_corr": -1.1649153709411622, "logits_per_char_corr": -0.25887008243136933, "bits_per_byte_corr": 0.373470584158522}, "model_output": [{"sum_logits": -16.280200958251953, "num_tokens": 9, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -29.102428436279297, "logits_per_token": -1.8089112175835504, "logits_per_char": -0.37000456723299896, "bits_per_byte": 0.5338037542536982, "num_chars": 44}, {"sum_logits": -25.25778579711914, "num_tokens": 8, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -35.690879821777344, "logits_per_token": -3.1572232246398926, "logits_per_char": -0.5262038707733154, "bits_per_byte": 0.7591517148617846, "num_chars": 48}, {"sum_logits": -11.649153709411621, "num_tokens": 10, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -29.027118682861328, "logits_per_token": -1.1649153709411622, "logits_per_char": -0.25887008243136933, "bits_per_byte": 0.373470584158522, "num_chars": 45}, {"sum_logits": -15.65601634979248, "num_tokens": 9, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -33.67950439453125, "logits_per_token": -1.7395573721991644, "logits_per_char": -0.37276229404267813, "bits_per_byte": 0.5377823130461516, "num_chars": 42}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1067, "native_id": "Mercury_7227710", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.684393882751465, "logits_per_token_corr": -1.894797960917155, "logits_per_char_corr": -0.5167630802501332, "bits_per_byte_corr": 0.7455315331919081}, "model_output": [{"sum_logits": -7.407765865325928, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -17.656103134155273, "logits_per_token": -3.703882932662964, "logits_per_char": -0.6734332604841753, "bits_per_byte": 0.9715588252709032, "num_chars": 11}, {"sum_logits": -8.932690620422363, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -16.18396759033203, "logits_per_token": -4.466345310211182, "logits_per_char": -1.2760986600603377, "bits_per_byte": 1.841021208555423, "num_chars": 7}, {"sum_logits": -6.437180519104004, "num_tokens": 5, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -23.643251419067383, "logits_per_token": -1.2874361038208009, "logits_per_char": -0.5364317099253336, "bits_per_byte": 0.7739073676854218, "num_chars": 12}, {"sum_logits": -5.684393882751465, "num_tokens": 3, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -18.884750366210938, "logits_per_token": -1.894797960917155, "logits_per_char": -0.5167630802501332, "bits_per_byte": 0.7455315331919081, "num_chars": 11}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1068, "native_id": "Mercury_SC_406710", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -9.68297290802002, "logits_per_token_corr": -2.420743227005005, "logits_per_char_corr": -0.6455315272013347, "bits_per_byte_corr": 0.9313051330315136}, "model_output": [{"sum_logits": -11.64493465423584, "num_tokens": 3, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -20.72705841064453, "logits_per_token": -3.88164488474528, "logits_per_char": -1.0586304231123491, "bits_per_byte": 1.5272808615594684, "num_chars": 11}, {"sum_logits": -9.599781036376953, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -22.003623962402344, "logits_per_token": -2.3999452590942383, "logits_per_char": -0.7384446951059195, "bits_per_byte": 1.065350499600838, "num_chars": 13}, {"sum_logits": -9.68297290802002, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -26.174650192260742, "logits_per_token": -2.420743227005005, "logits_per_char": -0.6455315272013347, "bits_per_byte": 0.9313051330315136, "num_chars": 15}, {"sum_logits": -14.798337936401367, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -26.79669761657715, "logits_per_token": -3.699584484100342, "logits_per_char": -0.9248961210250854, "bits_per_byte": 1.334343047141288, "num_chars": 16}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1069, "native_id": "Mercury_401926", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.942902565002441, "logits_per_token_corr": -4.9809675216674805, "logits_per_char_corr": -0.747145128250122, "bits_per_byte_corr": 1.077902571351574}, "model_output": [{"sum_logits": -7.478091239929199, "num_tokens": 1, "num_tokens_all": 245, "is_greedy": false, "sum_logits_uncond": -13.904370307922363, "logits_per_token": -7.478091239929199, "logits_per_char": -0.9347614049911499, "bits_per_byte": 1.3485756433961007, "num_chars": 8}, {"sum_logits": -6.817047595977783, "num_tokens": 3, "num_tokens_all": 247, "is_greedy": false, "sum_logits_uncond": -17.488937377929688, "logits_per_token": -2.272349198659261, "logits_per_char": -0.5243882766136756, "bits_per_byte": 0.7565323661714033, "num_chars": 13}, {"sum_logits": -4.715641498565674, "num_tokens": 2, "num_tokens_all": 246, "is_greedy": false, "sum_logits_uncond": -13.88475227355957, "logits_per_token": -2.357820749282837, "logits_per_char": -0.31437609990437826, "bits_per_byte": 0.4535488403063857, "num_chars": 15}, {"sum_logits": -14.942902565002441, "num_tokens": 3, "num_tokens_all": 247, "is_greedy": false, "sum_logits_uncond": -24.98659324645996, "logits_per_token": -4.9809675216674805, "logits_per_char": -0.747145128250122, "bits_per_byte": 1.077902571351574, "num_chars": 20}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1070, "native_id": "MCAS_2014_5_15", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.688310623168945, "logits_per_token_corr": -2.076478958129883, "logits_per_char_corr": -0.34607982635498047, "bits_per_byte_corr": 0.4992876492344026}, "model_output": [{"sum_logits": -25.29790687561035, "num_tokens": 12, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -37.96971130371094, "logits_per_token": -2.108158906300863, "logits_per_char": -0.4773189976530255, "bits_per_byte": 0.6886257508366054, "num_chars": 53}, {"sum_logits": -22.014480590820312, "num_tokens": 9, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -32.949398040771484, "logits_per_token": -2.446053398980035, "logits_per_char": -0.4153675583173644, "bits_per_byte": 0.5992487165310494, "num_chars": 53}, {"sum_logits": -25.665653228759766, "num_tokens": 12, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -38.375213623046875, "logits_per_token": -2.1388044357299805, "logits_per_char": -0.458315236227853, "bits_per_byte": 0.6612091184702523, "num_chars": 56}, {"sum_logits": -18.688310623168945, "num_tokens": 9, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -29.173538208007812, "logits_per_token": -2.076478958129883, "logits_per_char": -0.34607982635498047, "bits_per_byte": 0.4992876492344026, "num_chars": 54}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1071, "native_id": "Mercury_LBS10151", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.668323040008545, "logits_per_token_corr": -5.668323040008545, "logits_per_char_corr": -1.4170807600021362, "bits_per_byte_corr": 2.044415384995714}, "model_output": [{"sum_logits": -5.668323040008545, "num_tokens": 1, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -12.679121971130371, "logits_per_token": -5.668323040008545, "logits_per_char": -1.4170807600021362, "bits_per_byte": 2.044415384995714, "num_chars": 4}, {"sum_logits": -2.699845790863037, "num_tokens": 1, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -14.647337913513184, "logits_per_token": -2.699845790863037, "logits_per_char": -0.44997429847717285, "bits_per_byte": 0.6491756889409739, "num_chars": 6}, {"sum_logits": -2.9166674613952637, "num_tokens": 1, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -14.588896751403809, "logits_per_token": -2.9166674613952637, "logits_per_char": -0.4166667801993234, "bits_per_byte": 0.6011230974971675, "num_chars": 7}, {"sum_logits": -7.878413677215576, "num_tokens": 2, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -15.228195190429688, "logits_per_token": -3.939206838607788, "logits_per_char": -0.5252275784810384, "bits_per_byte": 0.7577432228132573, "num_chars": 15}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1072, "native_id": "ACTAAP_2013_5_8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.280299663543701, "logits_per_token_corr": -5.280299663543701, "logits_per_char_corr": -0.5866999626159668, "bits_per_byte_corr": 0.8464291265564037}, "model_output": [{"sum_logits": -3.0358853340148926, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -3.0358853340148926, "logits_per_char": -0.5059808890024821, "bits_per_byte": 0.7299761193489943, "num_chars": 6}, {"sum_logits": -5.280299663543701, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -13.882975578308105, "logits_per_token": -5.280299663543701, "logits_per_char": -0.5866999626159668, "bits_per_byte": 0.8464291265564037, "num_chars": 9}, {"sum_logits": -4.053036212921143, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -14.924777030944824, "logits_per_token": -4.053036212921143, "logits_per_char": -0.45033735699123806, "bits_per_byte": 0.6496994716587686, "num_chars": 9}, {"sum_logits": -3.405071496963501, "num_tokens": 2, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -14.251230239868164, "logits_per_token": -1.7025357484817505, "logits_per_char": -0.2270047664642334, "bits_per_byte": 0.32749865083634205, "num_chars": 15}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1073, "native_id": "Mercury_SC_407592", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.69428825378418, "logits_per_token_corr": -4.69428825378418, "logits_per_char_corr": -0.9388576507568359, "bits_per_byte_corr": 1.3544852768485227}, "model_output": [{"sum_logits": -4.69428825378418, "num_tokens": 1, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -12.599719047546387, "logits_per_token": -4.69428825378418, "logits_per_char": -0.9388576507568359, "bits_per_byte": 1.3544852768485227, "num_chars": 5}, {"sum_logits": -3.012894630432129, "num_tokens": 1, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -14.924777030944824, "logits_per_token": -3.012894630432129, "logits_per_char": -0.33476607004801434, "bits_per_byte": 0.4829653491165046, "num_chars": 9}, {"sum_logits": -5.380183219909668, "num_tokens": 1, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -5.380183219909668, "logits_per_char": -0.896697203318278, "bits_per_byte": 1.2936606084072115, "num_chars": 6}, {"sum_logits": -4.420083045959473, "num_tokens": 1, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -13.27519702911377, "logits_per_token": -4.420083045959473, "logits_per_char": -0.8840166091918945, "bits_per_byte": 1.2753663781455393, "num_chars": 5}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1074, "native_id": "TIMSS_1995_8_L6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -38.089752197265625, "logits_per_token_corr": -1.9044876098632812, "logits_per_char_corr": -0.40521012975814497, "bits_per_byte_corr": 0.5845946447204691}, "model_output": [{"sum_logits": -52.838157653808594, "num_tokens": 22, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -71.25357818603516, "logits_per_token": -2.4017344388094815, "logits_per_char": -0.5561911331979852, "bits_per_byte": 0.8024141896517226, "num_chars": 95}, {"sum_logits": -32.86284637451172, "num_tokens": 18, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -50.0854377746582, "logits_per_token": -1.8257136874728732, "logits_per_char": -0.4107855796813965, "bits_per_byte": 0.5926383186754747, "num_chars": 80}, {"sum_logits": -37.61334991455078, "num_tokens": 20, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -59.502899169921875, "logits_per_token": -1.880667495727539, "logits_per_char": -0.43233735533966416, "bits_per_byte": 0.6237309585400312, "num_chars": 87}, {"sum_logits": -38.089752197265625, "num_tokens": 20, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -56.989158630371094, "logits_per_token": -1.9044876098632812, "logits_per_char": -0.40521012975814497, "bits_per_byte": 0.5845946447204691, "num_chars": 94}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1075, "native_id": "Mercury_7233398", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -24.79343032836914, "logits_per_token_corr": -3.0991787910461426, "logits_per_char_corr": -0.6700927115775444, "bits_per_byte_corr": 0.9667394319294562}, "model_output": [{"sum_logits": -17.642732620239258, "num_tokens": 7, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -32.548553466796875, "logits_per_token": -2.520390374319894, "logits_per_char": -0.5040780748639788, "bits_per_byte": 0.7272309388276403, "num_chars": 35}, {"sum_logits": -21.026687622070312, "num_tokens": 9, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -38.979366302490234, "logits_per_token": -2.336298624674479, "logits_per_char": -0.618431988884421, "bits_per_byte": 0.8922087634912936, "num_chars": 34}, {"sum_logits": -24.79343032836914, "num_tokens": 8, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -34.64366912841797, "logits_per_token": -3.0991787910461426, "logits_per_char": -0.6700927115775444, "bits_per_byte": 0.9667394319294562, "num_chars": 37}, {"sum_logits": -24.742557525634766, "num_tokens": 8, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -39.020572662353516, "logits_per_token": -3.0928196907043457, "logits_per_char": -0.6687177709631018, "bits_per_byte": 0.9647558119234817, "num_chars": 37}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1076, "native_id": "Mercury_407664", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.22879409790039, "logits_per_token_corr": -4.445758819580078, "logits_per_char_corr": -0.717057874125819, "bits_per_byte_corr": 1.0344958390324448}, "model_output": [{"sum_logits": -12.114705085754395, "num_tokens": 3, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -19.400196075439453, "logits_per_token": -4.038235028584798, "logits_per_char": -0.8653360775538853, "bits_per_byte": 1.2484160677901948, "num_chars": 14}, {"sum_logits": -14.400496482849121, "num_tokens": 3, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -19.577444076538086, "logits_per_token": -4.80016549428304, "logits_per_char": -1.02860689163208, "bits_per_byte": 1.4839660615828794, "num_chars": 14}, {"sum_logits": -16.070276260375977, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -25.696712493896484, "logits_per_token": -3.2140552520751955, "logits_per_char": -0.642811050415039, "bits_per_byte": 0.9273803146630686, "num_chars": 25}, {"sum_logits": -22.22879409790039, "num_tokens": 5, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -28.27082061767578, "logits_per_token": -4.445758819580078, "logits_per_char": -0.717057874125819, "bits_per_byte": 1.0344958390324448, "num_chars": 31}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1077, "native_id": "Mercury_SC_408657", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -22.052486419677734, "logits_per_token_corr": -4.4104972839355465, "logits_per_char_corr": -0.630071040562221, "bits_per_byte_corr": 0.9090003656275182}, "model_output": [{"sum_logits": -20.25484848022461, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -27.126232147216797, "logits_per_token": -4.0509696960449215, "logits_per_char": -0.8101939392089844, "bits_per_byte": 1.1688627782559358, "num_chars": 25}, {"sum_logits": -16.26012420654297, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -28.11025619506836, "logits_per_token": -3.252024841308594, "logits_per_char": -0.5606939381566541, "bits_per_byte": 0.8089103640356892, "num_chars": 29}, {"sum_logits": -21.725971221923828, "num_tokens": 7, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -32.63475036621094, "logits_per_token": -3.103710174560547, "logits_per_char": -0.6789366006851196, "bits_per_byte": 0.9794984668871362, "num_chars": 32}, {"sum_logits": -22.052486419677734, "num_tokens": 5, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -34.19502258300781, "logits_per_token": -4.4104972839355465, "logits_per_char": -0.630071040562221, "bits_per_byte": 0.9090003656275182, "num_chars": 35}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1078, "native_id": "Mercury_7142800", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -26.635011672973633, "logits_per_token_corr": -4.439168612162272, "logits_per_char_corr": -0.7833826962639304, "bits_per_byte_corr": 1.1301823310190096}, "model_output": [{"sum_logits": -22.780670166015625, "num_tokens": 8, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -30.945880889892578, "logits_per_token": -2.847583770751953, "logits_per_char": -0.7855403505522629, "bits_per_byte": 1.1332951681607417, "num_chars": 29}, {"sum_logits": -26.635011672973633, "num_tokens": 6, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -41.82386779785156, "logits_per_token": -4.439168612162272, "logits_per_char": -0.7833826962639304, "bits_per_byte": 1.1301823310190096, "num_chars": 34}, {"sum_logits": -18.438810348510742, "num_tokens": 7, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -29.447158813476562, "logits_per_token": -2.634115764072963, "logits_per_char": -0.5121891763475206, "bits_per_byte": 0.7389327847141016, "num_chars": 36}, {"sum_logits": -47.884063720703125, "num_tokens": 10, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -57.03187561035156, "logits_per_token": -4.7884063720703125, "logits_per_char": -1.1400967552548362, "bits_per_byte": 1.6448119349409323, "num_chars": 42}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1079, "native_id": "Mercury_SC_410837", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -19.454139709472656, "logits_per_token_corr": -1.6211783091227214, "logits_per_char_corr": -0.4139178661589927, "bits_per_byte_corr": 0.5971572528433494}, "model_output": [{"sum_logits": -29.699268341064453, "num_tokens": 9, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -41.15094757080078, "logits_per_token": -3.299918704562717, "logits_per_char": -0.6599837409125434, "bits_per_byte": 0.952155270082557, "num_chars": 45}, {"sum_logits": -19.454139709472656, "num_tokens": 12, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -38.72321701049805, "logits_per_token": -1.6211783091227214, "logits_per_char": -0.4139178661589927, "bits_per_byte": 0.5971572528433494, "num_chars": 47}, {"sum_logits": -24.278247833251953, "num_tokens": 8, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -39.366146087646484, "logits_per_token": -3.034780979156494, "logits_per_char": -0.48556495666503907, "bits_per_byte": 0.7005221550106195, "num_chars": 50}, {"sum_logits": -33.01585006713867, "num_tokens": 11, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -50.46941375732422, "logits_per_token": -3.001440915194425, "logits_per_char": -0.5895687511989048, "bits_per_byte": 0.8505679136183701, "num_chars": 56}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1080, "native_id": "Mercury_7154315", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -25.798532485961914, "logits_per_token_corr": -2.5798532485961916, "logits_per_char_corr": -0.4961256247300368, "bits_per_byte_corr": 0.7157579784564772}, "model_output": [{"sum_logits": -25.798532485961914, "num_tokens": 10, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -34.206016540527344, "logits_per_token": -2.5798532485961916, "logits_per_char": -0.4961256247300368, "bits_per_byte": 0.7157579784564772, "num_chars": 52}, {"sum_logits": -26.93631362915039, "num_tokens": 9, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -37.616519927978516, "logits_per_token": -2.9929237365722656, "logits_per_char": -0.5497206863091917, "bits_per_byte": 0.7930793080129181, "num_chars": 49}, {"sum_logits": -35.24895477294922, "num_tokens": 9, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -44.640865325927734, "logits_per_token": -3.916550530327691, "logits_per_char": -0.6408900867808949, "bits_per_byte": 0.9246089499543587, "num_chars": 55}, {"sum_logits": -30.608741760253906, "num_tokens": 11, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -39.8580436706543, "logits_per_token": -2.7826128872958096, "logits_per_char": -0.5187922332246425, "bits_per_byte": 0.74845898212544, "num_chars": 59}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1081, "native_id": "Mercury_7239628", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.956165313720703, "logits_per_token_corr": -3.5912330627441404, "logits_per_char_corr": -0.561130166053772, "bits_per_byte_corr": 0.809539707859559}, "model_output": [{"sum_logits": -19.994230270385742, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -33.34251403808594, "logits_per_token": -4.9985575675964355, "logits_per_char": -0.9088286486538973, "bits_per_byte": 1.3111625844317378, "num_chars": 22}, {"sum_logits": -12.97563362121582, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -31.172679901123047, "logits_per_token": -2.16260560353597, "logits_per_char": -0.3506928005734006, "bits_per_byte": 0.5059427642630707, "num_chars": 37}, {"sum_logits": -21.46558380126953, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -40.520477294921875, "logits_per_token": -3.5775973002115884, "logits_per_char": -0.825599376971905, "bits_per_byte": 1.1910881269192408, "num_chars": 26}, {"sum_logits": -17.956165313720703, "num_tokens": 5, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -33.642967224121094, "logits_per_token": -3.5912330627441404, "logits_per_char": -0.561130166053772, "bits_per_byte": 0.809539707859559, "num_chars": 32}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1082, "native_id": "Mercury_401241", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.330492973327637, "logits_per_token_corr": -3.832623243331909, "logits_per_char_corr": -0.7665246486663818, "bits_per_byte_corr": 1.1058613093509384}, "model_output": [{"sum_logits": -15.330492973327637, "num_tokens": 4, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -21.29473304748535, "logits_per_token": -3.832623243331909, "logits_per_char": -0.7665246486663818, "bits_per_byte": 1.1058613093509384, "num_chars": 20}, {"sum_logits": -16.595252990722656, "num_tokens": 4, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -19.922849655151367, "logits_per_token": -4.148813247680664, "logits_per_char": -0.8297626495361328, "bits_per_byte": 1.1970944596015256, "num_chars": 20}, {"sum_logits": -16.267684936523438, "num_tokens": 4, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -24.720897674560547, "logits_per_token": -4.066921234130859, "logits_per_char": -0.6507073974609375, "bits_per_byte": 0.9387723353873327, "num_chars": 25}, {"sum_logits": -17.991079330444336, "num_tokens": 4, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -24.646177291870117, "logits_per_token": -4.497769832611084, "logits_per_char": -0.6425385475158691, "bits_per_byte": 0.9269871760818079, "num_chars": 28}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1083, "native_id": "Mercury_SC_408251", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.335988998413086, "logits_per_token_corr": -1.0419986248016357, "logits_per_char_corr": -0.26049965620040894, "bits_per_byte_corr": 0.3758215621538799}, "model_output": [{"sum_logits": -8.335988998413086, "num_tokens": 8, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -24.567811965942383, "logits_per_token": -1.0419986248016357, "logits_per_char": -0.26049965620040894, "bits_per_byte": 0.3758215621538799, "num_chars": 32}, {"sum_logits": -8.004185676574707, "num_tokens": 8, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -23.98727035522461, "logits_per_token": -1.0005232095718384, "logits_per_char": -0.23541722578160904, "bits_per_byte": 0.3396352641752088, "num_chars": 34}, {"sum_logits": -16.468475341796875, "num_tokens": 8, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -29.18558120727539, "logits_per_token": -2.0585594177246094, "logits_per_char": -0.42226859850761217, "bits_per_byte": 0.6092048129905024, "num_chars": 39}, {"sum_logits": -18.678159713745117, "num_tokens": 7, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -31.195873260498047, "logits_per_token": -2.668308530535017, "logits_per_char": -0.5493576386395622, "bits_per_byte": 0.792555540940337, "num_chars": 34}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1084, "native_id": "Mercury_7175893", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.976524353027344, "logits_per_token_corr": -1.5953048706054687, "logits_per_char_corr": -0.36256928877397016, "bits_per_byte_corr": 0.5230769148932211}, "model_output": [{"sum_logits": -11.207123756408691, "num_tokens": 5, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -23.582767486572266, "logits_per_token": -2.2414247512817385, "logits_per_char": -0.400254419871739, "bits_per_byte": 0.5774450666432617, "num_chars": 28}, {"sum_logits": -8.251826286315918, "num_tokens": 5, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -23.037141799926758, "logits_per_token": -1.6503652572631835, "logits_per_char": -0.35877505592677905, "bits_per_byte": 0.5176029939805965, "num_chars": 23}, {"sum_logits": -4.9649658203125, "num_tokens": 5, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -21.384746551513672, "logits_per_token": -0.9929931640625, "logits_per_char": -0.21586807914402173, "bits_per_byte": 0.31143180726753017, "num_chars": 23}, {"sum_logits": -7.976524353027344, "num_tokens": 5, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -18.701168060302734, "logits_per_token": -1.5953048706054687, "logits_per_char": -0.36256928877397016, "bits_per_byte": 0.5230769148932211, "num_chars": 22}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1085, "native_id": "Mercury_7202843", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -24.74189567565918, "logits_per_token_corr": -1.9032227442814753, "logits_per_char_corr": -0.42658440820102034, "bits_per_byte_corr": 0.6154312102326075}, "model_output": [{"sum_logits": -24.74189567565918, "num_tokens": 13, "num_tokens_all": 224, "is_greedy": false, "sum_logits_uncond": -42.099281311035156, "logits_per_token": -1.9032227442814753, "logits_per_char": -0.42658440820102034, "bits_per_byte": 0.6154312102326075, "num_chars": 58}, {"sum_logits": -23.712474822998047, "num_tokens": 10, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -34.08708572387695, "logits_per_token": -2.371247482299805, "logits_per_char": -0.49400989214579266, "bits_per_byte": 0.7127056215493388, "num_chars": 48}, {"sum_logits": -22.29759979248047, "num_tokens": 9, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -32.921913146972656, "logits_per_token": -2.4775110880533853, "logits_per_char": -0.46453332901000977, "bits_per_byte": 0.6701799300908639, "num_chars": 48}, {"sum_logits": -20.231861114501953, "num_tokens": 7, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -29.401594161987305, "logits_per_token": -2.890265873500279, "logits_per_char": -0.5187656696026142, "bits_per_byte": 0.7484206589196716, "num_chars": 39}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1086, "native_id": "Mercury_7159023", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.784807205200195, "logits_per_token_corr": -2.531645245022244, "logits_per_char_corr": -0.438169369330773, "bits_per_byte_corr": 0.632144776203405}, "model_output": [{"sum_logits": -29.464431762695312, "num_tokens": 8, "num_tokens_all": 230, "is_greedy": false, "sum_logits_uncond": -41.21370315551758, "logits_per_token": -3.683053970336914, "logits_per_char": -0.5892886352539063, "bits_per_byte": 0.8501637917336465, "num_chars": 50}, {"sum_logits": -13.217409133911133, "num_tokens": 11, "num_tokens_all": 233, "is_greedy": false, "sum_logits_uncond": -26.973880767822266, "logits_per_token": -1.2015826485373757, "logits_per_char": -0.25916488497864965, "bits_per_byte": 0.3738958943315251, "num_chars": 51}, {"sum_logits": -22.784807205200195, "num_tokens": 9, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -35.767189025878906, "logits_per_token": -2.531645245022244, "logits_per_char": -0.438169369330773, "bits_per_byte": 0.632144776203405, "num_chars": 52}, {"sum_logits": -40.60748291015625, "num_tokens": 9, "num_tokens_all": 231, "is_greedy": false, "sum_logits_uncond": -50.01948165893555, "logits_per_token": -4.511942545572917, "logits_per_char": -0.6656964411501025, "bits_per_byte": 0.9603969543853745, "num_chars": 61}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1087, "native_id": "MDSA_2008_8_3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.4196138381958, "logits_per_token_corr": -1.4419613838195802, "logits_per_char_corr": -0.288392276763916, "bits_per_byte_corr": 0.41606210751827793}, "model_output": [{"sum_logits": -8.16794204711914, "num_tokens": 4, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -25.98276138305664, "logits_per_token": -2.041985511779785, "logits_per_char": -0.37127009305087005, "bits_per_byte": 0.535629522075259, "num_chars": 22}, {"sum_logits": -9.959698677062988, "num_tokens": 6, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -21.32292366027832, "logits_per_token": -1.659949779510498, "logits_per_char": -0.4330303772636082, "bits_per_byte": 0.6247307778329333, "num_chars": 23}, {"sum_logits": -9.507322311401367, "num_tokens": 5, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -25.307682037353516, "logits_per_token": -1.9014644622802734, "logits_per_char": -0.3395472254071917, "bits_per_byte": 0.4898630982429144, "num_chars": 28}, {"sum_logits": -14.4196138381958, "num_tokens": 10, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -31.30988121032715, "logits_per_token": -1.4419613838195802, "logits_per_char": -0.288392276763916, "bits_per_byte": 0.41606210751827793, "num_chars": 50}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1088, "native_id": "Mercury_7218348", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -7.860818862915039, "logits_per_token_corr": -1.5721637725830078, "logits_per_char_corr": -0.28074353081839426, "bits_per_byte_corr": 0.40502729967364626}, "model_output": [{"sum_logits": -7.860818862915039, "num_tokens": 5, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -23.18311882019043, "logits_per_token": -1.5721637725830078, "logits_per_char": -0.28074353081839426, "bits_per_byte": 0.40502729967364626, "num_chars": 28}, {"sum_logits": -13.978891372680664, "num_tokens": 7, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -22.907203674316406, "logits_per_token": -1.9969844818115234, "logits_per_char": -0.41114386390237245, "bits_per_byte": 0.5931552135443058, "num_chars": 34}, {"sum_logits": -13.107500076293945, "num_tokens": 8, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -26.47231674194336, "logits_per_token": -1.6384375095367432, "logits_per_char": -0.3360897455459986, "bits_per_byte": 0.4848750091931941, "num_chars": 39}, {"sum_logits": -16.908769607543945, "num_tokens": 6, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -30.22083282470703, "logits_per_token": -2.8181282679239907, "logits_per_char": -0.3932272001754406, "bits_per_byte": 0.5673069316361674, "num_chars": 43}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1089, "native_id": "Mercury_SC_406458", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -16.263490676879883, "logits_per_token_corr": -1.8070545196533203, "logits_per_char_corr": -0.306858314658111, "bits_per_byte_corr": 0.4427029688131199}, "model_output": [{"sum_logits": -15.910601615905762, "num_tokens": 7, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -32.11005401611328, "logits_per_token": -2.2729430879865373, "logits_per_char": -0.388063454046482, "bits_per_byte": 0.559857220703504, "num_chars": 41}, {"sum_logits": -12.48601245880127, "num_tokens": 8, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -27.91155433654785, "logits_per_token": -1.5607515573501587, "logits_per_char": -0.27143505345220154, "bits_per_byte": 0.3915980055392032, "num_chars": 46}, {"sum_logits": -20.341930389404297, "num_tokens": 9, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -34.485694885253906, "logits_per_token": -2.2602144877115884, "logits_per_char": -0.41514143651845503, "bits_per_byte": 0.5989224917331257, "num_chars": 49}, {"sum_logits": -16.263490676879883, "num_tokens": 9, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -36.10136413574219, "logits_per_token": -1.8070545196533203, "logits_per_char": -0.306858314658111, "bits_per_byte": 0.4427029688131199, "num_chars": 53}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1090, "native_id": "LEAP_2007_4_10280", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.200947761535645, "logits_per_token_corr": -3.0401895523071287, "logits_per_char_corr": -0.6609107722406802, "bits_per_byte_corr": 0.9534926935824095}, "model_output": [{"sum_logits": -15.200947761535645, "num_tokens": 5, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -29.028364181518555, "logits_per_token": -3.0401895523071287, "logits_per_char": -0.6609107722406802, "bits_per_byte": 0.9534926935824095, "num_chars": 23}, {"sum_logits": -22.319351196289062, "num_tokens": 5, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -37.90187454223633, "logits_per_token": -4.463870239257813, "logits_per_char": -1.0145159634676846, "bits_per_byte": 1.463637149398569, "num_chars": 22}, {"sum_logits": -14.732810974121094, "num_tokens": 5, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -26.465621948242188, "logits_per_token": -2.9465621948242187, "logits_per_char": -0.6405569988748302, "bits_per_byte": 0.9241284056840988, "num_chars": 23}, {"sum_logits": -16.42938995361328, "num_tokens": 5, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -25.90191650390625, "logits_per_token": -3.285877990722656, "logits_per_char": -0.6571755981445313, "bits_per_byte": 0.9481039764370347, "num_chars": 25}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1091, "native_id": "Mercury_7216965", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.990339756011963, "logits_per_token_corr": -2.4951698780059814, "logits_per_char_corr": -0.2772410975562202, "bits_per_byte_corr": 0.39997435657525954}, "model_output": [{"sum_logits": -5.434473991394043, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -18.060028076171875, "logits_per_token": -2.7172369956970215, "logits_per_char": -0.3019152217441135, "bits_per_byte": 0.4355715931794372, "num_chars": 18}, {"sum_logits": -11.921796798706055, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -15.949178695678711, "logits_per_token": -5.960898399353027, "logits_per_char": -1.4902245998382568, "bits_per_byte": 2.1499396399989377, "num_chars": 8}, {"sum_logits": -4.990339756011963, "num_tokens": 2, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -15.993900299072266, "logits_per_token": -2.4951698780059814, "logits_per_char": -0.2772410975562202, "bits_per_byte": 0.39997435657525954, "num_chars": 18}, {"sum_logits": -11.187235832214355, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -16.462472915649414, "logits_per_token": -11.187235832214355, "logits_per_char": -1.0170214392922141, "bits_per_byte": 1.4672517869456874, "num_chars": 11}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1092, "native_id": "NYSEDREGENTS_2010_8_42", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.6278076171875, "logits_per_token_corr": -1.6636775090144231, "logits_per_char_corr": -0.5545591696714743, "bits_per_byte_corr": 0.800059763965112}, "model_output": [{"sum_logits": -20.713027954101562, "num_tokens": 13, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -39.00968933105469, "logits_per_token": -1.593309842623197, "logits_per_char": -0.5311032808743991, "bits_per_byte": 0.7662200695179042, "num_chars": 39}, {"sum_logits": -21.6278076171875, "num_tokens": 13, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -39.87502670288086, "logits_per_token": -1.6636775090144231, "logits_per_char": -0.5545591696714743, "bits_per_byte": 0.800059763965112, "num_chars": 39}, {"sum_logits": -22.968114852905273, "num_tokens": 13, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -38.27481460571289, "logits_per_token": -1.766778065608098, "logits_per_char": -0.5889260218693659, "bits_per_byte": 0.8496406512020098, "num_chars": 39}, {"sum_logits": -23.575178146362305, "num_tokens": 13, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -38.574520111083984, "logits_per_token": -1.8134752420278697, "logits_per_char": -0.6044917473426232, "bits_per_byte": 0.8720972461501333, "num_chars": 39}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1093, "native_id": "LEAP__7_10351", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.093175888061523, "logits_per_token_corr": -1.0054809345918543, "logits_per_char_corr": -0.17093175888061524, "bits_per_byte_corr": 0.2466024008676688}, "model_output": [{"sum_logits": -20.09384536743164, "num_tokens": 12, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -39.23313522338867, "logits_per_token": -1.6744871139526367, "logits_per_char": -0.304452202536843, "bits_per_byte": 0.4392316827879413, "num_chars": 66}, {"sum_logits": -17.235092163085938, "num_tokens": 12, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -38.820858001708984, "logits_per_token": -1.4362576802571614, "logits_per_char": -0.24274777694487235, "bits_per_byte": 0.3502110139854392, "num_chars": 71}, {"sum_logits": -17.093175888061523, "num_tokens": 17, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -40.11820983886719, "logits_per_token": -1.0054809345918543, "logits_per_char": -0.17093175888061524, "bits_per_byte": 0.2466024008676688, "num_chars": 100}, {"sum_logits": -14.960383415222168, "num_tokens": 16, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -38.927490234375, "logits_per_token": -0.9350239634513855, "logits_per_char": -0.14960383415222167, "bits_per_byte": 0.2158327096295402, "num_chars": 100}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1094, "native_id": "Mercury_SC_400590", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.508993148803711, "logits_per_token_corr": -1.836331049601237, "logits_per_char_corr": -0.34431207180023193, "bits_per_byte_corr": 0.49673731850475616}, "model_output": [{"sum_logits": -3.7829833030700684, "num_tokens": 3, "num_tokens_all": 192, "is_greedy": true, "sum_logits_uncond": -18.16351890563965, "logits_per_token": -1.2609944343566895, "logits_per_char": -0.27021309307643343, "bits_per_byte": 0.38983508936491845, "num_chars": 14}, {"sum_logits": -6.710761547088623, "num_tokens": 3, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -18.04732894897461, "logits_per_token": -2.2369205156962075, "logits_per_char": -0.47934011050633024, "bits_per_byte": 0.6915416003271472, "num_chars": 14}, {"sum_logits": -7.773913383483887, "num_tokens": 3, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -18.73611068725586, "logits_per_token": -2.5913044611612954, "logits_per_char": -0.5182608922322591, "bits_per_byte": 0.7476924191107069, "num_chars": 15}, {"sum_logits": -5.508993148803711, "num_tokens": 3, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -18.939292907714844, "logits_per_token": -1.836331049601237, "logits_per_char": -0.34431207180023193, "bits_per_byte": 0.49673731850475616, "num_chars": 16}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1095, "native_id": "Mercury_7086608", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.383981227874756, "logits_per_token_corr": -2.1279937426249185, "logits_per_char_corr": -0.31919906139373777, "bits_per_byte_corr": 0.4605069029294881}, "model_output": [{"sum_logits": -5.7028727531433105, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -24.37813377380371, "logits_per_token": -1.9009575843811035, "logits_per_char": -0.28514363765716555, "bits_per_byte": 0.41137531198932775, "num_chars": 20}, {"sum_logits": -6.383981227874756, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -21.48424530029297, "logits_per_token": -2.1279937426249185, "logits_per_char": -0.31919906139373777, "bits_per_byte": 0.4605069029294881, "num_chars": 20}, {"sum_logits": -4.581413269042969, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -19.661182403564453, "logits_per_token": -2.2907066345214844, "logits_per_char": -0.3524164053109976, "bits_per_byte": 0.5084294002704565, "num_chars": 13}, {"sum_logits": -3.059199333190918, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -16.336524963378906, "logits_per_token": -1.529599666595459, "logits_per_char": -0.3399110370212131, "bits_per_byte": 0.4903879674542813, "num_chars": 9}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1096, "native_id": "Mercury_7187863", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.617658615112305, "logits_per_token_corr": -3.723531723022461, "logits_per_char_corr": -0.5818018317222595, "bits_per_byte_corr": 0.839362617406422}, "model_output": [{"sum_logits": -15.19005298614502, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -19.40034294128418, "logits_per_token": -7.59502649307251, "logits_per_char": -1.519005298614502, "bits_per_byte": 2.191461411396775, "num_chars": 10}, {"sum_logits": -13.645292282104492, "num_tokens": 2, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -20.241382598876953, "logits_per_token": -6.822646141052246, "logits_per_char": -1.3645292282104493, "bits_per_byte": 1.9685995506886742, "num_chars": 10}, {"sum_logits": -16.158260345458984, "num_tokens": 4, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -27.738361358642578, "logits_per_token": -4.039565086364746, "logits_per_char": -0.7025330584982167, "bits_per_byte": 1.0135409595566613, "num_chars": 23}, {"sum_logits": -18.617658615112305, "num_tokens": 5, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -28.761381149291992, "logits_per_token": -3.723531723022461, "logits_per_char": -0.5818018317222595, "bits_per_byte": 0.839362617406422, "num_chars": 32}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1097, "native_id": "Mercury_7120873", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.331157684326172, "logits_per_token_corr": -2.1901653834751675, "logits_per_char_corr": -0.4034515180085835, "bits_per_byte_corr": 0.5820575042705258}, "model_output": [{"sum_logits": -12.895368576049805, "num_tokens": 5, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -24.054237365722656, "logits_per_token": -2.5790737152099608, "logits_per_char": -0.5158147430419922, "bits_per_byte": 0.7441633718046317, "num_chars": 25}, {"sum_logits": -12.852773666381836, "num_tokens": 7, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -26.188119888305664, "logits_per_token": -1.8361105237688338, "logits_per_char": -0.36722210475376676, "bits_per_byte": 0.5297895094334474, "num_chars": 35}, {"sum_logits": -15.331157684326172, "num_tokens": 7, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -26.111299514770508, "logits_per_token": -2.1901653834751675, "logits_per_char": -0.4034515180085835, "bits_per_byte": 0.5820575042705258, "num_chars": 38}, {"sum_logits": -20.14139175415039, "num_tokens": 8, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -37.503570556640625, "logits_per_token": -2.517673969268799, "logits_per_char": -0.5164459424141126, "bits_per_byte": 0.7450740000086027, "num_chars": 39}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1098, "native_id": "Mercury_184730", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -28.645902633666992, "logits_per_token_corr": -2.046135902404785, "logits_per_char_corr": -0.4546968672010634, "bits_per_byte_corr": 0.655988915419193}, "model_output": [{"sum_logits": -30.86736488342285, "num_tokens": 12, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -43.59109115600586, "logits_per_token": -2.5722804069519043, "logits_per_char": -0.49786072392617503, "bits_per_byte": 0.7182611974621981, "num_chars": 62}, {"sum_logits": -28.645902633666992, "num_tokens": 14, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -45.02243423461914, "logits_per_token": -2.046135902404785, "logits_per_char": -0.4546968672010634, "bits_per_byte": 0.655988915419193, "num_chars": 63}, {"sum_logits": -27.98131561279297, "num_tokens": 13, "num_tokens_all": 202, "is_greedy": false, "sum_logits_uncond": -40.069740295410156, "logits_per_token": -2.1524088932917667, "logits_per_char": -0.47425958665750795, "bits_per_byte": 0.6842119537653278, "num_chars": 59}, {"sum_logits": -25.790449142456055, "num_tokens": 14, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -43.37874984741211, "logits_per_token": -1.8421749387468611, "logits_per_char": -0.40937220861041357, "bits_per_byte": 0.5905992552404302, "num_chars": 63}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1099, "native_id": "Mercury_SC_401265", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -20.589801788330078, "logits_per_token_corr": -2.5737252235412598, "logits_per_char_corr": -0.4202000364965322, "bits_per_byte_corr": 0.606220508835344}, "model_output": [{"sum_logits": -22.07494354248047, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -37.310386657714844, "logits_per_token": -2.7593679428100586, "logits_per_char": -0.5255938938685826, "bits_per_byte": 0.758271704206269, "num_chars": 42}, {"sum_logits": -24.26567840576172, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -38.028282165527344, "logits_per_token": -3.033209800720215, "logits_per_char": -0.539237297905816, "bits_per_byte": 0.7779549755516443, "num_chars": 45}, {"sum_logits": -24.256046295166016, "num_tokens": 7, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -42.16185760498047, "logits_per_token": -3.465149470738002, "logits_per_char": -0.5273053542427395, "bits_per_byte": 0.7607408196007449, "num_chars": 46}, {"sum_logits": -20.589801788330078, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -34.73639678955078, "logits_per_token": -2.5737252235412598, "logits_per_char": -0.4202000364965322, "bits_per_byte": 0.606220508835344, "num_chars": 49}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1100, "native_id": "OHAT_2009_8_34", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -15.290292739868164, "logits_per_token_corr": -1.9112865924835205, "logits_per_char_corr": -0.3822573184967041, "bits_per_byte_corr": 0.5514807377391042}, "model_output": [{"sum_logits": -18.250804901123047, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -27.682645797729492, "logits_per_token": -3.0418008168538413, "logits_per_char": -0.5703376531600952, "bits_per_byte": 0.8228233038469102, "num_chars": 32}, {"sum_logits": -17.755521774291992, "num_tokens": 8, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -30.99488067626953, "logits_per_token": -2.219440221786499, "logits_per_char": -0.45526978908441007, "bits_per_byte": 0.6568154669791146, "num_chars": 39}, {"sum_logits": -19.62640380859375, "num_tokens": 8, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -32.82032012939453, "logits_per_token": -2.4533004760742188, "logits_per_char": -0.47869277581935976, "bits_per_byte": 0.6906076937844587, "num_chars": 41}, {"sum_logits": -15.290292739868164, "num_tokens": 8, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -29.147628784179688, "logits_per_token": -1.9112865924835205, "logits_per_char": -0.3822573184967041, "bits_per_byte": 0.5514807377391042, "num_chars": 40}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1101, "native_id": "Mercury_406639", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.624551773071289, "logits_per_token_corr": -2.089221681867327, "logits_per_char_corr": -0.3848566256071392, "bits_per_byte_corr": 0.555230745217079}, "model_output": [{"sum_logits": -16.773704528808594, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -28.308874130249023, "logits_per_token": -2.3962435041155135, "logits_per_char": -0.44141327707391037, "bits_per_byte": 0.636824745817534, "num_chars": 38}, {"sum_logits": -14.624551773071289, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -29.42525863647461, "logits_per_token": -2.089221681867327, "logits_per_char": -0.3848566256071392, "bits_per_byte": 0.555230745217079, "num_chars": 38}, {"sum_logits": -23.17707061767578, "num_tokens": 8, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -32.49698257446289, "logits_per_token": -2.8971338272094727, "logits_per_char": -0.5652944053091654, "bits_per_byte": 0.8155474351823946, "num_chars": 41}, {"sum_logits": -19.698604583740234, "num_tokens": 8, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -30.462020874023438, "logits_per_token": -2.4623255729675293, "logits_per_char": -0.46901439485095797, "bits_per_byte": 0.6766447415575013, "num_chars": 42}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1102, "native_id": "Mercury_7008610", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.98065948486328, "logits_per_token_corr": -3.996776580810547, "logits_per_char_corr": -0.8881725735134549, "bits_per_byte_corr": 1.2813621672623703}, "model_output": [{"sum_logits": -17.966623306274414, "num_tokens": 5, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -23.68572998046875, "logits_per_token": -3.593324661254883, "logits_per_char": -0.7486093044281006, "bits_per_byte": 1.0800149310625329, "num_chars": 24}, {"sum_logits": -29.237157821655273, "num_tokens": 5, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -36.5397834777832, "logits_per_token": -5.847431564331055, "logits_per_char": -1.0081778559191474, "bits_per_byte": 1.4544931930696667, "num_chars": 29}, {"sum_logits": -25.70850944519043, "num_tokens": 6, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -39.52110290527344, "logits_per_token": -4.284751574198405, "logits_per_char": -0.856950314839681, "bits_per_byte": 1.2363179695083317, "num_chars": 30}, {"sum_logits": -23.98065948486328, "num_tokens": 6, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -27.754772186279297, "logits_per_token": -3.996776580810547, "logits_per_char": -0.8881725735134549, "bits_per_byte": 1.2813621672623703, "num_chars": 27}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1103, "native_id": "MCAS_2009_8_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.972858428955078, "logits_per_token_corr": -3.9932146072387695, "logits_per_char_corr": -0.8873810238308377, "bits_per_byte_corr": 1.2802202024606404}, "model_output": [{"sum_logits": -15.972858428955078, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -23.901039123535156, "logits_per_token": -3.9932146072387695, "logits_per_char": -0.8873810238308377, "bits_per_byte": 1.2802202024606404, "num_chars": 18}, {"sum_logits": -7.594676971435547, "num_tokens": 5, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -26.173259735107422, "logits_per_token": -1.5189353942871093, "logits_per_char": -0.34521258961070667, "bits_per_byte": 0.4980364910841612, "num_chars": 22}, {"sum_logits": -18.33863639831543, "num_tokens": 7, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -40.58197784423828, "logits_per_token": -2.6198051997593472, "logits_per_char": -0.6549512999398368, "bits_per_byte": 0.9448949924476614, "num_chars": 28}, {"sum_logits": -17.692895889282227, "num_tokens": 5, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -27.64406394958496, "logits_per_token": -3.5385791778564455, "logits_per_char": -0.6100998582511112, "bits_per_byte": 0.88018803994657, "num_chars": 29}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1104, "native_id": "MCAS_2005_8_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -31.047527313232422, "logits_per_token_corr": -3.4497252570258246, "logits_per_char_corr": -0.6336230063924984, "bits_per_byte_corr": 0.91412476911627}, "model_output": [{"sum_logits": -18.134668350219727, "num_tokens": 7, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -30.54320526123047, "logits_per_token": -2.5906669071742465, "logits_per_char": -0.4772281144794665, "bits_per_byte": 0.6884946341328114, "num_chars": 38}, {"sum_logits": -17.648387908935547, "num_tokens": 8, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -29.209510803222656, "logits_per_token": -2.2060484886169434, "logits_per_char": -0.38366060671599017, "bits_per_byte": 0.5535052546940076, "num_chars": 46}, {"sum_logits": -31.047527313232422, "num_tokens": 9, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -48.31249237060547, "logits_per_token": -3.4497252570258246, "logits_per_char": -0.6336230063924984, "bits_per_byte": 0.91412476911627, "num_chars": 49}, {"sum_logits": -39.58717346191406, "num_tokens": 9, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -47.34471130371094, "logits_per_token": -4.3985748291015625, "logits_per_char": -0.7330958048502604, "bits_per_byte": 1.0576336821547339, "num_chars": 54}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1105, "native_id": "ACTAAP_2008_7_4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -3.0706777572631836, "logits_per_token_corr": -1.5353388786315918, "logits_per_char_corr": -0.38383471965789795, "bits_per_byte_corr": 0.5537564465718527}, "model_output": [{"sum_logits": -5.572707176208496, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -10.923490524291992, "logits_per_token": -2.786353588104248, "logits_per_char": -0.696588397026062, "bits_per_byte": 1.004964625931014, "num_chars": 8}, {"sum_logits": -3.0706777572631836, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -11.838747024536133, "logits_per_token": -1.5353388786315918, "logits_per_char": -0.38383471965789795, "bits_per_byte": 0.5537564465718527, "num_chars": 8}, {"sum_logits": -5.05731725692749, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -12.53227710723877, "logits_per_token": -2.528658628463745, "logits_per_char": -0.6321646571159363, "bits_per_byte": 0.9120208158470885, "num_chars": 8}, {"sum_logits": -6.09921407699585, "num_tokens": 2, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -14.32619571685791, "logits_per_token": -3.049607038497925, "logits_per_char": -0.6776904529995389, "bits_per_byte": 0.9777006558009324, "num_chars": 9}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1106, "native_id": "NYSEDREGENTS_2008_4_3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -1.703223705291748, "logits_per_token_corr": -1.703223705291748, "logits_per_char_corr": -0.1548385186628862, "bits_per_byte_corr": 0.2233847630136996}, "model_output": [{"sum_logits": -5.290889263153076, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -11.467022895812988, "logits_per_token": -5.290889263153076, "logits_per_char": -0.8818148771921793, "bits_per_byte": 1.2721899503081815, "num_chars": 6}, {"sum_logits": -3.2751574516296387, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -12.272923469543457, "logits_per_token": -3.2751574516296387, "logits_per_char": -0.6550314903259278, "bits_per_byte": 0.945010682720002, "num_chars": 5}, {"sum_logits": -8.003837585449219, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -12.68324089050293, "logits_per_token": -8.003837585449219, "logits_per_char": -1.3339729309082031, "bits_per_byte": 1.924516132102763, "num_chars": 6}, {"sum_logits": -1.703223705291748, "num_tokens": 1, "num_tokens_all": 187, "is_greedy": true, "sum_logits_uncond": -13.95900821685791, "logits_per_token": -1.703223705291748, "logits_per_char": -0.1548385186628862, "bits_per_byte": 0.2233847630136996, "num_chars": 11}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1107, "native_id": "Mercury_SC_416181", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -2.6510050296783447, "logits_per_token_corr": -1.3255025148391724, "logits_per_char_corr": -0.17673366864522297, "bits_per_byte_corr": 0.25497278731275963}, "model_output": [{"sum_logits": -7.1910552978515625, "num_tokens": 2, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -16.89964485168457, "logits_per_token": -3.5955276489257812, "logits_per_char": -0.4230032528147978, "bits_per_byte": 0.6102646951162477, "num_chars": 17}, {"sum_logits": -2.6510050296783447, "num_tokens": 2, "num_tokens_all": 204, "is_greedy": true, "sum_logits_uncond": -14.851265907287598, "logits_per_token": -1.3255025148391724, "logits_per_char": -0.17673366864522297, "bits_per_byte": 0.25497278731275963, "num_chars": 15}, {"sum_logits": -7.1125617027282715, "num_tokens": 2, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -17.372642517089844, "logits_per_token": -3.5562808513641357, "logits_per_char": -0.6465965184298429, "bits_per_byte": 0.9328415905954738, "num_chars": 11}, {"sum_logits": -8.711421012878418, "num_tokens": 2, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -15.82663345336914, "logits_per_token": -4.355710506439209, "logits_per_char": -0.6701093086829553, "bits_per_byte": 0.9667633764911256, "num_chars": 13}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1108, "native_id": "NYSEDREGENTS_2010_4_30", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.461014747619629, "logits_per_token_corr": -1.4326268434524536, "logits_per_char_corr": -0.27953694506389337, "bits_per_byte_corr": 0.4032865643892193}, "model_output": [{"sum_logits": -21.287689208984375, "num_tokens": 8, "num_tokens_all": 240, "is_greedy": false, "sum_logits_uncond": -37.108299255371094, "logits_per_token": -2.660961151123047, "logits_per_char": -0.6082196916852679, "bits_per_byte": 0.8774755329659807, "num_chars": 35}, {"sum_logits": -11.461014747619629, "num_tokens": 8, "num_tokens_all": 240, "is_greedy": false, "sum_logits_uncond": -32.6336555480957, "logits_per_token": -1.4326268434524536, "logits_per_char": -0.27953694506389337, "bits_per_byte": 0.4032865643892193, "num_chars": 41}, {"sum_logits": -24.635814666748047, "num_tokens": 7, "num_tokens_all": 239, "is_greedy": false, "sum_logits_uncond": -39.40332794189453, "logits_per_token": -3.519402095249721, "logits_per_char": -0.5729259224825127, "bits_per_byte": 0.8265573871628497, "num_chars": 43}, {"sum_logits": -14.32738971710205, "num_tokens": 9, "num_tokens_all": 241, "is_greedy": false, "sum_logits_uncond": -35.18473434448242, "logits_per_token": -1.5919321907891169, "logits_per_char": -0.31146499385004456, "bits_per_byte": 0.4493490020382936, "num_chars": 46}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1109, "native_id": "Mercury_7025060", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.031161308288574, "logits_per_token_corr": -3.010387102762858, "logits_per_char_corr": -0.694704716022198, "bits_per_byte_corr": 1.0022470486881208}, "model_output": [{"sum_logits": -9.035421371459961, "num_tokens": 3, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -20.7580623626709, "logits_per_token": -3.011807123819987, "logits_per_char": -0.6950324131892278, "bits_per_byte": 1.0027198157659083, "num_chars": 13}, {"sum_logits": -8.050276756286621, "num_tokens": 3, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -19.57311248779297, "logits_per_token": -2.6834255854288735, "logits_per_char": -0.6192520581758939, "bits_per_byte": 0.8933918733912879, "num_chars": 13}, {"sum_logits": -8.663957595825195, "num_tokens": 3, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -21.660045623779297, "logits_per_token": -2.887985865275065, "logits_per_char": -0.6664582766019381, "bits_per_byte": 0.961496050613712, "num_chars": 13}, {"sum_logits": -9.031161308288574, "num_tokens": 3, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -19.93828010559082, "logits_per_token": -3.010387102762858, "logits_per_char": -0.694704716022198, "bits_per_byte": 1.0022470486881208, "num_chars": 13}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1110, "native_id": "Mercury_SC_402103", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -26.09102439880371, "logits_per_token_corr": -2.89900271097819, "logits_per_char_corr": -0.6212148666381836, "bits_per_byte_corr": 0.8962236074260501}, "model_output": [{"sum_logits": -30.29123306274414, "num_tokens": 8, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -34.26217269897461, "logits_per_token": -3.7864041328430176, "logits_per_char": -0.7044472805289335, "bits_per_byte": 1.0163025981875389, "num_chars": 43}, {"sum_logits": -20.107460021972656, "num_tokens": 7, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -24.328840255737305, "logits_per_token": -2.8724942888532365, "logits_per_char": -0.6702486673990885, "bits_per_byte": 0.9669644286197959, "num_chars": 30}, {"sum_logits": -20.18512725830078, "num_tokens": 6, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -22.528493881225586, "logits_per_token": -3.3641878763834634, "logits_per_char": -0.5455439799540752, "bits_per_byte": 0.7870535944671379, "num_chars": 37}, {"sum_logits": -26.09102439880371, "num_tokens": 9, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -38.69129943847656, "logits_per_token": -2.89900271097819, "logits_per_char": -0.6212148666381836, "bits_per_byte": 0.8962236074260501, "num_chars": 42}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1111, "native_id": "VASoL_2009_5_37", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -22.380901336669922, "logits_per_token_corr": -2.7976126670837402, "logits_per_char_corr": -0.5328786032540458, "bits_per_byte_corr": 0.7687813183110016}, "model_output": [{"sum_logits": -25.1787052154541, "num_tokens": 11, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -41.655967712402344, "logits_per_token": -2.2889732014049184, "logits_per_char": -0.41276565926973935, "bits_per_byte": 0.5954949696781444, "num_chars": 61}, {"sum_logits": -22.790075302124023, "num_tokens": 11, "num_tokens_all": 220, "is_greedy": false, "sum_logits_uncond": -36.78449630737305, "logits_per_token": -2.0718250274658203, "logits_per_char": -0.3998258824934039, "bits_per_byte": 0.5768268178927016, "num_chars": 57}, {"sum_logits": -22.380901336669922, "num_tokens": 8, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -38.87919616699219, "logits_per_token": -2.7976126670837402, "logits_per_char": -0.5328786032540458, "bits_per_byte": 0.7687813183110016, "num_chars": 42}, {"sum_logits": -22.921154022216797, "num_tokens": 7, "num_tokens_all": 216, "is_greedy": false, "sum_logits_uncond": -31.139673233032227, "logits_per_token": -3.2744505746023997, "logits_per_char": -0.6031882637425473, "bits_per_byte": 0.8702167168244224, "num_chars": 38}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1112, "native_id": "Mercury_SC_402981", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.330357551574707, "logits_per_token_corr": -3.0660715103149414, "logits_per_char_corr": -0.5286330190198175, "bits_per_byte_corr": 0.7626562349905998}, "model_output": [{"sum_logits": -20.752824783325195, "num_tokens": 5, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -28.12794303894043, "logits_per_token": -4.150564956665039, "logits_per_char": -0.9433102174238726, "bits_per_byte": 1.3609089726982886, "num_chars": 22}, {"sum_logits": -13.11496639251709, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -26.345355987548828, "logits_per_token": -4.371655464172363, "logits_per_char": -0.5702159301094387, "bits_per_byte": 0.822647694605366, "num_chars": 23}, {"sum_logits": -18.449283599853516, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -25.2523193359375, "logits_per_token": -4.612320899963379, "logits_per_char": -0.7379713439941407, "bits_per_byte": 1.064667598299275, "num_chars": 25}, {"sum_logits": -15.330357551574707, "num_tokens": 5, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -21.97138786315918, "logits_per_token": -3.0660715103149414, "logits_per_char": -0.5286330190198175, "bits_per_byte": 0.7626562349905998, "num_chars": 29}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1113, "native_id": "NYSEDREGENTS_2008_8_5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -5.635031700134277, "logits_per_token_corr": -5.635031700134277, "logits_per_char_corr": -1.878343900044759, "bits_per_byte_corr": 2.7098774296805557}, "model_output": [{"sum_logits": -3.998842239379883, "num_tokens": 1, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -9.175007820129395, "logits_per_token": -3.998842239379883, "logits_per_char": -1.3329474131266277, "bits_per_byte": 1.9230366226849398, "num_chars": 3}, {"sum_logits": -5.635031700134277, "num_tokens": 1, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -10.036802291870117, "logits_per_token": -5.635031700134277, "logits_per_char": -1.878343900044759, "bits_per_byte": 2.7098774296805557, "num_chars": 3}, {"sum_logits": -3.729867935180664, "num_tokens": 1, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -10.847437858581543, "logits_per_token": -3.729867935180664, "logits_per_char": -1.243289311726888, "bits_per_byte": 1.7936873244199225, "num_chars": 3}, {"sum_logits": -8.675749778747559, "num_tokens": 1, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -12.086806297302246, "logits_per_token": -8.675749778747559, "logits_per_char": -2.1689374446868896, "bits_per_byte": 3.1291152954504042, "num_chars": 4}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1114, "native_id": "MCAS_1998_4_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.597947597503662, "logits_per_token_corr": -3.298973798751831, "logits_per_char_corr": -0.4398631731669108, "bits_per_byte_corr": 0.6345884185980415}, "model_output": [{"sum_logits": -11.917024612426758, "num_tokens": 2, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -18.647504806518555, "logits_per_token": -5.958512306213379, "logits_per_char": -0.7944683074951172, "bits_per_byte": 1.1461754873674772, "num_chars": 15}, {"sum_logits": -6.597947597503662, "num_tokens": 2, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -18.351346969604492, "logits_per_token": -3.298973798751831, "logits_per_char": -0.4398631731669108, "bits_per_byte": 0.6345884185980415, "num_chars": 15}, {"sum_logits": -6.615270137786865, "num_tokens": 2, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -17.288190841674805, "logits_per_token": -3.3076350688934326, "logits_per_char": -0.4134543836116791, "bits_per_byte": 0.596488588870801, "num_chars": 16}, {"sum_logits": -7.619212627410889, "num_tokens": 2, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -15.513108253479004, "logits_per_token": -3.8096063137054443, "logits_per_char": -0.42328959041171604, "bits_per_byte": 0.610677792947342, "num_chars": 18}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1115, "native_id": "MDSA_2008_8_20", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.036543846130371, "logits_per_token_corr": -3.007308769226074, "logits_per_char_corr": -0.6014617538452148, "bits_per_byte_corr": 0.8677258895574933}, "model_output": [{"sum_logits": -16.91905975341797, "num_tokens": 4, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -33.94688415527344, "logits_per_token": -4.229764938354492, "logits_per_char": -0.8056695120675224, "bits_per_byte": 1.1623354096560805, "num_chars": 21}, {"sum_logits": -18.310514450073242, "num_tokens": 5, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -31.4737548828125, "logits_per_token": -3.6621028900146486, "logits_per_char": -0.7961093239162279, "bits_per_byte": 1.1485429736202326, "num_chars": 23}, {"sum_logits": -15.036543846130371, "num_tokens": 5, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -29.887123107910156, "logits_per_token": -3.007308769226074, "logits_per_char": -0.6014617538452148, "bits_per_byte": 0.8677258895574933, "num_chars": 25}, {"sum_logits": -23.198471069335938, "num_tokens": 7, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -40.311309814453125, "logits_per_token": -3.3140672956194197, "logits_per_char": -0.7999472782529634, "bits_per_byte": 1.154079971309003, "num_chars": 29}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1116, "native_id": "Mercury_SC_400134", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -26.15680503845215, "logits_per_token_corr": -2.9063116709391275, "logits_per_char_corr": -0.6883369746961092, "bits_per_byte_corr": 0.9930603398553021}, "model_output": [{"sum_logits": -9.96367073059082, "num_tokens": 3, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -16.963253021240234, "logits_per_token": -3.321223576863607, "logits_per_char": -0.7116907664707729, "bits_per_byte": 1.026752739434587, "num_chars": 14}, {"sum_logits": -2.9661009311676025, "num_tokens": 4, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -17.272117614746094, "logits_per_token": -0.7415252327919006, "logits_per_char": -0.15611057532461067, "bits_per_byte": 0.22521995285130061, "num_chars": 19}, {"sum_logits": -13.978618621826172, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -18.960657119750977, "logits_per_token": -2.795723724365234, "logits_per_char": -0.6077660270359205, "bits_per_byte": 0.8768210332261401, "num_chars": 23}, {"sum_logits": -26.15680503845215, "num_tokens": 9, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -33.779296875, "logits_per_token": -2.9063116709391275, "logits_per_char": -0.6883369746961092, "bits_per_byte": 0.9930603398553021, "num_chars": 38}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1117, "native_id": "Mercury_SC_LBS10265", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.789089202880859, "logits_per_token_corr": -2.3945446014404297, "logits_per_char_corr": -0.43537174571644177, "bits_per_byte_corr": 0.6281086584887327}, "model_output": [{"sum_logits": -3.908393383026123, "num_tokens": 2, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -12.646249771118164, "logits_per_token": -1.9541966915130615, "logits_per_char": -0.3908393383026123, "bits_per_byte": 0.5638619751539078, "num_chars": 10}, {"sum_logits": -2.3819167613983154, "num_tokens": 2, "num_tokens_all": 225, "is_greedy": true, "sum_logits_uncond": -13.310273170471191, "logits_per_token": -1.1909583806991577, "logits_per_char": -0.23819167613983155, "bits_per_byte": 0.3436379499482119, "num_chars": 10}, {"sum_logits": -4.789089202880859, "num_tokens": 2, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -13.507715225219727, "logits_per_token": -2.3945446014404297, "logits_per_char": -0.43537174571644177, "bits_per_byte": 0.6281086584887327, "num_chars": 11}, {"sum_logits": -4.641540050506592, "num_tokens": 2, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -11.962862014770508, "logits_per_token": -2.320770025253296, "logits_per_char": -0.4219581864096902, "bits_per_byte": 0.6087569829961982, "num_chars": 11}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1118, "native_id": "Mercury_7188580", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -28.710100173950195, "logits_per_token_corr": -3.5887625217437744, "logits_per_char_corr": -0.6380022260877821, "bits_per_byte_corr": 0.9204426476536238}, "model_output": [{"sum_logits": -28.710100173950195, "num_tokens": 8, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -36.85072326660156, "logits_per_token": -3.5887625217437744, "logits_per_char": -0.6380022260877821, "bits_per_byte": 0.9204426476536238, "num_chars": 45}, {"sum_logits": -26.86630630493164, "num_tokens": 12, "num_tokens_all": 208, "is_greedy": false, "sum_logits_uncond": -39.7189826965332, "logits_per_token": -2.2388588587443032, "logits_per_char": -0.48847829645330254, "bits_per_byte": 0.7047252158755748, "num_chars": 55}, {"sum_logits": -40.985897064208984, "num_tokens": 10, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -53.32172393798828, "logits_per_token": -4.098589706420898, "logits_per_char": -0.773318812532245, "bits_per_byte": 1.1156632158672133, "num_chars": 53}, {"sum_logits": -36.36115264892578, "num_tokens": 11, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -51.910125732421875, "logits_per_token": -3.3055593317205254, "logits_per_char": -0.6060192108154296, "bits_per_byte": 0.8743009101274918, "num_chars": 60}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1119, "native_id": "Mercury_402348", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -8.000553131103516, "logits_per_token_corr": -2.666851043701172, "logits_per_char_corr": -2.000138282775879, "bits_per_byte_corr": 2.8855895816550006}, "model_output": [{"sum_logits": -8.000553131103516, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -12.815763473510742, "logits_per_token": -2.666851043701172, "logits_per_char": -2.000138282775879, "bits_per_byte": 2.8855895816550006, "num_chars": 4}, {"sum_logits": -6.64129638671875, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -13.223112106323242, "logits_per_token": -2.2137654622395835, "logits_per_char": -1.6603240966796875, "bits_per_byte": 2.3953413405499537, "num_chars": 4}, {"sum_logits": -5.81409215927124, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -13.232734680175781, "logits_per_token": -1.93803071975708, "logits_per_char": -1.45352303981781, "bits_per_byte": 2.0969904813645126, "num_chars": 4}, {"sum_logits": -10.445117950439453, "num_tokens": 3, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -13.510377883911133, "logits_per_token": -3.481705983479818, "logits_per_char": -1.740852991739909, "bits_per_byte": 2.5115199781016866, "num_chars": 6}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1120, "native_id": "Mercury_7030555", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -6.37088680267334, "logits_per_token_corr": -1.274177360534668, "logits_per_char_corr": -0.23595877046938296, "bits_per_byte_corr": 0.34041654801068055}, "model_output": [{"sum_logits": -18.89730453491211, "num_tokens": 6, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -32.393402099609375, "logits_per_token": -3.149550755818685, "logits_per_char": -0.7558921813964844, "bits_per_byte": 1.0905219015482324, "num_chars": 25}, {"sum_logits": -6.235626220703125, "num_tokens": 5, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -26.533023834228516, "logits_per_token": -1.247125244140625, "logits_per_char": -0.249425048828125, "bits_per_byte": 0.359844281018082, "num_chars": 25}, {"sum_logits": -6.37088680267334, "num_tokens": 5, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -26.997739791870117, "logits_per_token": -1.274177360534668, "logits_per_char": -0.23595877046938296, "bits_per_byte": 0.34041654801068055, "num_chars": 27}, {"sum_logits": -14.125580787658691, "num_tokens": 6, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -34.37993240356445, "logits_per_token": -2.3542634646097818, "logits_per_char": -0.45566389637608684, "bits_per_byte": 0.6573840436143953, "num_chars": 31}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1121, "native_id": "Mercury_SC_415453", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.22806167602539, "logits_per_token_corr": -2.204676946004232, "logits_per_char_corr": -0.47243077414376394, "bits_per_byte_corr": 0.6815735350210318}, "model_output": [{"sum_logits": -13.22806167602539, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -30.241100311279297, "logits_per_token": -2.204676946004232, "logits_per_char": -0.47243077414376394, "bits_per_byte": 0.6815735350210318, "num_chars": 28}, {"sum_logits": -12.409387588500977, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -26.385251998901367, "logits_per_token": -1.7727696555001395, "logits_per_char": -0.37604204813639325, "bits_per_byte": 0.5425139980124931, "num_chars": 33}, {"sum_logits": -10.03386116027832, "num_tokens": 5, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -26.521827697753906, "logits_per_token": -2.0067722320556642, "logits_per_char": -0.3459952124233904, "bits_per_byte": 0.4991655771349074, "num_chars": 29}, {"sum_logits": -10.832189559936523, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -25.252649307250977, "logits_per_token": -1.8053649266560872, "logits_per_char": -0.3185938105863683, "bits_per_byte": 0.4596337105912015, "num_chars": 34}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1122, "native_id": "Mercury_7074848", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.09836769104004, "logits_per_token_corr": -4.02459192276001, "logits_per_char_corr": -0.8472825100547389, "bits_per_byte_corr": 1.2223702754888033}, "model_output": [{"sum_logits": -16.09836769104004, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -25.12201499938965, "logits_per_token": -4.02459192276001, "logits_per_char": -0.8472825100547389, "bits_per_byte": 1.2223702754888033, "num_chars": 19}, {"sum_logits": -6.274571418762207, "num_tokens": 5, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -20.08399200439453, "logits_per_token": -1.2549142837524414, "logits_per_char": -0.31372857093811035, "bits_per_byte": 0.45261465347791835, "num_chars": 20}, {"sum_logits": -10.650297164916992, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -24.062213897705078, "logits_per_token": -2.662574291229248, "logits_per_char": -0.7100198109944661, "bits_per_byte": 1.0243420602553712, "num_chars": 15}, {"sum_logits": -17.992698669433594, "num_tokens": 5, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -26.656190872192383, "logits_per_token": -3.5985397338867187, "logits_per_char": -0.856795174734933, "bits_per_byte": 1.236094149648569, "num_chars": 21}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1123, "native_id": "Mercury_SC_400582", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -11.343729019165039, "logits_per_token_corr": -3.781243006388346, "logits_per_char_corr": -0.9453107515970866, "bits_per_byte_corr": 1.3637951334291154}, "model_output": [{"sum_logits": -11.343729019165039, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -21.939722061157227, "logits_per_token": -3.781243006388346, "logits_per_char": -0.9453107515970866, "bits_per_byte": 1.3637951334291154, "num_chars": 12}, {"sum_logits": -8.237798690795898, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -17.993003845214844, "logits_per_token": -4.118899345397949, "logits_per_char": -0.588414192199707, "bits_per_byte": 0.8489022370758126, "num_chars": 14}, {"sum_logits": -11.477165222167969, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -19.25745964050293, "logits_per_token": -3.8257217407226562, "logits_per_char": -0.9564304351806641, "bits_per_byte": 1.3798374457914084, "num_chars": 12}, {"sum_logits": -11.788009643554688, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -20.34048080444336, "logits_per_token": -3.9293365478515625, "logits_per_char": -0.8420006888253349, "bits_per_byte": 1.2147502181942746, "num_chars": 14}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1124, "native_id": "Mercury_SC_401168", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.4875030517578125, "logits_per_token_corr": -2.2437515258789062, "logits_per_char_corr": -0.5609378814697266, "bits_per_byte_corr": 0.8092622998437171}, "model_output": [{"sum_logits": -6.076318264007568, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -17.0118465423584, "logits_per_token": -3.038159132003784, "logits_per_char": -1.012719710667928, "bits_per_byte": 1.4610457043921752, "num_chars": 6}, {"sum_logits": -7.146367073059082, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -16.19525909423828, "logits_per_token": -3.573183536529541, "logits_per_char": -0.7940407858954536, "bits_per_byte": 1.145558704075769, "num_chars": 9}, {"sum_logits": -4.4875030517578125, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -16.369293212890625, "logits_per_token": -2.2437515258789062, "logits_per_char": -0.5609378814697266, "bits_per_byte": 0.8092622998437171, "num_chars": 8}, {"sum_logits": -9.162513732910156, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -18.686433792114258, "logits_per_token": -4.581256866455078, "logits_per_char": -1.5270856221516926, "bits_per_byte": 2.203118854092667, "num_chars": 6}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1125, "native_id": "Mercury_180828", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.538134574890137, "logits_per_token_corr": -1.5896890958150227, "logits_per_char_corr": -0.2725181307111468, "bits_per_byte_corr": 0.39316055572958425}, "model_output": [{"sum_logits": -6.744801044464111, "num_tokens": 5, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -25.16815948486328, "logits_per_token": -1.3489602088928223, "logits_per_char": -0.2594154247870812, "bits_per_byte": 0.3742573468706948, "num_chars": 26}, {"sum_logits": -7.9570631980896, "num_tokens": 5, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -32.7762565612793, "logits_per_token": -1.59141263961792, "logits_per_char": -0.27438148958929653, "bits_per_byte": 0.39584881434248925, "num_chars": 29}, {"sum_logits": -9.538134574890137, "num_tokens": 6, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -30.70737075805664, "logits_per_token": -1.5896890958150227, "logits_per_char": -0.2725181307111468, "bits_per_byte": 0.39316055572958425, "num_chars": 35}, {"sum_logits": -22.253358840942383, "num_tokens": 6, "num_tokens_all": 197, "is_greedy": false, "sum_logits_uncond": -35.255306243896484, "logits_per_token": -3.708893140157064, "logits_per_char": -0.7417786280314128, "bits_per_byte": 1.070160348099107, "num_chars": 30}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1126, "native_id": "FCAT_2008_5_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.447175979614258, "logits_per_token_corr": -1.7411959966023762, "logits_per_char_corr": -0.34823919932047526, "bits_per_byte_corr": 0.5024029659031539}, "model_output": [{"sum_logits": -10.447175979614258, "num_tokens": 6, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -33.4007682800293, "logits_per_token": -1.7411959966023762, "logits_per_char": -0.34823919932047526, "bits_per_byte": 0.5024029659031539, "num_chars": 30}, {"sum_logits": -12.258626937866211, "num_tokens": 7, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -37.40460968017578, "logits_per_token": -1.751232419695173, "logits_per_char": -0.36054785111371207, "bits_per_byte": 0.5201605968052985, "num_chars": 34}, {"sum_logits": -13.589685440063477, "num_tokens": 6, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -29.616901397705078, "logits_per_token": -2.2649475733439126, "logits_per_char": -0.41180864969889325, "bits_per_byte": 0.5941142967162004, "num_chars": 33}, {"sum_logits": -14.859224319458008, "num_tokens": 8, "num_tokens_all": 205, "is_greedy": false, "sum_logits_uncond": -42.62265396118164, "logits_per_token": -1.857403039932251, "logits_per_char": -0.36242010535263436, "bits_per_byte": 0.5228616887110769, "num_chars": 41}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1127, "native_id": "TAKS_2009_5_25", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -27.452890396118164, "logits_per_token_corr": -3.921841485159738, "logits_per_char_corr": -0.6695826925882479, "bits_per_byte_corr": 0.9660036300628385}, "model_output": [{"sum_logits": -15.45443344116211, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -33.92692947387695, "logits_per_token": -2.5757389068603516, "logits_per_char": -0.5519440514700753, "bits_per_byte": 0.7962869459046125, "num_chars": 28}, {"sum_logits": -25.780860900878906, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -38.292640686035156, "logits_per_token": -3.6829801286969865, "logits_per_char": -0.8056519031524658, "bits_per_byte": 1.1623100053616529, "num_chars": 32}, {"sum_logits": -27.452890396118164, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -41.487815856933594, "logits_per_token": -3.921841485159738, "logits_per_char": -0.6695826925882479, "bits_per_byte": 0.9660036300628385, "num_chars": 41}, {"sum_logits": -15.621843338012695, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -29.877397537231445, "logits_per_token": -3.124368667602539, "logits_per_char": -0.6248737335205078, "bits_per_byte": 0.9015022365324559, "num_chars": 25}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1128, "native_id": "Mercury_SC_LBS10392", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -28.357540130615234, "logits_per_token_corr": -3.1508377922905817, "logits_per_char_corr": -0.6751795269194103, "bits_per_byte_corr": 0.9740781551970894}, "model_output": [{"sum_logits": -28.357540130615234, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -39.30918502807617, "logits_per_token": -3.1508377922905817, "logits_per_char": -0.6751795269194103, "bits_per_byte": 0.9740781551970894, "num_chars": 42}, {"sum_logits": -32.4993896484375, "num_tokens": 9, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -42.910499572753906, "logits_per_token": -3.6110432942708335, "logits_per_char": -0.7557997592659884, "bits_per_byte": 1.0903885645988973, "num_chars": 43}, {"sum_logits": -20.16698455810547, "num_tokens": 6, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -27.263351440429688, "logits_per_token": -3.361164093017578, "logits_per_char": -0.7469253540039062, "bits_per_byte": 1.0775855041364433, "num_chars": 27}, {"sum_logits": -17.159873962402344, "num_tokens": 4, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -24.102611541748047, "logits_per_token": -4.289968490600586, "logits_per_char": -0.6355508874963831, "bits_per_byte": 0.9169061136242701, "num_chars": 27}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1129, "native_id": "Mercury_7212905", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -22.847673416137695, "logits_per_token_corr": -2.855959177017212, "logits_per_char_corr": -0.44799359639485675, "bits_per_byte_corr": 0.646318139869336}, "model_output": [{"sum_logits": -22.29190444946289, "num_tokens": 9, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -38.207977294921875, "logits_per_token": -2.4768782721625433, "logits_per_char": -0.4128130453604239, "bits_per_byte": 0.5955633333561822, "num_chars": 54}, {"sum_logits": -22.847673416137695, "num_tokens": 8, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -35.36000061035156, "logits_per_token": -2.855959177017212, "logits_per_char": -0.44799359639485675, "bits_per_byte": 0.646318139869336, "num_chars": 51}, {"sum_logits": -18.78873062133789, "num_tokens": 8, "num_tokens_all": 222, "is_greedy": false, "sum_logits_uncond": -33.6678466796875, "logits_per_token": -2.3485913276672363, "logits_per_char": -0.417527347140842, "bits_per_byte": 0.6023646331560503, "num_chars": 45}, {"sum_logits": -24.09165382385254, "num_tokens": 11, "num_tokens_all": 225, "is_greedy": false, "sum_logits_uncond": -44.337799072265625, "logits_per_token": -2.190150347622958, "logits_per_char": -0.48183307647705076, "bits_per_byte": 0.6951381899702131, "num_chars": 50}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1130, "native_id": "Mercury_7212888", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -14.642534255981445, "logits_per_token_corr": -3.6606335639953613, "logits_per_char_corr": -0.45757919549942017, "bits_per_byte_corr": 0.6601472361614492}, "model_output": [{"sum_logits": -14.642534255981445, "num_tokens": 4, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -26.27859115600586, "logits_per_token": -3.6606335639953613, "logits_per_char": -0.45757919549942017, "bits_per_byte": 0.6601472361614492, "num_chars": 32}, {"sum_logits": -9.914506912231445, "num_tokens": 4, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -26.15362548828125, "logits_per_token": -2.4786267280578613, "logits_per_char": -0.36720395971227576, "bits_per_byte": 0.5297633316720716, "num_chars": 27}, {"sum_logits": -17.334701538085938, "num_tokens": 5, "num_tokens_all": 211, "is_greedy": false, "sum_logits_uncond": -32.12595748901367, "logits_per_token": -3.4669403076171874, "logits_per_char": -0.5977483288995151, "bits_per_byte": 0.8623685498036151, "num_chars": 29}, {"sum_logits": -13.39533519744873, "num_tokens": 4, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -27.460126876831055, "logits_per_token": -3.3488337993621826, "logits_per_char": -0.4059192484075373, "bits_per_byte": 0.58561768667935, "num_chars": 33}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1131, "native_id": "MDSA_2007_8_42", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -31.998149871826172, "logits_per_token_corr": -2.4613961439866285, "logits_per_char_corr": -0.4506781672088193, "bits_per_byte_corr": 0.6501911568695578}, "model_output": [{"sum_logits": -23.60016632080078, "num_tokens": 7, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -32.108299255371094, "logits_per_token": -3.3714523315429688, "logits_per_char": -0.6941225388470817, "bits_per_byte": 1.001407144564661, "num_chars": 34}, {"sum_logits": -29.070423126220703, "num_tokens": 10, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -46.679115295410156, "logits_per_token": -2.90704231262207, "logits_per_char": -0.5590465985811673, "bits_per_byte": 0.8065337553994726, "num_chars": 52}, {"sum_logits": -35.35615921020508, "num_tokens": 12, "num_tokens_all": 209, "is_greedy": false, "sum_logits_uncond": -48.49312973022461, "logits_per_token": -2.9463466008504233, "logits_per_char": -0.5796091673804111, "bits_per_byte": 0.836199271434101, "num_chars": 61}, {"sum_logits": -31.998149871826172, "num_tokens": 13, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -52.605621337890625, "logits_per_token": -2.4613961439866285, "logits_per_char": -0.4506781672088193, "bits_per_byte": 0.6501911568695578, "num_chars": 71}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1132, "native_id": "Mercury_SC_415534", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.000288963317871, "logits_per_token_corr": -1.3334296544392903, "logits_per_char_corr": -1.0000722408294678, "bits_per_byte_corr": 1.1542394099011384}, "model_output": [{"sum_logits": -6.621086120605469, "num_tokens": 4, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -18.152095794677734, "logits_per_token": -1.6552715301513672, "logits_per_char": -1.1035143534342449, "bits_per_byte": 1.3646011587861597, "num_chars": 6}, {"sum_logits": -4.000288963317871, "num_tokens": 3, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -17.140216827392578, "logits_per_token": -1.3334296544392903, "logits_per_char": -1.0000722408294678, "bits_per_byte": 1.1542394099011384, "num_chars": 4}, {"sum_logits": -5.850447654724121, "num_tokens": 3, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -17.72108268737793, "logits_per_token": -1.9501492182413738, "logits_per_char": -1.1700895309448243, "bits_per_byte": 1.40673530307617, "num_chars": 5}, {"sum_logits": -8.521258354187012, "num_tokens": 3, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -17.335237503051758, "logits_per_token": -2.8404194513956704, "logits_per_char": -1.4202097256978352, "bits_per_byte": 1.756225309961155, "num_chars": 6}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1133, "native_id": "Mercury_7213413", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -16.968944549560547, "logits_per_token_corr": -1.8854382832845051, "logits_per_char_corr": -0.3463049908073581, "bits_per_byte_corr": 0.49961249287323256}, "model_output": [{"sum_logits": -18.396804809570312, "num_tokens": 11, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -39.36256408691406, "logits_per_token": -1.6724368008700283, "logits_per_char": -0.33448736017400565, "bits_per_byte": 0.48256325576342524, "num_chars": 55}, {"sum_logits": -9.740057945251465, "num_tokens": 7, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -33.3896484375, "logits_per_token": -1.3914368493216378, "logits_per_char": -0.2319061415536063, "bits_per_byte": 0.33456984037132215, "num_chars": 42}, {"sum_logits": -10.474264144897461, "num_tokens": 7, "num_tokens_all": 210, "is_greedy": false, "sum_logits_uncond": -30.550430297851562, "logits_per_token": -1.4963234492710658, "logits_per_char": -0.30806659249698415, "bits_per_byte": 0.4444461452592795, "num_chars": 34}, {"sum_logits": -16.968944549560547, "num_tokens": 9, "num_tokens_all": 212, "is_greedy": false, "sum_logits_uncond": -35.59486389160156, "logits_per_token": -1.8854382832845051, "logits_per_char": -0.3463049908073581, "bits_per_byte": 0.49961249287323256, "num_chars": 49}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1134, "native_id": "Mercury_7068635", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.245303153991699, "logits_per_token_corr": -4.245303153991699, "logits_per_char_corr": -0.8490606307983398, "bits_per_byte_corr": 1.2249355614677}, "model_output": [{"sum_logits": -2.8682737350463867, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -12.446762084960938, "logits_per_token": -2.8682737350463867, "logits_per_char": -0.7170684337615967, "bits_per_byte": 1.034511073366615, "num_chars": 4}, {"sum_logits": -6.439824104309082, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -11.783369064331055, "logits_per_token": -6.439824104309082, "logits_per_char": -1.6099560260772705, "bits_per_byte": 2.3226755748726493, "num_chars": 4}, {"sum_logits": -4.245303153991699, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -12.251964569091797, "logits_per_token": -4.245303153991699, "logits_per_char": -0.8490606307983398, "bits_per_byte": 1.2249355614677, "num_chars": 5}, {"sum_logits": -6.762864112854004, "num_tokens": 1, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -13.488615989685059, "logits_per_token": -6.762864112854004, "logits_per_char": -1.3525728225708007, "bits_per_byte": 1.9513501035654839, "num_chars": 5}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1135, "native_id": "Mercury_417137", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -10.96790885925293, "logits_per_token_corr": -3.6559696197509766, "logits_per_char_corr": -1.096790885925293, "bits_per_byte_corr": 1.5823347720177698}, "model_output": [{"sum_logits": -6.654359817504883, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -14.588896751403809, "logits_per_token": -6.654359817504883, "logits_per_char": -0.9506228310721261, "bits_per_byte": 1.3714588441445683, "num_chars": 7}, {"sum_logits": -8.125543594360352, "num_tokens": 1, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -15.729378700256348, "logits_per_token": -8.125543594360352, "logits_per_char": -1.015692949295044, "bits_per_byte": 1.465335181014898, "num_chars": 8}, {"sum_logits": -10.96790885925293, "num_tokens": 3, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -18.059425354003906, "logits_per_token": -3.6559696197509766, "logits_per_char": -1.096790885925293, "bits_per_byte": 1.5823347720177698, "num_chars": 10}, {"sum_logits": -8.72292423248291, "num_tokens": 2, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -15.962931632995605, "logits_per_token": -4.361462116241455, "logits_per_char": -0.9692138036092123, "bits_per_byte": 1.3982799480291448, "num_chars": 9}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1136, "native_id": "Mercury_7268258", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.992903709411621, "logits_per_token_corr": -4.992903709411621, "logits_per_char_corr": -0.7132719584873745, "bits_per_byte_corr": 1.029033917315633}, "model_output": [{"sum_logits": -5.790853500366211, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -12.671343803405762, "logits_per_token": -5.790853500366211, "logits_per_char": -0.9651422500610352, "bits_per_byte": 1.3924059379164717, "num_chars": 6}, {"sum_logits": -3.764763593673706, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -10.714274406433105, "logits_per_token": -3.764763593673706, "logits_per_char": -0.6274605989456177, "bits_per_byte": 0.9052342944527118, "num_chars": 6}, {"sum_logits": -4.992903709411621, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -12.709002494812012, "logits_per_token": -4.992903709411621, "logits_per_char": -0.7132719584873745, "bits_per_byte": 1.029033917315633, "num_chars": 7}, {"sum_logits": -4.565566062927246, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -12.696199417114258, "logits_per_token": -4.565566062927246, "logits_per_char": -0.6522237232753209, "bits_per_byte": 0.940959931120117, "num_chars": 7}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1137, "native_id": "NAEP_2005_4_S13+14", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -27.690044403076172, "logits_per_token_corr": -1.846002960205078, "logits_per_char_corr": -0.4395245143345424, "bits_per_byte_corr": 0.63409983718003}, "model_output": [{"sum_logits": -15.914599418640137, "num_tokens": 10, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -33.177642822265625, "logits_per_token": -1.5914599418640136, "logits_per_char": -0.35365776485866973, "bits_per_byte": 0.5102203035338445, "num_chars": 45}, {"sum_logits": -23.67855453491211, "num_tokens": 11, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -42.485740661621094, "logits_per_token": -2.152595866810192, "logits_per_char": -0.4305191733620384, "bits_per_byte": 0.6211078764174749, "num_chars": 55}, {"sum_logits": -27.690044403076172, "num_tokens": 15, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -43.933319091796875, "logits_per_token": -1.846002960205078, "logits_per_char": -0.4395245143345424, "bits_per_byte": 0.63409983718003, "num_chars": 63}, {"sum_logits": -22.34163475036621, "num_tokens": 14, "num_tokens_all": 218, "is_greedy": false, "sum_logits_uncond": -38.516963958740234, "logits_per_token": -1.5958310535975866, "logits_per_char": -0.3603489475865518, "bits_per_byte": 0.5198736396730488, "num_chars": 62}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1138, "native_id": "Mercury_SC_406089", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -28.956085205078125, "logits_per_token_corr": -2.6323713822798296, "logits_per_char_corr": -0.4826014200846354, "bits_per_byte_corr": 0.6962466754825751}, "model_output": [{"sum_logits": -28.119609832763672, "num_tokens": 7, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -40.546058654785156, "logits_per_token": -4.017087118966239, "logits_per_char": -0.7029902458190918, "bits_per_byte": 1.0142005414372457, "num_chars": 40}, {"sum_logits": -36.834800720214844, "num_tokens": 8, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -47.34358215332031, "logits_per_token": -4.6043500900268555, "logits_per_char": -0.7222509945140165, "bits_per_byte": 1.0419879280632423, "num_chars": 51}, {"sum_logits": -27.36592674255371, "num_tokens": 10, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -42.536895751953125, "logits_per_token": -2.736592674255371, "logits_per_char": -0.4486217498779297, "bits_per_byte": 0.6472243737842831, "num_chars": 61}, {"sum_logits": -28.956085205078125, "num_tokens": 11, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -49.948829650878906, "logits_per_token": -2.6323713822798296, "logits_per_char": -0.4826014200846354, "bits_per_byte": 0.6962466754825751, "num_chars": 60}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1139, "native_id": "Mercury_SC_400700", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -17.740224838256836, "logits_per_token_corr": -2.2175281047821045, "logits_per_char_corr": -0.4668480220593904, "bits_per_byte_corr": 0.6735193262743878}, "model_output": [{"sum_logits": -9.047945976257324, "num_tokens": 6, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -21.18621063232422, "logits_per_token": -1.5079909960428874, "logits_per_char": -0.36191783905029296, "bits_per_byte": 0.5221370716074828, "num_chars": 25}, {"sum_logits": -19.290515899658203, "num_tokens": 9, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -27.177959442138672, "logits_per_token": -2.143390655517578, "logits_per_char": -0.5213652945853569, "bits_per_byte": 0.7521711249904482, "num_chars": 37}, {"sum_logits": -17.740224838256836, "num_tokens": 8, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -34.08586502075195, "logits_per_token": -2.2175281047821045, "logits_per_char": -0.4668480220593904, "bits_per_byte": 0.6735193262743878, "num_chars": 38}, {"sum_logits": -8.706585884094238, "num_tokens": 8, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -24.33294677734375, "logits_per_token": -1.0883232355117798, "logits_per_char": -0.23531313200254697, "bits_per_byte": 0.33948508859636845, "num_chars": 37}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1140, "native_id": "Mercury_7223493", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -15.42007064819336, "logits_per_token_corr": -2.570011774698893, "logits_per_char_corr": -0.5140023549397786, "bits_per_byte_corr": 0.7415486484774002}, "model_output": [{"sum_logits": -15.42007064819336, "num_tokens": 6, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -25.20494842529297, "logits_per_token": -2.570011774698893, "logits_per_char": -0.5140023549397786, "bits_per_byte": 0.7415486484774002, "num_chars": 30}, {"sum_logits": -11.766265869140625, "num_tokens": 8, "num_tokens_all": 217, "is_greedy": false, "sum_logits_uncond": -34.77635955810547, "logits_per_token": -1.4707832336425781, "logits_per_char": -0.2941566467285156, "bits_per_byte": 0.4243783354800611, "num_chars": 40}, {"sum_logits": -20.587177276611328, "num_tokens": 6, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -32.366146087646484, "logits_per_token": -3.4311962127685547, "logits_per_char": -0.6641024927939138, "bits_per_byte": 0.9580973729964662, "num_chars": 31}, {"sum_logits": -16.37908363342285, "num_tokens": 5, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -26.163604736328125, "logits_per_token": -3.27581672668457, "logits_per_char": -0.7799563634963262, "bits_per_byte": 1.125239177726748, "num_chars": 21}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1141, "native_id": "Mercury_SC_405928", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -14.690693855285645, "logits_per_token_corr": -1.8363367319107056, "logits_per_char_corr": -0.397045779872585, "bits_per_byte_corr": 0.5728159776284809}, "model_output": [{"sum_logits": -21.781137466430664, "num_tokens": 8, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -35.790283203125, "logits_per_token": -2.722642183303833, "logits_per_char": -0.49502585150978784, "bits_per_byte": 0.7141713410855204, "num_chars": 44}, {"sum_logits": -21.30532455444336, "num_tokens": 6, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -34.498409271240234, "logits_per_token": -3.55088742574056, "logits_per_char": -0.5606664356432463, "bits_per_byte": 0.8088706862959837, "num_chars": 38}, {"sum_logits": -34.280364990234375, "num_tokens": 7, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -44.86659240722656, "logits_per_token": -4.897194998604911, "logits_per_char": -0.9264963510874156, "bits_per_byte": 1.3366516911164947, "num_chars": 37}, {"sum_logits": -14.690693855285645, "num_tokens": 8, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -32.60975646972656, "logits_per_token": -1.8363367319107056, "logits_per_char": -0.397045779872585, "bits_per_byte": 0.5728159776284809, "num_chars": 37}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1142, "native_id": "MCAS_2009_5_6518", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -17.500350952148438, "logits_per_token_corr": -2.5000501360212053, "logits_per_char_corr": -0.6034603776602909, "bits_per_byte_corr": 0.8706092942241082}, "model_output": [{"sum_logits": -18.08156967163086, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -38.392723083496094, "logits_per_token": -2.583081381661551, "logits_per_char": -0.6954449873704177, "bits_per_byte": 1.0033150344911101, "num_chars": 26}, {"sum_logits": -15.679298400878906, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -26.948009490966797, "logits_per_token": -2.2398997715541293, "logits_per_char": -0.5807147555881076, "bits_per_byte": 0.8377942980586112, "num_chars": 27}, {"sum_logits": -15.895320892333984, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -35.410545349121094, "logits_per_token": -2.2707601274762834, "logits_per_char": -0.5481145135287581, "bits_per_byte": 0.790762090507774, "num_chars": 29}, {"sum_logits": -17.500350952148438, "num_tokens": 7, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -27.806278228759766, "logits_per_token": -2.5000501360212053, "logits_per_char": -0.6034603776602909, "bits_per_byte": 0.8706092942241082, "num_chars": 29}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1143, "native_id": "MCAS_2006_9_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.678784370422363, "logits_per_token_corr": -0.8065653642018636, "logits_per_char_corr": -0.17283543518611363, "bits_per_byte_corr": 0.24934882523307114}, "model_output": [{"sum_logits": -18.851327896118164, "num_tokens": 11, "num_tokens_all": 200, "is_greedy": false, "sum_logits_uncond": -50.60504150390625, "logits_per_token": -1.7137570814652876, "logits_per_char": -0.40109208289613113, "bits_per_byte": 0.5786535589344891, "num_chars": 47}, {"sum_logits": -9.678784370422363, "num_tokens": 12, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -35.089447021484375, "logits_per_token": -0.8065653642018636, "logits_per_char": -0.17283543518611363, "bits_per_byte": 0.24934882523307114, "num_chars": 56}, {"sum_logits": -28.846248626708984, "num_tokens": 15, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -59.232398986816406, "logits_per_token": -1.923083241780599, "logits_per_char": -0.48077081044514974, "bits_per_byte": 0.6936056640338837, "num_chars": 60}, {"sum_logits": -25.64631462097168, "num_tokens": 14, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -51.54133987426758, "logits_per_token": -1.8318796157836914, "logits_per_char": -0.41365023582212385, "bits_per_byte": 0.596771143883557, "num_chars": 62}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1144, "native_id": "Mercury_7239383", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -33.011329650878906, "logits_per_token_corr": -3.6679255167643228, "logits_per_char_corr": -0.6602265930175781, "bits_per_byte_corr": 0.9525056316101602}, "model_output": [{"sum_logits": -18.896303176879883, "num_tokens": 5, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -33.795188903808594, "logits_per_token": -3.7792606353759766, "logits_per_char": -0.6095581669961253, "bits_per_byte": 0.8794065446593083, "num_chars": 31}, {"sum_logits": -33.011329650878906, "num_tokens": 9, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -44.225914001464844, "logits_per_token": -3.6679255167643228, "logits_per_char": -0.6602265930175781, "bits_per_byte": 0.9525056316101602, "num_chars": 50}, {"sum_logits": -25.995885848999023, "num_tokens": 9, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -41.17366027832031, "logits_per_token": -2.8884317609998913, "logits_per_char": -0.44820492843101767, "bits_per_byte": 0.6466230275498865, "num_chars": 58}, {"sum_logits": -35.44828796386719, "num_tokens": 10, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -44.70660400390625, "logits_per_token": -3.5448287963867187, "logits_per_char": -0.7089657592773437, "bits_per_byte": 1.0228213850702372, "num_chars": 50}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1145, "native_id": "Mercury_SC_400130", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -11.803153991699219, "logits_per_token_corr": -3.9343846638997397, "logits_per_char_corr": -0.786876932779948, "bits_per_byte_corr": 1.1352234487123647}, "model_output": [{"sum_logits": -12.874109268188477, "num_tokens": 3, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -26.378847122192383, "logits_per_token": -4.291369756062825, "logits_per_char": -0.6437054634094238, "bits_per_byte": 0.928670679854575, "num_chars": 20}, {"sum_logits": -11.803153991699219, "num_tokens": 3, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -20.93509292602539, "logits_per_token": -3.9343846638997397, "logits_per_char": -0.786876932779948, "bits_per_byte": 1.1352234487123647, "num_chars": 15}, {"sum_logits": -5.122625827789307, "num_tokens": 2, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -15.740121841430664, "logits_per_token": -2.5613129138946533, "logits_per_char": -0.5691806475321451, "bits_per_byte": 0.8211540975651848, "num_chars": 9}, {"sum_logits": -5.623476982116699, "num_tokens": 2, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -14.431310653686523, "logits_per_token": -2.8117384910583496, "logits_per_char": -0.9372461636861166, "bits_per_byte": 1.3521603924431376, "num_chars": 6}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1146, "native_id": "Mercury_401426", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -23.86712646484375, "logits_per_token_corr": -2.6519029405381946, "logits_per_char_corr": -0.5188505753226902, "bits_per_byte_corr": 0.7485431519809684}, "model_output": [{"sum_logits": -22.25015640258789, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -32.220802307128906, "logits_per_token": -3.17859377179827, "logits_per_char": -0.6544163647819968, "bits_per_byte": 0.944123244148248, "num_chars": 34}, {"sum_logits": -16.772367477416992, "num_tokens": 6, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -27.805044174194336, "logits_per_token": -2.7953945795694985, "logits_per_char": -0.46589909659491646, "bits_per_byte": 0.672150316212617, "num_chars": 36}, {"sum_logits": -17.875490188598633, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -32.4314079284668, "logits_per_token": -2.5536414555140903, "logits_per_char": -0.4157090741534566, "bits_per_byte": 0.5997414197341651, "num_chars": 43}, {"sum_logits": -23.86712646484375, "num_tokens": 9, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -37.148223876953125, "logits_per_token": -2.6519029405381946, "logits_per_char": -0.5188505753226902, "bits_per_byte": 0.7485431519809684, "num_chars": 46}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1147, "native_id": "MCAS_2010_8_12016", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -40.6965446472168, "logits_per_token_corr": -4.06965446472168, "logits_per_char_corr": -0.8305417274942204, "bits_per_byte_corr": 1.1982184315081255}, "model_output": [{"sum_logits": -20.723791122436523, "num_tokens": 8, "num_tokens_all": 221, "is_greedy": false, "sum_logits_uncond": -31.708730697631836, "logits_per_token": -2.5904738903045654, "logits_per_char": -0.6476184725761414, "bits_per_byte": 0.9343159587743556, "num_chars": 32}, {"sum_logits": -31.41994857788086, "num_tokens": 10, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -41.51189422607422, "logits_per_token": -3.141994857788086, "logits_per_char": -0.668509544210231, "bits_per_byte": 0.9644554042197344, "num_chars": 47}, {"sum_logits": -40.6965446472168, "num_tokens": 10, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -48.587554931640625, "logits_per_token": -4.06965446472168, "logits_per_char": -0.8305417274942204, "bits_per_byte": 1.1982184315081255, "num_chars": 49}, {"sum_logits": -34.090843200683594, "num_tokens": 10, "num_tokens_all": 223, "is_greedy": false, "sum_logits_uncond": -38.00825881958008, "logits_per_token": -3.4090843200683594, "logits_per_char": -0.7102259000142416, "bits_per_byte": 1.0246393838621832, "num_chars": 48}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1148, "native_id": "Mercury_SC_400324", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -18.315547943115234, "logits_per_token_corr": -2.0350608825683594, "logits_per_char_corr": -0.5087652206420898, "bits_per_byte_corr": 0.7339930607976496}, "model_output": [{"sum_logits": -17.120763778686523, "num_tokens": 7, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -31.735149383544922, "logits_per_token": -2.4458233969552174, "logits_per_char": -0.5188110235965613, "bits_per_byte": 0.7484860909018237, "num_chars": 33}, {"sum_logits": -18.315547943115234, "num_tokens": 9, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -33.70227813720703, "logits_per_token": -2.0350608825683594, "logits_per_char": -0.5087652206420898, "bits_per_byte": 0.7339930607976496, "num_chars": 36}, {"sum_logits": -35.04853820800781, "num_tokens": 9, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -48.34313201904297, "logits_per_token": -3.894282023111979, "logits_per_char": -0.7457135788937832, "bits_per_byte": 1.0758372821943947, "num_chars": 47}, {"sum_logits": -21.524742126464844, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -26.827896118164062, "logits_per_token": -3.587457021077474, "logits_per_char": -0.8278746971717248, "bits_per_byte": 1.1943707200879579, "num_chars": 26}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1149, "native_id": "Mercury_SC_LBS10662", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -1.783402681350708, "logits_per_token_corr": -0.891701340675354, "logits_per_char_corr": -0.148616890112559, "bits_per_byte_corr": 0.2144088503578829}, "model_output": [{"sum_logits": -8.969179153442383, "num_tokens": 2, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -17.565874099731445, "logits_per_token": -4.484589576721191, "logits_per_char": -0.9965754614935981, "bits_per_byte": 1.437754476169477, "num_chars": 9}, {"sum_logits": -4.453070163726807, "num_tokens": 2, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -18.66720962524414, "logits_per_token": -2.2265350818634033, "logits_per_char": -0.3425438587482159, "bits_per_byte": 0.4941863263033757, "num_chars": 13}, {"sum_logits": -1.783402681350708, "num_tokens": 2, "num_tokens_all": 192, "is_greedy": true, "sum_logits_uncond": -18.19501495361328, "logits_per_token": -0.891701340675354, "logits_per_char": -0.148616890112559, "bits_per_byte": 0.2144088503578829, "num_chars": 12}, {"sum_logits": -4.46793794631958, "num_tokens": 2, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -16.50745964050293, "logits_per_token": -2.23396897315979, "logits_per_char": -0.4061761769381436, "bits_per_byte": 0.585988356196319, "num_chars": 11}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1150, "native_id": "VASoL_2009_3_8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -7.974569320678711, "logits_per_token_corr": -7.974569320678711, "logits_per_char_corr": -1.9936423301696777, "bits_per_byte_corr": 2.8762179030441777}, "model_output": [{"sum_logits": -6.641033172607422, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -12.492488861083984, "logits_per_token": -6.641033172607422, "logits_per_char": -1.106838862101237, "bits_per_byte": 1.5968309374177851, "num_chars": 6}, {"sum_logits": -7.8532915115356445, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -12.368083000183105, "logits_per_token": -7.8532915115356445, "logits_per_char": -1.570658302307129, "bits_per_byte": 2.265980943671201, "num_chars": 5}, {"sum_logits": -9.663590431213379, "num_tokens": 2, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -12.173416137695312, "logits_per_token": -4.8317952156066895, "logits_per_char": -1.9327180862426758, "bits_per_byte": 2.7883227984607197, "num_chars": 5}, {"sum_logits": -7.974569320678711, "num_tokens": 1, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -8.723467826843262, "logits_per_token": -7.974569320678711, "logits_per_char": -1.9936423301696777, "bits_per_byte": 2.8762179030441777, "num_chars": 4}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1151, "native_id": "Mercury_SC_401185", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -27.601642608642578, "logits_per_token_corr": -3.943091801234654, "logits_per_char_corr": -0.5520328521728516, "bits_per_byte_corr": 0.7964150582381354}, "model_output": [{"sum_logits": -17.291275024414062, "num_tokens": 6, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -28.06967544555664, "logits_per_token": -2.8818791707356772, "logits_per_char": -0.49403642926897323, "bits_per_byte": 0.7127439065253509, "num_chars": 35}, {"sum_logits": -16.664541244506836, "num_tokens": 7, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -35.280113220214844, "logits_per_token": -2.380648749215262, "logits_per_char": -0.396774791535877, "bits_per_byte": 0.5724250240989733, "num_chars": 42}, {"sum_logits": -27.601642608642578, "num_tokens": 7, "num_tokens_all": 215, "is_greedy": false, "sum_logits_uncond": -33.01506042480469, "logits_per_token": -3.943091801234654, "logits_per_char": -0.5520328521728516, "bits_per_byte": 0.7964150582381354, "num_chars": 50}, {"sum_logits": -33.13462829589844, "num_tokens": 11, "num_tokens_all": 219, "is_greedy": false, "sum_logits_uncond": -41.50788879394531, "logits_per_token": -3.012238935990767, "logits_per_char": -0.6136042277018229, "bits_per_byte": 0.8852437763745582, "num_chars": 54}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1152, "native_id": "NYSEDREGENTS_2015_8_29", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -12.497746467590332, "logits_per_token_corr": -4.165915489196777, "logits_per_char_corr": -0.6943192481994629, "bits_per_byte_corr": 1.001690936171838}, "model_output": [{"sum_logits": -17.855819702148438, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -22.364482879638672, "logits_per_token": -5.9519399007161455, "logits_per_char": -1.1159887313842773, "bits_per_byte": 1.6100314084572191, "num_chars": 16}, {"sum_logits": -15.624330520629883, "num_tokens": 2, "num_tokens_all": 180, "is_greedy": false, "sum_logits_uncond": -21.467382431030273, "logits_per_token": -7.812165260314941, "logits_per_char": -1.1160236086164201, "bits_per_byte": 1.6100817256670714, "num_chars": 14}, {"sum_logits": -12.497746467590332, "num_tokens": 3, "num_tokens_all": 181, "is_greedy": false, "sum_logits_uncond": -22.731502532958984, "logits_per_token": -4.165915489196777, "logits_per_char": -0.6943192481994629, "bits_per_byte": 1.001690936171838, "num_chars": 18}, {"sum_logits": -18.983884811401367, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -27.837419509887695, "logits_per_token": -4.745971202850342, "logits_per_char": -1.2655923207600912, "bits_per_byte": 1.8258637649490497, "num_chars": 15}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1153, "native_id": "Mercury_7234378", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.306148529052734, "logits_per_token_corr": -3.0510247548421225, "logits_per_char_corr": -0.3589440888049556, "bits_per_byte_corr": 0.5178468568756892}, "model_output": [{"sum_logits": -19.699291229248047, "num_tokens": 7, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -44.668434143066406, "logits_per_token": -2.8141844613211497, "logits_per_char": -0.42824546150539233, "bits_per_byte": 0.6178276035974789, "num_chars": 46}, {"sum_logits": -24.274715423583984, "num_tokens": 11, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -47.13479995727539, "logits_per_token": -2.2067923112349077, "logits_per_char": -0.3569811091703527, "bits_per_byte": 0.5150148758914795, "num_chars": 68}, {"sum_logits": -22.71300506591797, "num_tokens": 11, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -42.25971221923828, "logits_per_token": -2.064818642356179, "logits_per_char": -0.3291739864625793, "bits_per_byte": 0.4748976778595551, "num_chars": 69}, {"sum_logits": -18.306148529052734, "num_tokens": 6, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -39.615013122558594, "logits_per_token": -3.0510247548421225, "logits_per_char": -0.3589440888049556, "bits_per_byte": 0.5178468568756892, "num_chars": 51}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1154, "native_id": "ACTAAP_2014_7_3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -21.713581085205078, "logits_per_token_corr": -1.6702754680926983, "logits_per_char_corr": -0.30157751507229275, "bits_per_byte_corr": 0.43508438543872596}, "model_output": [{"sum_logits": -15.300871849060059, "num_tokens": 14, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -31.678211212158203, "logits_per_token": -1.0929194177900041, "logits_per_char": -0.25083396473868946, "bits_per_byte": 0.3618769170152844, "num_chars": 61}, {"sum_logits": -25.219261169433594, "num_tokens": 14, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -39.002254486083984, "logits_per_token": -1.8013757978166853, "logits_per_char": -0.4203210194905599, "bits_per_byte": 0.6063950504008597, "num_chars": 60}, {"sum_logits": -26.45675277709961, "num_tokens": 13, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -41.41569137573242, "logits_per_token": -2.0351348290076623, "logits_per_char": -0.37263032080421987, "bits_per_byte": 0.5375919159094977, "num_chars": 71}, {"sum_logits": -21.713581085205078, "num_tokens": 13, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -35.479915618896484, "logits_per_token": -1.6702754680926983, "logits_per_char": -0.30157751507229275, "bits_per_byte": 0.43508438543872596, "num_chars": 72}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1155, "native_id": "MDSA_2008_8_27", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.937053680419922, "logits_per_token_corr": -1.9910076686314173, "logits_per_char_corr": -0.3667645705373664, "bits_per_byte_corr": 0.529129427088409}, "model_output": [{"sum_logits": -7.961190700531006, "num_tokens": 5, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -28.43517303466797, "logits_per_token": -1.5922381401062011, "logits_per_char": -0.28432823930467876, "bits_per_byte": 0.4101989408298452, "num_chars": 28}, {"sum_logits": -7.747848987579346, "num_tokens": 5, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -28.11052131652832, "logits_per_token": -1.549569797515869, "logits_per_char": -0.27670889241354807, "bits_per_byte": 0.3992065468551903, "num_chars": 28}, {"sum_logits": -12.738293647766113, "num_tokens": 7, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -31.81689453125, "logits_per_token": -1.819756235395159, "logits_per_char": -0.33521825388858195, "bits_per_byte": 0.4836177125008621, "num_chars": 38}, {"sum_logits": -13.937053680419922, "num_tokens": 7, "num_tokens_all": 201, "is_greedy": false, "sum_logits_uncond": -30.189430236816406, "logits_per_token": -1.9910076686314173, "logits_per_char": -0.3667645705373664, "bits_per_byte": 0.529129427088409, "num_chars": 38}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1156, "native_id": "Mercury_7004725", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -13.872489929199219, "logits_per_token_corr": -2.7744979858398438, "logits_per_char_corr": -0.4474996751354587, "bits_per_byte_corr": 0.6456055621178123}, "model_output": [{"sum_logits": -13.872489929199219, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -31.724794387817383, "logits_per_token": -2.7744979858398438, "logits_per_char": -0.4474996751354587, "bits_per_byte": 0.6456055621178123, "num_chars": 31}, {"sum_logits": -16.692935943603516, "num_tokens": 7, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -33.95570373535156, "logits_per_token": -2.384705134800502, "logits_per_char": -0.5384818046323715, "bits_per_byte": 0.7768650291526201, "num_chars": 31}, {"sum_logits": -15.816795349121094, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -23.77971649169922, "logits_per_token": -3.1633590698242187, "logits_per_char": -0.4942748546600342, "bits_per_byte": 0.7130878816546568, "num_chars": 32}, {"sum_logits": -12.701669692993164, "num_tokens": 5, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -25.803607940673828, "logits_per_token": -2.540333938598633, "logits_per_char": -0.5522465083910071, "bits_per_byte": 0.7967232990045237, "num_chars": 23}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1157, "native_id": "Mercury_405143", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -18.760726928710938, "logits_per_token_corr": -3.7521453857421876, "logits_per_char_corr": -0.8527603149414062, "bits_per_byte_corr": 1.2302730774337614}, "model_output": [{"sum_logits": -14.070524215698242, "num_tokens": 4, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -22.429275512695312, "logits_per_token": -3.5176310539245605, "logits_per_char": -0.7816957897610135, "bits_per_byte": 1.1277486393728062, "num_chars": 18}, {"sum_logits": -23.627609252929688, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -26.23699188232422, "logits_per_token": -4.725521850585937, "logits_per_char": -1.125124250139509, "bits_per_byte": 1.6232111760613492, "num_chars": 21}, {"sum_logits": -18.760726928710938, "num_tokens": 5, "num_tokens_all": 191, "is_greedy": false, "sum_logits_uncond": -23.024169921875, "logits_per_token": -3.7521453857421876, "logits_per_char": -0.8527603149414062, "bits_per_byte": 1.2302730774337614, "num_chars": 22}, {"sum_logits": -22.909378051757812, "num_tokens": 4, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -27.09327507019043, "logits_per_token": -5.727344512939453, "logits_per_char": -1.0413353659889915, "bits_per_byte": 1.502329368415691, "num_chars": 22}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1158, "native_id": "MCAS_2003_8_7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -4.730720043182373, "logits_per_token_corr": -4.730720043182373, "logits_per_char_corr": -0.39422667026519775, "bits_per_byte_corr": 0.568748862178178}, "model_output": [{"sum_logits": -4.730720043182373, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -13.381863594055176, "logits_per_token": -4.730720043182373, "logits_per_char": -0.39422667026519775, "bits_per_byte": 0.568748862178178, "num_chars": 12}, {"sum_logits": -9.379484176635742, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -12.841766357421875, "logits_per_token": -9.379484176635742, "logits_per_char": -1.0421649085150824, "bits_per_byte": 1.5035261453042899, "num_chars": 9}, {"sum_logits": -11.784664154052734, "num_tokens": 1, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -13.95637035369873, "logits_per_token": -11.784664154052734, "logits_per_char": -1.4730830192565918, "bits_per_byte": 2.1252095667007533, "num_chars": 8}, {"sum_logits": -9.684310913085938, "num_tokens": 2, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -15.493139266967773, "logits_per_token": -4.842155456542969, "logits_per_char": -0.6052694320678711, "bits_per_byte": 0.8732192080466243, "num_chars": 16}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1159, "native_id": "Mercury_SC_405341", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -40.463401794433594, "logits_per_token_corr": -4.0463401794433596, "logits_per_char_corr": -0.778142342200646, "bits_per_byte_corr": 1.1226220981994013}, "model_output": [{"sum_logits": -40.463401794433594, "num_tokens": 10, "num_tokens_all": 192, "is_greedy": false, "sum_logits_uncond": -51.748592376708984, "logits_per_token": -4.0463401794433596, "logits_per_char": -0.778142342200646, "bits_per_byte": 1.1226220981994013, "num_chars": 52}, {"sum_logits": -25.63640785217285, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -38.10528564453125, "logits_per_token": -3.2045509815216064, "logits_per_char": -0.5696979522705078, "bits_per_byte": 0.8219004105458494, "num_chars": 45}, {"sum_logits": -31.082843780517578, "num_tokens": 11, "num_tokens_all": 193, "is_greedy": false, "sum_logits_uncond": -44.50794219970703, "logits_per_token": -2.8257130709561435, "logits_per_char": -0.5864687505758034, "bits_per_byte": 0.8460955580926658, "num_chars": 53}, {"sum_logits": -24.41649627685547, "num_tokens": 8, "num_tokens_all": 190, "is_greedy": false, "sum_logits_uncond": -41.625205993652344, "logits_per_token": -3.0520620346069336, "logits_per_char": -0.5813451494489398, "bits_per_byte": 0.8387037641554412, "num_chars": 42}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1160, "native_id": "Mercury_7283833", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.880847454071045, "logits_per_token_corr": -6.880847454071045, "logits_per_char_corr": -0.4587231636047363, "bits_per_byte_corr": 0.6617976332739252}, "model_output": [{"sum_logits": -6.93921422958374, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -13.701775550842285, "logits_per_token": -6.93921422958374, "logits_per_char": -0.5782678524653116, "bits_per_byte": 0.8342641630578153, "num_chars": 12}, {"sum_logits": -6.880847454071045, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -14.197172164916992, "logits_per_token": -6.880847454071045, "logits_per_char": -0.4587231636047363, "bits_per_byte": 0.6617976332739252, "num_chars": 15}, {"sum_logits": -6.345424175262451, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -12.913460731506348, "logits_per_token": -6.345424175262451, "logits_per_char": -0.4532445839473179, "bits_per_byte": 0.6538937135710469, "num_chars": 14}, {"sum_logits": -8.145147323608398, "num_tokens": 1, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -15.630255699157715, "logits_per_token": -8.145147323608398, "logits_per_char": -0.7404679385098544, "bits_per_byte": 1.0682694228262082, "num_chars": 11}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1161, "native_id": "Mercury_7159303", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -8.457735061645508, "logits_per_token_corr": -4.228867530822754, "logits_per_char_corr": -0.44514395061292145, "bits_per_byte_corr": 0.6422069700314448}, "model_output": [{"sum_logits": -8.457735061645508, "num_tokens": 2, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -22.214725494384766, "logits_per_token": -4.228867530822754, "logits_per_char": -0.44514395061292145, "bits_per_byte": 0.6422069700314448, "num_chars": 19}, {"sum_logits": -12.71921157836914, "num_tokens": 2, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -18.39358901977539, "logits_per_token": -6.35960578918457, "logits_per_char": -0.635960578918457, "bits_per_byte": 0.9174971734071914, "num_chars": 20}, {"sum_logits": -13.742509841918945, "num_tokens": 2, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -23.225624084472656, "logits_per_token": -6.871254920959473, "logits_per_char": -0.7232899916799445, "bits_per_byte": 1.043486884122025, "num_chars": 19}, {"sum_logits": -13.479763984680176, "num_tokens": 2, "num_tokens_all": 238, "is_greedy": false, "sum_logits_uncond": -18.340065002441406, "logits_per_token": -6.739881992340088, "logits_per_char": -0.7929272932164809, "bits_per_byte": 1.1439522737097478, "num_chars": 17}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1162, "native_id": "Mercury_406427", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -13.220964431762695, "logits_per_token_corr": -1.888709204537528, "logits_per_char_corr": -0.3672490119934082, "bits_per_byte_corr": 0.5298283283746421}, "model_output": [{"sum_logits": -13.163684844970703, "num_tokens": 7, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -24.366493225097656, "logits_per_token": -1.880526406424386, "logits_per_char": -0.42463499499905494, "bits_per_byte": 0.6126188014734865, "num_chars": 31}, {"sum_logits": -20.071226119995117, "num_tokens": 7, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -29.5355224609375, "logits_per_token": -2.8673180171421597, "logits_per_char": -0.5903301799998564, "bits_per_byte": 0.8516664231734938, "num_chars": 34}, {"sum_logits": -13.220964431762695, "num_tokens": 7, "num_tokens_all": 213, "is_greedy": false, "sum_logits_uncond": -22.025407791137695, "logits_per_token": -1.888709204537528, "logits_per_char": -0.3672490119934082, "bits_per_byte": 0.5298283283746421, "num_chars": 36}, {"sum_logits": -19.8150634765625, "num_tokens": 8, "num_tokens_all": 214, "is_greedy": false, "sum_logits_uncond": -30.823196411132812, "logits_per_token": -2.4768829345703125, "logits_per_char": -0.5504184299045138, "bits_per_byte": 0.7940859392377021, "num_chars": 36}], "label": 2, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1163, "native_id": "Mercury_SC_414129", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_per_byte": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -10.279928207397461, "logits_per_token_corr": -5.1399641036987305, "logits_per_char_corr": -1.0279928207397462, "bits_per_byte_corr": 1.4830801445517545}, "model_output": [{"sum_logits": -11.07406997680664, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -15.285493850708008, "logits_per_token": -5.53703498840332, "logits_per_char": -1.0067336342551492, "bits_per_byte": 1.4524096216370705, "num_chars": 11}, {"sum_logits": -10.279928207397461, "num_tokens": 2, "num_tokens_all": 183, "is_greedy": false, "sum_logits_uncond": -18.05206298828125, "logits_per_token": -5.1399641036987305, "logits_per_char": -1.0279928207397462, "bits_per_byte": 1.4830801445517545, "num_chars": 10}, {"sum_logits": -12.925999641418457, "num_tokens": 3, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -19.85879135131836, "logits_per_token": -4.308666547139485, "logits_per_char": -0.9943076647244967, "bits_per_byte": 1.4344827370169482, "num_chars": 13}, {"sum_logits": -22.179019927978516, "num_tokens": 4, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -28.096050262451172, "logits_per_token": -5.544754981994629, "logits_per_char": -1.3861887454986572, "bits_per_byte": 1.999847628868443, "num_chars": 16}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1164, "native_id": "Mercury_7108990", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.702820777893066, "logits_per_token_corr": -3.351410388946533, "logits_per_char_corr": -0.7447578642103407, "bits_per_byte_corr": 1.0744584773600865}, "model_output": [{"sum_logits": -6.702820777893066, "num_tokens": 2, "num_tokens_all": 178, "is_greedy": false, "sum_logits_uncond": -16.22152328491211, "logits_per_token": -3.351410388946533, "logits_per_char": -0.7447578642103407, "bits_per_byte": 1.0744584773600865, "num_chars": 9}, {"sum_logits": -6.4843854904174805, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -15.499598503112793, "logits_per_token": -2.16146183013916, "logits_per_char": -0.4987988838782677, "bits_per_byte": 0.7196146761726437, "num_chars": 13}, {"sum_logits": -7.540942192077637, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -17.318767547607422, "logits_per_token": -2.5136473973592124, "logits_per_char": -0.5027294794718424, "bits_per_byte": 0.7252853269432381, "num_chars": 15}, {"sum_logits": -9.191673278808594, "num_tokens": 3, "num_tokens_all": 179, "is_greedy": false, "sum_logits_uncond": -18.509307861328125, "logits_per_token": -3.0638910929361978, "logits_per_char": -0.6127782185872396, "bits_per_byte": 0.8840520971212189, "num_chars": 15}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1165, "native_id": "Mercury_SC_407315", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -28.75657844543457, "logits_per_token_corr": -2.875657844543457, "logits_per_char_corr": -0.4714193187776159, "bits_per_byte_corr": 0.6801143133802084}, "model_output": [{"sum_logits": -28.75657844543457, "num_tokens": 10, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -44.16516876220703, "logits_per_token": -2.875657844543457, "logits_per_char": -0.4714193187776159, "bits_per_byte": 0.6801143133802084, "num_chars": 61}, {"sum_logits": -39.51404571533203, "num_tokens": 9, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -46.253501892089844, "logits_per_token": -4.390449523925781, "logits_per_char": -0.6812766502643454, "bits_per_byte": 0.9828744448105219, "num_chars": 58}, {"sum_logits": -31.387073516845703, "num_tokens": 10, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -44.69103240966797, "logits_per_token": -3.1387073516845705, "logits_per_char": -0.5706740639426492, "bits_per_byte": 0.8233086420146026, "num_chars": 55}, {"sum_logits": -32.76362609863281, "num_tokens": 9, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -49.819190979003906, "logits_per_token": -3.64040289984809, "logits_per_char": -0.5648901051488416, "bits_per_byte": 0.8149641533460644, "num_chars": 58}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1166, "native_id": "Mercury_SC_408663", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -12.554367065429688, "logits_per_token_corr": -1.394929673936632, "logits_per_char_corr": -0.24616406010646447, "bits_per_byte_corr": 0.35513966876094416}, "model_output": [{"sum_logits": -22.144969940185547, "num_tokens": 9, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -37.304203033447266, "logits_per_token": -2.460552215576172, "logits_per_char": -0.44289939880371093, "bits_per_byte": 0.6389687662672761, "num_chars": 50}, {"sum_logits": -12.554367065429688, "num_tokens": 9, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -30.2501220703125, "logits_per_token": -1.394929673936632, "logits_per_char": -0.24616406010646447, "bits_per_byte": 0.35513966876094416, "num_chars": 51}, {"sum_logits": -17.112136840820312, "num_tokens": 9, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -35.03889465332031, "logits_per_token": -1.9013485378689237, "logits_per_char": -0.33553209491804536, "bits_per_byte": 0.48407048939769676, "num_chars": 51}, {"sum_logits": -17.5734806060791, "num_tokens": 9, "num_tokens_all": 204, "is_greedy": false, "sum_logits_uncond": -32.42707061767578, "logits_per_token": -1.9526089562310114, "logits_per_char": -0.3379515501169058, "bits_per_byte": 0.4875610254147483, "num_chars": 52}], "label": 1, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1167, "native_id": "MEA_2013_8_18", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -9.655945777893066, "logits_per_token_corr": -2.4139864444732666, "logits_per_char_corr": -0.5082076725206877, "bits_per_byte_corr": 0.7331886888878452}, "model_output": [{"sum_logits": -9.655945777893066, "num_tokens": 4, "num_tokens_all": 182, "is_greedy": false, "sum_logits_uncond": -27.150592803955078, "logits_per_token": -2.4139864444732666, "logits_per_char": -0.5082076725206877, "bits_per_byte": 0.7331886888878452, "num_chars": 19}, {"sum_logits": -16.39834976196289, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -34.6353874206543, "logits_per_token": -2.733058293660482, "logits_per_char": -0.6832645734151205, "bits_per_byte": 0.9857424116818155, "num_chars": 24}, {"sum_logits": -10.87271499633789, "num_tokens": 6, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -29.27190399169922, "logits_per_token": -1.8121191660563152, "logits_per_char": -0.3507327418173513, "bits_per_byte": 0.5060003872976454, "num_chars": 31}, {"sum_logits": -12.786537170410156, "num_tokens": 8, "num_tokens_all": 186, "is_greedy": false, "sum_logits_uncond": -24.30376434326172, "logits_per_token": -1.5983171463012695, "logits_per_char": -0.33648782027395147, "bits_per_byte": 0.48544930962911537, "num_chars": 38}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1168, "native_id": "Mercury_7111125", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_per_byte": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -32.452354431152344, "logits_per_token_corr": -4.056544303894043, "logits_per_char_corr": -0.8321116520808294, "bits_per_byte_corr": 1.2004833539237976}, "model_output": [{"sum_logits": -32.452354431152344, "num_tokens": 8, "num_tokens_all": 187, "is_greedy": false, "sum_logits_uncond": -43.26393127441406, "logits_per_token": -4.056544303894043, "logits_per_char": -0.8321116520808294, "bits_per_byte": 1.2004833539237976, "num_chars": 39}, {"sum_logits": -21.05739974975586, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -29.271364212036133, "logits_per_token": -4.211479949951172, "logits_per_char": -0.6016399928501674, "bits_per_byte": 0.8679830340860316, "num_chars": 35}, {"sum_logits": -29.2064208984375, "num_tokens": 6, "num_tokens_all": 185, "is_greedy": false, "sum_logits_uncond": -37.68607711791992, "logits_per_token": -4.86773681640625, "logits_per_char": -0.8344691685267858, "bits_per_byte": 1.2038845312091955, "num_chars": 35}, {"sum_logits": -33.03318786621094, "num_tokens": 5, "num_tokens_all": 184, "is_greedy": false, "sum_logits_uncond": -32.93962478637695, "logits_per_token": -6.606637573242187, "logits_per_char": -0.7865044730050224, "bits_per_byte": 1.1346861028421484, "num_chars": 42}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1169, "native_id": "LEAP_2009_8_10430", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_per_byte": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 1, "no_answer": 0, "sum_logits_corr": -34.75286865234375, "logits_per_token_corr": -2.673297588641827, "logits_per_char_corr": -0.48267873128255206, "bits_per_byte_corr": 0.6963582119644147}, "model_output": [{"sum_logits": -34.75286865234375, "num_tokens": 13, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -59.56840515136719, "logits_per_token": -2.673297588641827, "logits_per_char": -0.48267873128255206, "bits_per_byte": 0.6963582119644147, "num_chars": 72}, {"sum_logits": -31.600374221801758, "num_tokens": 9, "num_tokens_all": 195, "is_greedy": false, "sum_logits_uncond": -54.961395263671875, "logits_per_token": -3.5111526913113065, "logits_per_char": -0.6449055963633011, "bits_per_byte": 0.9304021057155425, "num_chars": 49}, {"sum_logits": -38.8671989440918, "num_tokens": 13, "num_tokens_all": 199, "is_greedy": false, "sum_logits_uncond": -54.5330924987793, "logits_per_token": -2.9897845341609073, "logits_per_char": -0.6169396657792349, "bits_per_byte": 0.8900557963480361, "num_chars": 63}, {"sum_logits": -42.828285217285156, "num_tokens": 17, "num_tokens_all": 203, "is_greedy": false, "sum_logits_uncond": -59.52294921875, "logits_per_token": -2.5193108951344207, "logits_per_char": -0.6691919565200806, "bits_per_byte": 0.9654399170749967, "num_chars": 64}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1170, "native_id": "Mercury_7165218", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_per_byte": 1, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -4.828833103179932, "logits_per_token_corr": -4.828833103179932, "logits_per_char_corr": -0.6898333004542759, "bits_per_byte_corr": 0.9952190816061652}, "model_output": [{"sum_logits": -7.037858486175537, "num_tokens": 1, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -14.787230491638184, "logits_per_token": -7.037858486175537, "logits_per_char": -1.0054083551679338, "bits_per_byte": 1.4504976480701497, "num_chars": 7}, {"sum_logits": -8.630500793457031, "num_tokens": 1, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -16.19397735595703, "logits_per_token": -8.630500793457031, "logits_per_char": -1.078812599182129, "bits_per_byte": 1.5563975868897084, "num_chars": 8}, {"sum_logits": -4.998935222625732, "num_tokens": 2, "num_tokens_all": 207, "is_greedy": false, "sum_logits_uncond": -17.473323822021484, "logits_per_token": -2.499467611312866, "logits_per_char": -0.9997870445251464, "bits_per_byte": 1.4423878110824984, "num_chars": 5}, {"sum_logits": -4.828833103179932, "num_tokens": 1, "num_tokens_all": 206, "is_greedy": false, "sum_logits_uncond": -16.484729766845703, "logits_per_token": -4.828833103179932, "logits_per_char": -0.6898333004542759, "bits_per_byte": 0.9952190816061652, "num_chars": 7}], "label": 3, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"} {"doc_id": 1171, "native_id": "MEA_2013_8_15", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_per_byte": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_per_byte": 0, "acc_uncond": 0, "no_answer": 0, "sum_logits_corr": -6.032631874084473, "logits_per_token_corr": -3.0163159370422363, "logits_per_char_corr": -0.6032631874084473, "bits_per_byte_corr": 0.8703248088256615}, "model_output": [{"sum_logits": -6.032631874084473, "num_tokens": 2, "num_tokens_all": 194, "is_greedy": false, "sum_logits_uncond": -14.354490280151367, "logits_per_token": -3.0163159370422363, "logits_per_char": -0.6032631874084473, "bits_per_byte": 0.8703248088256615, "num_chars": 10}, {"sum_logits": -9.420001029968262, "num_tokens": 4, "num_tokens_all": 196, "is_greedy": false, "sum_logits_uncond": -21.195255279541016, "logits_per_token": -2.3550002574920654, "logits_per_char": -0.47100005149841306, "bits_per_byte": 0.6795094385556951, "num_chars": 20}, {"sum_logits": -12.743922233581543, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -31.32278060913086, "logits_per_token": -2.1239870389302573, "logits_per_char": -0.43944559426143254, "bits_per_byte": 0.6339859795819277, "num_chars": 29}, {"sum_logits": -13.923645973205566, "num_tokens": 6, "num_tokens_all": 198, "is_greedy": false, "sum_logits_uncond": -25.349576950073242, "logits_per_token": -2.3206076622009277, "logits_per_char": -0.43511393666267395, "bits_per_byte": 0.6277367186453652, "num_chars": 32}], "label": 0, "task_hash": "3059c7ebe96cbc93600127cf3237e8e1", "model_hash": "91bf664a9e1a7082cab09f7cd7b429f6"}