[ { "loss": 0.7143, "learning_rate": 7.2e-05, "epoch": 0.02, "step": 1 }, { "loss": 0.6911, "learning_rate": 7.2e-05, "epoch": 0.04, "step": 2 }, { "loss": 0.6963, "learning_rate": 7.2e-05, "epoch": 0.05, "step": 3 }, { "loss": 0.678, "learning_rate": 7.2e-05, "epoch": 0.07, "step": 4 }, { "loss": 0.6763, "learning_rate": 7.2e-05, "epoch": 0.09, "step": 5 }, { "loss": 0.6766, "learning_rate": 7.2e-05, "epoch": 0.11, "step": 6 }, { "loss": 0.6475, "learning_rate": 7.2e-05, "epoch": 0.12, "step": 7 }, { "loss": 0.6429, "learning_rate": 7.2e-05, "epoch": 0.14, "step": 8 }, { "loss": 0.6151, "learning_rate": 7.2e-05, "epoch": 0.16, "step": 9 }, { "loss": 0.6278, "learning_rate": 7.2e-05, "epoch": 0.18, "step": 10 }, { "loss": 0.6226, "learning_rate": 7.2e-05, "epoch": 0.19, "step": 11 }, { "loss": 0.6017, "learning_rate": 7.2e-05, "epoch": 0.21, "step": 12 }, { "loss": 0.6445, "learning_rate": 7.2e-05, "epoch": 0.23, "step": 13 }, { "loss": 0.5827, "learning_rate": 7.2e-05, "epoch": 0.25, "step": 14 }, { "loss": 0.5437, "learning_rate": 7.2e-05, "epoch": 0.26, "step": 15 }, { "loss": 0.5466, "learning_rate": 7.2e-05, "epoch": 0.28, "step": 16 }, { "loss": 0.5452, "learning_rate": 7.2e-05, "epoch": 0.3, "step": 17 }, { "loss": 0.5531, "learning_rate": 7.2e-05, "epoch": 0.32, "step": 18 }, { "loss": 0.5615, "learning_rate": 7.2e-05, "epoch": 0.33, "step": 19 }, { "loss": 0.5586, "learning_rate": 7.2e-05, "epoch": 0.35, "step": 20 }, { "loss": 0.5116, "learning_rate": 7.2e-05, "epoch": 0.37, "step": 21 }, { "loss": 0.4264, "learning_rate": 7.2e-05, "epoch": 0.39, "step": 22 }, { "loss": 0.3866, "learning_rate": 7.2e-05, "epoch": 0.4, "step": 23 }, { "loss": 0.5063, "learning_rate": 7.2e-05, "epoch": 0.42, "step": 24 }, { "loss": 0.4082, "learning_rate": 7.2e-05, "epoch": 0.44, "step": 25 }, { "eval_code_easy_loss": 0.4812281131744385, "eval_code_easy_score": -0.14910461008548737, "eval_code_easy_brier_score": 0.14910461008548737, "eval_code_easy_average_probability": 0.6539512872695923, "eval_code_easy_accuracy": 0.85, "eval_code_easy_probabilities": [ 0.7876701354980469, 0.7523844242095947, 0.6472867131233215, 0.7906888723373413, 0.8586155772209167, 0.6918468475341797, 0.4946542978286743, 0.578050434589386, 0.5266337990760803, 0.5107197761535645, 0.505793035030365, 0.45479127764701843, 0.4748489260673523, 0.5174643397331238, 0.4289233088493347, 0.5851827263832092, 0.8242431879043579, 0.9364657402038574, 0.5783424377441406, 0.48775342106819153, 0.4637766480445862, 0.8675320148468018, 0.7406874299049377, 0.7409279346466064, 0.7196134924888611, 0.6729203462600708, 0.7667352557182312, 0.7618107795715332, 0.5047318935394287, 0.8554519414901733, 0.48701024055480957, 0.5184937119483948, 0.26061391830444336, 0.6388080716133118, 0.557035505771637, 0.541324257850647, 0.7926197052001953, 0.9460112452507019, 0.548197865486145, 0.7201337814331055, 0.7818894982337952, 0.6181257367134094, 0.6234219670295715, 0.6032276749610901, 0.5777942538261414, 0.7809272408485413, 0.41414424777030945, 0.5810375809669495, 0.5093526244163513, 0.5954176783561707, 0.5138558745384216, 0.43075305223464966, 0.6626322865486145, 0.6255874037742615, 0.7210639119148254, 0.9159994125366211, 0.8485010266304016, 0.8202032446861267, 0.5092943906784058, 0.8338488936424255, 0.5957618355751038, 0.6853424906730652, 0.6718733310699463, 0.9674582481384277, 0.9621556401252747, 0.9909061193466187, 0.5818647742271423, 0.5197274088859558, 0.5295705795288086, 0.5335188508033752, 0.35347408056259155, 0.20989884436130524, 0.8785598278045654, 0.9439873695373535, 0.8205457329750061, 0.5045193433761597, 0.5271017551422119, 0.7975461483001709, 0.5877244472503662, 0.5780063271522522, 0.4087151885032654, 0.502793550491333, 0.5167631506919861, 0.5050711035728455, 0.6043635010719299, 0.6559382677078247, 0.7178013920783997, 0.6761601567268372, 0.4603518843650818, 0.4613737463951111, 0.8092925548553467, 0.5021629929542542, 0.8891451358795166, 0.8492684960365295, 0.8078585863113403, 0.7930970191955566, 0.9665018916130066, 0.920259416103363, 0.9725727438926697, 0.6322199702262878 ], "eval_code_easy_runtime": 12.1001, "eval_code_easy_samples_per_second": 8.264, "eval_code_easy_steps_per_second": 0.165, "epoch": 0.44, "step": 25 }, { "eval_code_hard_loss": 0.6784273386001587, "eval_code_hard_score": -0.23437322676181793, "eval_code_hard_brier_score": 0.23437322676181793, "eval_code_hard_average_probability": 0.5313071608543396, "eval_code_hard_accuracy": 0.63, "eval_code_hard_probabilities": [ 0.6153736114501953, 0.5013247132301331, 0.5044454336166382, 0.9211879968643188, 0.8884113430976868, 0.9285586476325989, 0.4261794984340668, 0.4360635578632355, 0.5060375928878784, 0.4633253514766693, 0.49218645691871643, 0.5567600727081299, 0.878082811832428, 0.8190033435821533, 0.8284511566162109, 0.5461653470993042, 0.5189316868782043, 0.535224974155426, 0.42352545261383057, 0.37337490916252136, 0.3578466475009918, 0.5396072268486023, 0.5384335517883301, 0.5416436195373535, 0.4356071949005127, 0.5550840497016907, 0.38927942514419556, 0.19190631806850433, 0.525556743144989, 0.563071072101593, 0.5183289647102356, 0.5080891847610474, 0.4980936646461487, 0.4459080696105957, 0.4500540792942047, 0.44474658370018005, 0.35595303773880005, 0.3355133831501007, 0.7131393551826477, 0.5476568937301636, 0.6050811409950256, 0.6322606801986694, 0.5295588374137878, 0.5178883075714111, 0.5216538906097412, 0.6891106367111206, 0.4794466197490692, 0.5025456547737122, 0.7308195233345032, 0.37424957752227783, 0.37053611874580383, 0.5303871631622314, 0.5796085000038147, 0.7150017619132996, 0.4277779161930084, 0.4290866553783417, 0.430403470993042, 0.5324957370758057, 0.5010586380958557, 0.520031750202179, 0.6008161902427673, 0.5889564156532288, 0.5865479111671448, 0.5397530794143677, 0.5804182887077332, 0.44839367270469666, 0.5159912109375, 0.7570854425430298, 0.49450379610061646, 0.6515185236930847, 0.6266943216323853, 0.4965268075466156, 0.5120733976364136, 0.540576696395874, 0.5366629958152771, 0.4482656717300415, 0.46240511536598206, 0.5012778639793396, 0.4999728500843048, 0.4921656548976898, 0.5284487009048462, 0.5068923234939575, 0.46083176136016846, 0.5137518644332886, 0.5790409445762634, 0.515843391418457, 0.542677640914917, 0.5102853178977966, 0.4863445460796356, 0.49948781728744507, 0.5162002444267273, 0.5039041042327881, 0.4971469044685364, 0.4223201274871826, 0.4179053008556366, 0.43632903695106506, 0.5268281102180481, 0.5249642729759216, 0.4914223849773407, 0.532348096370697 ], "eval_code_hard_runtime": 53.2874, "eval_code_hard_samples_per_second": 1.877, "eval_code_hard_steps_per_second": 0.038, "epoch": 0.44, "step": 25 }, { "loss": 0.4493, "learning_rate": 7.2e-05, "epoch": 0.46, "step": 26 }, { "loss": 0.4695, "learning_rate": 7.2e-05, "epoch": 0.47, "step": 27 }, { "loss": 0.3858, "learning_rate": 7.2e-05, "epoch": 0.49, "step": 28 }, { "loss": 0.3683, "learning_rate": 7.2e-05, "epoch": 0.51, "step": 29 }, { "loss": 0.4011, "learning_rate": 7.2e-05, "epoch": 0.53, "step": 30 }, { "loss": 0.346, "learning_rate": 7.2e-05, "epoch": 0.54, "step": 31 }, { "loss": 0.2892, "learning_rate": 7.2e-05, "epoch": 0.56, "step": 32 }, { "loss": 0.4798, "learning_rate": 7.2e-05, "epoch": 0.58, "step": 33 }, { "loss": 0.3073, "learning_rate": 7.2e-05, "epoch": 0.6, "step": 34 }, { "loss": 0.3376, "learning_rate": 7.2e-05, "epoch": 0.61, "step": 35 }, { "loss": 0.2565, "learning_rate": 7.2e-05, "epoch": 0.63, "step": 36 }, { "loss": 0.1994, "learning_rate": 7.2e-05, "epoch": 0.65, "step": 37 }, { "loss": 0.3296, "learning_rate": 7.2e-05, "epoch": 0.67, "step": 38 }, { "loss": 0.2029, "learning_rate": 7.2e-05, "epoch": 0.68, "step": 39 }, { "loss": 0.2739, "learning_rate": 7.2e-05, "epoch": 0.7, "step": 40 }, { "loss": 0.1792, "learning_rate": 7.2e-05, "epoch": 0.72, "step": 41 }, { "loss": 0.1962, "learning_rate": 7.2e-05, "epoch": 0.74, "step": 42 }, { "loss": 0.2281, "learning_rate": 7.2e-05, "epoch": 0.75, "step": 43 }, { "loss": 0.1416, "learning_rate": 7.2e-05, "epoch": 0.77, "step": 44 }, { "loss": 0.2981, "learning_rate": 7.2e-05, "epoch": 0.79, "step": 45 }, { "loss": 0.2767, "learning_rate": 7.2e-05, "epoch": 0.81, "step": 46 }, { "loss": 0.2888, "learning_rate": 7.2e-05, "epoch": 0.82, "step": 47 }, { "loss": 0.3354, "learning_rate": 7.2e-05, "epoch": 0.84, "step": 48 }, { "loss": 0.2017, "learning_rate": 7.2e-05, "epoch": 0.86, "step": 49 }, { "loss": 0.1569, "learning_rate": 7.2e-05, "epoch": 0.88, "step": 50 }, { "eval_code_easy_loss": 0.366973876953125, "eval_code_easy_score": -0.11782870441675186, "eval_code_easy_brier_score": 0.11782870441675186, "eval_code_easy_average_probability": 0.7787162065505981, "eval_code_easy_accuracy": 0.82, "eval_code_easy_probabilities": [ 0.9949461817741394, 0.9871755242347717, 0.9638071060180664, 0.9957897067070007, 0.9965255856513977, 0.9852939248085022, 0.6930150985717773, 0.9989522695541382, 0.8558874130249023, 0.4613070785999298, 0.510718822479248, 0.2324236035346985, 0.3973901569843292, 0.7853201031684875, 0.5421555042266846, 0.7354709506034851, 0.9784907698631287, 0.9970309734344482, 0.5461677312850952, 0.41464078426361084, 0.30067187547683716, 0.9925328493118286, 0.9674832224845886, 0.9843401908874512, 0.9914712309837341, 0.9825579524040222, 0.9267136454582214, 0.9984757304191589, 0.9873777031898499, 0.9998621940612793, 0.47989073395729065, 0.6140918731689453, 0.39963647723197937, 0.7302553653717041, 0.6465033292770386, 0.5506258606910706, 0.9697403311729431, 0.961764395236969, 0.4946594834327698, 0.8810425400733948, 0.8602923154830933, 0.7874042987823486, 0.9162008166313171, 0.9948663711547852, 0.9439269304275513, 0.9370085000991821, 0.34556975960731506, 0.10199112445116043, 0.6533010005950928, 0.9573982954025269, 0.7248471975326538, 0.2611381411552429, 0.9998348951339722, 0.9983940720558167, 0.9555704593658447, 0.999995231628418, 0.999896764755249, 0.9813823699951172, 0.5570517778396606, 0.9944754242897034, 0.6928525567054749, 0.9708505272865295, 0.9566367268562317, 0.999937891960144, 0.9999886751174927, 0.9999959468841553, 0.9741113781929016, 0.7828025221824646, 0.8869524002075195, 0.6031803488731384, 0.13089396059513092, 0.015088181011378765, 0.9989626407623291, 0.9993138313293457, 0.9666383862495422, 0.7539190649986267, 0.6800207495689392, 0.7904426455497742, 0.9921445250511169, 0.8796939849853516, 0.9624642133712769, 0.48169296979904175, 0.5490749478340149, 0.5619942545890808, 0.7675090432167053, 0.7946746349334717, 0.8194403648376465, 0.787605881690979, 0.2022314816713333, 0.3003339469432831, 0.999944806098938, 0.3906439542770386, 0.9998724460601807, 0.9990270137786865, 0.9992762207984924, 0.9981435537338257, 0.9994109869003296, 0.9930939674377441, 0.9995924830436707, 0.2884075939655304 ], "eval_code_easy_runtime": 12.0912, "eval_code_easy_samples_per_second": 8.27, "eval_code_easy_steps_per_second": 0.165, "epoch": 0.88, "step": 50 }, { "eval_code_hard_loss": 0.7648758888244629, "eval_code_hard_score": -0.2455763965845108, "eval_code_hard_brier_score": 0.2455763965845108, "eval_code_hard_average_probability": 0.5492634773254395, "eval_code_hard_accuracy": 0.58, "eval_code_hard_probabilities": [ 0.9692575335502625, 0.5152681469917297, 0.5391058921813965, 0.9931036829948425, 0.9909272193908691, 0.9973450303077698, 0.37631839513778687, 0.33356037735939026, 0.5611117482185364, 0.4919166564941406, 0.4896278977394104, 0.546688973903656, 0.9802632927894592, 0.9941868782043457, 0.9953175783157349, 0.5917196869850159, 0.556675374507904, 0.5831913352012634, 0.7293359041213989, 0.47196894884109497, 0.4489763081073761, 0.768726646900177, 0.5400140285491943, 0.6983548998832703, 0.38701897859573364, 0.6672759056091309, 0.38983210921287537, 0.005684987176209688, 0.6010024547576904, 0.6203393936157227, 0.590334415435791, 0.5500776767730713, 0.4848686158657074, 0.49481847882270813, 0.4460141360759735, 0.41459348797798157, 0.2456727772951126, 0.24025699496269226, 0.5689725875854492, 0.4383637607097626, 0.5424249172210693, 0.4941108524799347, 0.5239763855934143, 0.4846762716770172, 0.4753747582435608, 0.9676743149757385, 0.40851080417633057, 0.4941249489784241, 0.8557487726211548, 0.8221828937530518, 0.8013899922370911, 0.49702978134155273, 0.3039790391921997, 0.30129650235176086, 0.02055172249674797, 0.021210504695773125, 0.021720753982663155, 0.575912594795227, 0.5162267088890076, 0.5338138341903687, 0.8233920335769653, 0.8342075347900391, 0.8165725469589233, 0.6269811391830444, 0.6718840599060059, 0.5006963014602661, 0.3764708340167999, 0.48138299584388733, 0.33396115899086, 0.749821126461029, 0.6757022142410278, 0.48911863565444946, 0.5064474940299988, 0.62013179063797, 0.5993181467056274, 0.4782550632953644, 0.4940183758735657, 0.5602287650108337, 0.5086959004402161, 0.46642473340034485, 0.5594733953475952, 0.5183232426643372, 0.5857313275337219, 0.5656166076660156, 0.5952458381652832, 0.5976989269256592, 0.5781928300857544, 0.48465627431869507, 0.5290895700454712, 0.5022053718566895, 0.47955581545829773, 0.465986043214798, 0.46290040016174316, 0.33050066232681274, 0.34588003158569336, 0.387125700712204, 0.6185724139213562, 0.683625340461731, 0.32840457558631897, 0.723832905292511 ], "eval_code_hard_runtime": 53.3211, "eval_code_hard_samples_per_second": 1.875, "eval_code_hard_steps_per_second": 0.038, "epoch": 0.88, "step": 50 }, { "loss": 0.2551, "learning_rate": 7.2e-05, "epoch": 0.89, "step": 51 }, { "loss": 0.1634, "learning_rate": 7.2e-05, "epoch": 0.91, "step": 52 }, { "loss": 0.3866, "learning_rate": 7.2e-05, "epoch": 0.93, "step": 53 }, { "loss": 0.4277, "learning_rate": 7.2e-05, "epoch": 0.95, "step": 54 }, { "loss": 0.2225, "learning_rate": 7.2e-05, "epoch": 0.96, "step": 55 }, { "loss": 0.1702, "learning_rate": 7.2e-05, "epoch": 0.98, "step": 56 }, { "loss": 0.0772, "learning_rate": 7.2e-05, "epoch": 1.0, "step": 57 }, { "loss": 0.082, "learning_rate": 7.2e-05, "epoch": 1.02, "step": 58 }, { "loss": 0.3056, "learning_rate": 7.2e-05, "epoch": 1.04, "step": 59 }, { "loss": 0.1856, "learning_rate": 7.2e-05, "epoch": 1.05, "step": 60 }, { "loss": 0.1862, "learning_rate": 7.2e-05, "epoch": 1.07, "step": 61 }, { "loss": 0.1939, "learning_rate": 7.2e-05, "epoch": 1.09, "step": 62 }, { "loss": 0.1331, "learning_rate": 7.2e-05, "epoch": 1.11, "step": 63 }, { "loss": 0.2338, "learning_rate": 7.2e-05, "epoch": 1.12, "step": 64 }, { "loss": 0.1673, "learning_rate": 7.2e-05, "epoch": 1.14, "step": 65 }, { "loss": 0.3696, "learning_rate": 7.2e-05, "epoch": 1.16, "step": 66 }, { "loss": 0.1056, "learning_rate": 7.2e-05, "epoch": 1.18, "step": 67 }, { "loss": 0.1094, "learning_rate": 7.2e-05, "epoch": 1.19, "step": 68 }, { "loss": 0.0796, "learning_rate": 7.2e-05, "epoch": 1.21, "step": 69 }, { "loss": 0.2261, "learning_rate": 7.2e-05, "epoch": 1.23, "step": 70 }, { "loss": 0.1961, "learning_rate": 7.2e-05, "epoch": 1.25, "step": 71 }, { "loss": 0.1452, "learning_rate": 7.2e-05, "epoch": 1.26, "step": 72 }, { "loss": 0.1716, "learning_rate": 7.2e-05, "epoch": 1.28, "step": 73 }, { "loss": 0.1421, "learning_rate": 7.2e-05, "epoch": 1.3, "step": 74 }, { "loss": 0.3305, "learning_rate": 7.2e-05, "epoch": 1.32, "step": 75 }, { "eval_code_easy_loss": 0.32117652893066406, "eval_code_easy_score": -0.09214462339878082, "eval_code_easy_brier_score": 0.09214462339878082, "eval_code_easy_average_probability": 0.8216841220855713, "eval_code_easy_accuracy": 0.88, "eval_code_easy_probabilities": [ 0.9994999170303345, 0.9912821054458618, 0.9577344059944153, 0.9980210065841675, 0.9973551034927368, 0.9871847033500671, 0.6745311617851257, 0.9999939203262329, 0.9304938912391663, 0.5086526870727539, 0.5627270340919495, 0.2480248361825943, 0.49823904037475586, 0.8804905414581299, 0.5820022821426392, 0.748561680316925, 0.9955660700798035, 0.9997114539146423, 0.8057821393013, 0.5374618768692017, 0.2833597958087921, 0.9967227578163147, 0.9863321781158447, 0.9948417544364929, 0.9996953010559082, 0.9988725781440735, 0.9955350160598755, 0.9988835453987122, 0.9974863529205322, 0.9998946189880371, 0.8057632446289062, 0.7699967622756958, 0.6971409916877747, 0.7715607285499573, 0.6907534599304199, 0.5492137670516968, 0.9612501263618469, 0.9800459146499634, 0.6639916896820068, 0.992854654788971, 0.9864367842674255, 0.9040735363960266, 0.9259136319160461, 0.9870310425758362, 0.8883805274963379, 0.9945270419120789, 0.48652422428131104, 0.044553183019161224, 0.6629449129104614, 0.9678350687026978, 0.7551115155220032, 0.35034820437431335, 0.9999849796295166, 0.9999747276306152, 0.9967097043991089, 0.9999997615814209, 0.9999991655349731, 0.9840741157531738, 0.5545885562896729, 0.9983722567558289, 0.6852497458457947, 0.9241886138916016, 0.8779072165489197, 0.9999986886978149, 0.9999998807907104, 1.0, 0.9829393625259399, 0.9291936755180359, 0.9386154413223267, 0.6534070372581482, 0.10347169637680054, 0.0036570930387824774, 0.9999821186065674, 0.9999948740005493, 0.9956468939781189, 0.9652343988418579, 0.9181939363479614, 0.9902083873748779, 0.9989393353462219, 0.853695273399353, 0.9876751899719238, 0.4735547602176666, 0.5543529391288757, 0.5561679601669312, 0.8694782853126526, 0.9327576756477356, 0.9288151264190674, 0.8425791263580322, 0.2500445544719696, 0.4530153274536133, 0.9999988079071045, 0.4624493420124054, 0.9999938011169434, 0.9999393224716187, 0.9999752044677734, 0.9998465776443481, 0.999996542930603, 0.9989469647407532, 0.999995231628418, 0.5134104490280151 ], "eval_code_easy_runtime": 12.0923, "eval_code_easy_samples_per_second": 8.27, "eval_code_easy_steps_per_second": 0.165, "epoch": 1.32, "step": 75 }, { "eval_code_hard_loss": 0.8514172434806824, "eval_code_hard_score": -0.2549550235271454, "eval_code_hard_brier_score": 0.2549550235271454, "eval_code_hard_average_probability": 0.5471080541610718, "eval_code_hard_accuracy": 0.63, "eval_code_hard_probabilities": [ 0.9908287525177002, 0.5119895339012146, 0.5428490042686462, 0.9895175099372864, 0.9891449809074402, 0.9949464201927185, 0.38589784502983093, 0.2820265591144562, 0.5328211784362793, 0.5014193058013916, 0.5086326003074646, 0.5764115452766418, 0.9859844446182251, 0.9957466721534729, 0.9967710375785828, 0.5794450640678406, 0.5391579270362854, 0.6133323311805725, 0.6606585383415222, 0.34289708733558655, 0.2988581657409668, 0.788067102432251, 0.542411744594574, 0.7060100436210632, 0.42542076110839844, 0.7323933839797974, 0.42784348130226135, 0.0037993849255144596, 0.6944400072097778, 0.7083581686019897, 0.5933657288551331, 0.5459165573120117, 0.5039231181144714, 0.4072609841823578, 0.3503766357898712, 0.35972559452056885, 0.1359257698059082, 0.13460229337215424, 0.4615738093852997, 0.38321685791015625, 0.37138813734054565, 0.5392828583717346, 0.5346622467041016, 0.4690147042274475, 0.4728128910064697, 0.9811822772026062, 0.36270952224731445, 0.5393079519271851, 0.8588457703590393, 0.9537001252174377, 0.9503171443939209, 0.4992208480834961, 0.16286799311637878, 0.3865070939064026, 0.0036291300784796476, 0.0037814583629369736, 0.003941776696592569, 0.6453168392181396, 0.5069921016693115, 0.5409190654754639, 0.620768666267395, 0.6463491320610046, 0.6067190766334534, 0.6744793653488159, 0.7356851696968079, 0.6201030611991882, 0.34589684009552, 0.4320986270904541, 0.2923872470855713, 0.8295382857322693, 0.7723568677902222, 0.48901495337486267, 0.5491488575935364, 0.6467846035957336, 0.6319132447242737, 0.5377576351165771, 0.5089011788368225, 0.6081604957580566, 0.5122814774513245, 0.45855069160461426, 0.5822516083717346, 0.5393768548965454, 0.579654335975647, 0.5982511043548584, 0.6504716873168945, 0.658900797367096, 0.6075130105018616, 0.4958891272544861, 0.5496628880500793, 0.5090362429618835, 0.46470561623573303, 0.4267897605895996, 0.43224194645881653, 0.3274325728416443, 0.35687658190727234, 0.4236689805984497, 0.6108702421188354, 0.7473148107528687, 0.3625641465187073, 0.7580684423446655 ], "eval_code_hard_runtime": 53.2977, "eval_code_hard_samples_per_second": 1.876, "eval_code_hard_steps_per_second": 0.038, "epoch": 1.32, "step": 75 }, { "loss": 0.149, "learning_rate": 7.2e-05, "epoch": 1.33, "step": 76 }, { "loss": 0.2295, "learning_rate": 7.2e-05, "epoch": 1.35, "step": 77 }, { "loss": 0.1289, "learning_rate": 7.2e-05, "epoch": 1.37, "step": 78 }, { "loss": 0.0937, "learning_rate": 7.2e-05, "epoch": 1.39, "step": 79 }, { "loss": 0.2041, "learning_rate": 7.2e-05, "epoch": 1.4, "step": 80 }, { "loss": 0.2389, "learning_rate": 7.2e-05, "epoch": 1.42, "step": 81 }, { "loss": 0.1343, "learning_rate": 7.2e-05, "epoch": 1.44, "step": 82 }, { "loss": 0.0283, "learning_rate": 7.2e-05, "epoch": 1.46, "step": 83 }, { "loss": 0.0841, "learning_rate": 7.2e-05, "epoch": 1.47, "step": 84 }, { "loss": 0.3316, "learning_rate": 7.2e-05, "epoch": 1.49, "step": 85 }, { "loss": 0.0393, "learning_rate": 7.2e-05, "epoch": 1.51, "step": 86 }, { "loss": 0.0755, "learning_rate": 7.2e-05, "epoch": 1.53, "step": 87 }, { "loss": 0.1595, "learning_rate": 7.2e-05, "epoch": 1.54, "step": 88 }, { "loss": 0.1434, "learning_rate": 7.2e-05, "epoch": 1.56, "step": 89 }, { "loss": 0.1001, "learning_rate": 7.2e-05, "epoch": 1.58, "step": 90 }, { "loss": 0.1532, "learning_rate": 7.2e-05, "epoch": 1.6, "step": 91 }, { "loss": 0.1773, "learning_rate": 7.2e-05, "epoch": 1.61, "step": 92 }, { "loss": 0.1351, "learning_rate": 7.2e-05, "epoch": 1.63, "step": 93 }, { "loss": 0.0533, "learning_rate": 7.2e-05, "epoch": 1.65, "step": 94 }, { "loss": 0.1834, "learning_rate": 7.2e-05, "epoch": 1.67, "step": 95 }, { "loss": 0.1847, "learning_rate": 7.2e-05, "epoch": 1.68, "step": 96 }, { "loss": 0.072, "learning_rate": 7.2e-05, "epoch": 1.7, "step": 97 }, { "loss": 0.0431, "learning_rate": 7.2e-05, "epoch": 1.72, "step": 98 }, { "loss": 0.0796, "learning_rate": 7.2e-05, "epoch": 1.74, "step": 99 }, { "loss": 0.1992, "learning_rate": 7.2e-05, "epoch": 1.75, "step": 100 }, { "eval_code_easy_loss": 0.3075363039970398, "eval_code_easy_score": -0.07319054007530212, "eval_code_easy_brier_score": 0.07319054007530212, "eval_code_easy_average_probability": 0.8647841811180115, "eval_code_easy_accuracy": 0.92, "eval_code_easy_probabilities": [ 0.9999574422836304, 0.9999752044677734, 0.9896870255470276, 0.9995444416999817, 0.9996813535690308, 0.9912283420562744, 0.8707631230354309, 1.0, 0.9232298135757446, 0.8876208066940308, 0.7544382810592651, 0.8350661993026733, 0.8029730319976807, 0.9684423804283142, 0.8641867637634277, 0.840520977973938, 0.9997034668922424, 0.9998855590820312, 0.8682356476783752, 0.5141866207122803, 0.25204265117645264, 0.9994714856147766, 0.99544358253479, 0.9964646100997925, 0.9997562766075134, 0.999864935874939, 0.9721921682357788, 0.9998952150344849, 0.9750569462776184, 0.9999854564666748, 0.7429996132850647, 0.822045624256134, 0.7596055865287781, 0.8471194505691528, 0.7925592660903931, 0.795616865158081, 0.976042628288269, 0.9971067309379578, 0.5366230607032776, 0.9954087138175964, 0.977104127407074, 0.9664822816848755, 0.9948425889015198, 0.9999990463256836, 0.9974696636199951, 0.9896783828735352, 0.7055782675743103, 0.010203487239778042, 0.9344508051872253, 0.999211311340332, 0.9515018463134766, 0.5253821611404419, 1.0, 0.9999994039535522, 0.9999967813491821, 1.0, 1.0, 0.9986414313316345, 0.5329476594924927, 0.9997962117195129, 0.9566650986671448, 0.9977226853370667, 0.9972772002220154, 1.0, 1.0, 1.0, 0.9998082518577576, 0.9984453320503235, 0.9989882111549377, 0.6647514700889587, 0.06555651873350143, 0.0002535696839913726, 0.9999556541442871, 0.9999914169311523, 0.9952349066734314, 0.9737288951873779, 0.9164130091667175, 0.9624624252319336, 0.9998660087585449, 0.9491363763809204, 0.9998786449432373, 0.4198010265827179, 0.5238525867462158, 0.5886799693107605, 0.8423507213592529, 0.9458127617835999, 0.9036057591438293, 0.9418210387229919, 0.18787764012813568, 0.38167285919189453, 1.0, 0.3317376375198364, 0.9999998807907104, 0.9999892711639404, 0.9999970197677612, 0.9999692440032959, 0.9997808337211609, 0.9896361231803894, 0.9996919631958008, 0.7980904579162598 ], "eval_code_easy_runtime": 12.0952, "eval_code_easy_samples_per_second": 8.268, "eval_code_easy_steps_per_second": 0.165, "epoch": 1.75, "step": 100 }, { "eval_code_hard_loss": 0.8713796734809875, "eval_code_hard_score": -0.23470951616764069, "eval_code_hard_brier_score": 0.23470951616764069, "eval_code_hard_average_probability": 0.5856627821922302, "eval_code_hard_accuracy": 0.65, "eval_code_hard_probabilities": [ 0.9855136275291443, 0.49629148840904236, 0.5387540459632874, 0.9978917241096497, 0.9978112578392029, 0.9988699555397034, 0.47865962982177734, 0.3495783805847168, 0.606380045413971, 0.5135536789894104, 0.5702087879180908, 0.6042059659957886, 0.9980809688568115, 0.99955815076828, 0.9996216297149658, 0.5243353247642517, 0.48392897844314575, 0.6072529554367065, 0.9629855751991272, 0.8894397616386414, 0.8946234583854675, 0.8234318494796753, 0.5752303004264832, 0.7489052414894104, 0.4938078820705414, 0.764939546585083, 0.49246543645858765, 0.0005044332938268781, 0.6989362835884094, 0.6961206197738647, 0.695743203163147, 0.5966794490814209, 0.4954136610031128, 0.428554505109787, 0.3025328516960144, 0.31304794549942017, 0.05157984420657158, 0.057667434215545654, 0.295099675655365, 0.4117719233036041, 0.4178787171840668, 0.44591596722602844, 0.5260471701622009, 0.46641460061073303, 0.44238483905792236, 0.9921270608901978, 0.3312618136405945, 0.49680960178375244, 0.8725456595420837, 0.990997314453125, 0.9901727437973022, 0.42896220088005066, 0.10010050982236862, 0.30535706877708435, 0.000553447927813977, 0.000573858036659658, 0.000650162051897496, 0.6373094916343689, 0.5249813199043274, 0.5481424331665039, 0.9771735668182373, 0.9809019565582275, 0.9766740798950195, 0.7092797756195068, 0.7087807059288025, 0.700412392616272, 0.4039199948310852, 0.626448929309845, 0.3165664076805115, 0.8700190782546997, 0.7991524934768677, 0.5164951086044312, 0.5391688346862793, 0.6079928874969482, 0.6740086674690247, 0.5694684386253357, 0.5098953247070312, 0.6005474925041199, 0.5051020979881287, 0.4350145161151886, 0.6045871376991272, 0.5573720932006836, 0.763244092464447, 0.6566476225852966, 0.655601978302002, 0.6529899835586548, 0.6917677521705627, 0.4615822732448578, 0.5680032968521118, 0.5078142285346985, 0.48479217290878296, 0.437853068113327, 0.4693388044834137, 0.5179882645606995, 0.5434749126434326, 0.650637686252594, 0.6607488393783569, 0.7874378561973572, 0.1389518529176712, 0.76926189661026 ], "eval_code_hard_runtime": 53.3137, "eval_code_hard_samples_per_second": 1.876, "eval_code_hard_steps_per_second": 0.038, "epoch": 1.75, "step": 100 }, { "train_runtime": 1129.7886, "train_samples_per_second": 2.832, "train_steps_per_second": 0.089, "total_flos": 0.0, "train_loss": 0.30551302678883074, "epoch": 1.75, "step": 100 } ]