[ { "epoch": 0.02, "learning_rate": 0.0002, "loss": 0.7109, "step": 1 }, { "epoch": 0.04, "learning_rate": 0.0002, "loss": 0.7151, "step": 2 }, { "epoch": 0.05, "learning_rate": 0.0002, "loss": 0.7036, "step": 3 }, { "epoch": 0.07, "learning_rate": 0.0002, "loss": 0.6805, "step": 4 }, { "epoch": 0.09, "learning_rate": 0.0002, "loss": 0.6678, "step": 5 }, { "epoch": 0.11, "learning_rate": 0.0002, "loss": 0.6692, "step": 6 }, { "epoch": 0.12, "learning_rate": 0.0002, "loss": 0.6459, "step": 7 }, { "epoch": 0.14, "learning_rate": 0.0002, "loss": 0.6262, "step": 8 }, { "epoch": 0.16, "learning_rate": 0.0002, "loss": 0.6574, "step": 9 }, { "epoch": 0.18, "learning_rate": 0.0002, "loss": 0.6311, "step": 10 }, { "epoch": 0.19, "learning_rate": 0.0002, "loss": 0.5219, "step": 11 }, { "epoch": 0.21, "learning_rate": 0.0002, "loss": 0.5933, "step": 12 }, { "epoch": 0.23, "learning_rate": 0.0002, "loss": 0.5881, "step": 13 }, { "epoch": 0.25, "learning_rate": 0.0002, "loss": 0.6345, "step": 14 }, { "epoch": 0.26, "learning_rate": 0.0002, "loss": 0.559, "step": 15 }, { "epoch": 0.28, "learning_rate": 0.0002, "loss": 0.5686, "step": 16 }, { "epoch": 0.3, "learning_rate": 0.0002, "loss": 0.577, "step": 17 }, { "epoch": 0.32, "learning_rate": 0.0002, "loss": 0.4884, "step": 18 }, { "epoch": 0.33, "learning_rate": 0.0002, "loss": 0.4593, "step": 19 }, { "epoch": 0.35, "learning_rate": 0.0002, "loss": 0.4582, "step": 20 }, { "epoch": 0.37, "learning_rate": 0.0002, "loss": 0.428, "step": 21 }, { "epoch": 0.39, "learning_rate": 0.0002, "loss": 0.495, "step": 22 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 0.4683, "step": 23 }, { "epoch": 0.42, "learning_rate": 0.0002, "loss": 0.3779, "step": 24 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 0.5694, "step": 25 }, { "epoch": 0.44, "eval_code_accuracy": 0.81, "eval_code_average_probability": 0.6998826861381531, "eval_code_brier_score": 0.13871249556541443, "eval_code_loss": 0.392591655254364, "eval_code_probabilities": [ 0.9998756647109985, 0.9996539354324341, 0.9968274235725403, 0.867797315120697, 0.604396641254425, 0.8194839358329773, 0.9978196620941162, 0.9973512887954712, 0.8165198564529419, 0.929094672203064, 0.9696783423423767, 0.8749808073043823, 0.9949828386306763, 0.9988803267478943, 0.9974466562271118, 0.5661922693252563, 0.7031828761100769, 0.7817732095718384, 0.7952773571014404, 0.700941801071167, 0.5993526577949524, 0.4928004741668701, 0.7662289142608643, 0.7449039816856384, 0.26918813586235046, 0.23834775388240814, 0.21881021559238434, 0.5174687504768372, 0.48715656995773315, 0.5120030045509338, 0.3874703049659729, 0.46171045303344727, 0.6353215575218201, 0.9995631575584412, 0.9988592863082886, 0.9959748387336731, 0.9921092391014099, 0.9928039908409119, 0.9970149993896484, 0.5361520648002625, 0.6019002795219421, 0.4536624550819397, 0.6162904500961304, 0.5367547869682312, 0.620251476764679, 0.5513021349906921, 0.5809685587882996, 0.5933180451393127, 0.726946234703064, 0.6555966138839722, 0.999262273311615, 0.9996010661125183, 0.9998406171798706, 0.7900382280349731, 0.5323582291603088, 0.5186650156974792, 0.5038493275642395, 0.9872013330459595, 0.5808623433113098, 0.9900431632995605, 0.9054338335990906, 0.8925741910934448, 0.8963204026222229, 0.48921719193458557, 0.565790057182312, 0.544992208480835, 0.6663545370101929, 0.49584266543388367, 0.4396786093711853, 0.5050247311592102, 0.4423765242099762, 0.5252822637557983, 0.5083388686180115, 0.5204058885574341, 0.4985828101634979, 0.9953283071517944, 0.9802095890045166, 0.9042825698852539, 0.9987348914146423, 0.9832821488380432, 0.9921707510948181, 0.5249463319778442, 0.496512770652771, 0.529527485370636, 0.6597751975059509, 0.9941974878311157, 0.8405609130859375, 0.6745859384536743, 0.6913660168647766, 0.4706657826900482, 0.6228535771369934, 0.7330396175384521, 0.5534575581550598, 0.4912925362586975, 0.5196889042854309, 0.5061907768249512, 0.527052640914917, 0.46116262674331665, 0.40944358706474304, 0.4376073181629181 ], "eval_code_runtime": 105.7673, "eval_code_samples_per_second": 0.945, "eval_code_score": -0.13871249556541443, "eval_code_steps_per_second": 0.038, "step": 25 }, { "epoch": 0.46, "learning_rate": 0.0002, "loss": 0.3947, "step": 26 }, { "epoch": 0.47, "learning_rate": 0.0002, "loss": 0.4236, "step": 27 }, { "epoch": 0.49, "learning_rate": 0.0002, "loss": 0.349, "step": 28 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 0.3414, "step": 29 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 0.5019, "step": 30 }, { "epoch": 0.54, "learning_rate": 0.0002, "loss": 0.3768, "step": 31 }, { "epoch": 0.56, "learning_rate": 0.0002, "loss": 0.3902, "step": 32 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 0.2838, "step": 33 }, { "epoch": 0.6, "learning_rate": 0.0002, "loss": 0.1972, "step": 34 }, { "epoch": 0.61, "learning_rate": 0.0002, "loss": 0.3796, "step": 35 }, { "epoch": 0.63, "learning_rate": 0.0002, "loss": 0.3741, "step": 36 }, { "epoch": 0.65, "learning_rate": 0.0002, "loss": 0.2624, "step": 37 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 0.2505, "step": 38 }, { "epoch": 0.68, "learning_rate": 0.0002, "loss": 0.2687, "step": 39 }, { "epoch": 0.7, "learning_rate": 0.0002, "loss": 0.2856, "step": 40 }, { "epoch": 0.72, "learning_rate": 0.0002, "loss": 0.1638, "step": 41 }, { "epoch": 0.74, "learning_rate": 0.0002, "loss": 0.2595, "step": 42 }, { "epoch": 0.75, "learning_rate": 0.0002, "loss": 0.2886, "step": 43 }, { "epoch": 0.77, "learning_rate": 0.0002, "loss": 0.1604, "step": 44 }, { "epoch": 0.79, "learning_rate": 0.0002, "loss": 0.2069, "step": 45 }, { "epoch": 0.81, "learning_rate": 0.0002, "loss": 0.2285, "step": 46 }, { "epoch": 0.82, "learning_rate": 0.0002, "loss": 0.1975, "step": 47 }, { "epoch": 0.84, "learning_rate": 0.0002, "loss": 0.1726, "step": 48 }, { "epoch": 0.86, "learning_rate": 0.0002, "loss": 0.0881, "step": 49 }, { "epoch": 0.88, "learning_rate": 0.0002, "loss": 0.2712, "step": 50 }, { "epoch": 0.88, "eval_code_accuracy": 0.93, "eval_code_average_probability": 0.8494662642478943, "eval_code_brier_score": 0.06509324163198471, "eval_code_loss": 0.18686418235301971, "eval_code_probabilities": [ 0.9999998807907104, 0.9999940395355225, 0.9999998807907104, 0.999998927116394, 0.9994799494743347, 0.9999616146087646, 1.0, 0.9999617338180542, 0.997948944568634, 0.9999988079071045, 1.0, 0.9999945163726807, 0.9999972581863403, 0.9999998807907104, 0.9972482323646545, 0.8824266195297241, 0.5207855105400085, 0.9811423420906067, 0.9948011636734009, 0.9970309734344482, 0.9518866539001465, 0.9435040950775146, 0.9998762607574463, 0.9999566078186035, 0.5264937281608582, 0.3567848801612854, 0.2969517111778259, 0.5564099550247192, 0.5566191673278809, 0.6488668918609619, 0.2498033493757248, 0.8209301233291626, 0.9682015776634216, 0.999983549118042, 0.998921275138855, 0.9983420372009277, 0.9999994039535522, 1.0, 1.0, 0.9042081236839294, 0.9448812007904053, 0.8440216779708862, 0.9969866871833801, 0.935389518737793, 0.9999938011169434, 0.8614246249198914, 0.9949424862861633, 0.9876769781112671, 0.99991774559021, 0.9980294108390808, 0.9999949932098389, 1.0, 1.0, 0.9999998807907104, 0.6444177627563477, 0.5885342359542847, 0.6353741884231567, 0.9999998807907104, 0.6010631322860718, 0.999997615814209, 0.9806910753250122, 0.9655673503875732, 0.9658817648887634, 0.829046368598938, 0.9939888715744019, 0.8807443380355835, 0.9380287528038025, 0.7914523482322693, 0.3484261929988861, 0.6201522350311279, 0.34111207723617554, 0.7237049341201782, 0.6027003526687622, 0.8865858912467957, 0.632958710193634, 1.0, 0.9925962090492249, 0.9999537467956543, 1.0, 0.999997615814209, 0.9999998807907104, 0.6945015788078308, 0.5074270963668823, 0.7571401000022888, 0.9955568909645081, 0.9997404217720032, 0.9955176711082458, 0.7693279385566711, 0.8310465812683105, 0.5401557683944702, 0.9749163389205933, 0.963936448097229, 0.9640229940414429, 0.5689054131507874, 0.8011016249656677, 0.6685754060745239, 0.6061564087867737, 0.44276222586631775, 0.4091780185699463, 0.7819089889526367 ], "eval_code_runtime": 105.8373, "eval_code_samples_per_second": 0.945, "eval_code_score": -0.06509324163198471, "eval_code_steps_per_second": 0.038, "step": 50 }, { "epoch": 0.89, "learning_rate": 0.0002, "loss": 0.181, "step": 51 }, { "epoch": 0.91, "learning_rate": 0.0002, "loss": 0.2069, "step": 52 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 0.2289, "step": 53 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 0.2987, "step": 54 }, { "epoch": 0.96, "learning_rate": 0.0002, "loss": 0.3474, "step": 55 }, { "epoch": 0.98, "learning_rate": 0.0002, "loss": 0.284, "step": 56 }, { "epoch": 1.0, "learning_rate": 0.0002, "loss": 0.7535, "step": 57 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 0.1286, "step": 58 }, { "epoch": 1.04, "learning_rate": 0.0002, "loss": 0.0816, "step": 59 }, { "epoch": 1.05, "learning_rate": 0.0002, "loss": 0.0607, "step": 60 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 0.0772, "step": 61 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 0.1129, "step": 62 }, { "epoch": 1.11, "learning_rate": 0.0002, "loss": 0.0509, "step": 63 }, { "epoch": 1.12, "learning_rate": 0.0002, "loss": 0.1928, "step": 64 }, { "epoch": 1.14, "learning_rate": 0.0002, "loss": 0.0945, "step": 65 }, { "epoch": 1.16, "learning_rate": 0.0002, "loss": 0.1268, "step": 66 }, { "epoch": 1.18, "learning_rate": 0.0002, "loss": 0.1186, "step": 67 }, { "epoch": 1.19, "learning_rate": 0.0002, "loss": 0.1123, "step": 68 }, { "epoch": 1.21, "learning_rate": 0.0002, "loss": 0.1512, "step": 69 }, { "epoch": 1.23, "learning_rate": 0.0002, "loss": 0.1303, "step": 70 }, { "epoch": 1.25, "learning_rate": 0.0002, "loss": 0.0441, "step": 71 }, { "epoch": 1.26, "learning_rate": 0.0002, "loss": 0.1391, "step": 72 }, { "epoch": 1.28, "learning_rate": 0.0002, "loss": 0.0685, "step": 73 }, { "epoch": 1.3, "learning_rate": 0.0002, "loss": 0.0762, "step": 74 }, { "epoch": 1.32, "learning_rate": 0.0002, "loss": 0.1011, "step": 75 }, { "epoch": 1.32, "eval_code_accuracy": 0.91, "eval_code_average_probability": 0.889110267162323, "eval_code_brier_score": 0.06802475452423096, "eval_code_loss": 0.21632270514965057, "eval_code_probabilities": [ 0.9983546137809753, 0.8964507579803467, 0.9998843669891357, 1.0, 0.9999983310699463, 0.9999896287918091, 1.0, 1.0, 0.9999992847442627, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9493870139122009, 0.22386565804481506, 0.9954581260681152, 0.9999986886978149, 1.0, 0.9999638795852661, 0.9999364614486694, 1.0, 1.0, 0.14285829663276672, 0.05121216922998428, 0.06197505444288254, 0.8036693334579468, 0.784625768661499, 0.9341561198234558, 0.2160544991493225, 0.9977596998214722, 0.9990569949150085, 0.9999992847442627, 0.5561187863349915, 0.9838144183158875, 1.0, 1.0, 1.0, 0.9999961853027344, 0.999188244342804, 0.9992071986198425, 0.9999998807907104, 0.9616034030914307, 0.9999998807907104, 0.9486005902290344, 0.9999855756759644, 0.9963693618774414, 0.9999858140945435, 0.9972230195999146, 1.0, 1.0, 1.0, 1.0, 0.7309120893478394, 0.8254848122596741, 0.7734106183052063, 1.0, 0.5664382576942444, 1.0, 0.9999998807907104, 0.9999990463256836, 0.9999992847442627, 0.9987679123878479, 0.9999841451644897, 0.9796060919761658, 0.9999995231628418, 0.4220101237297058, 0.517703652381897, 0.8652507066726685, 0.007634077221155167, 0.9999922513961792, 0.9957844614982605, 0.9999879598617554, 0.9737687706947327, 1.0, 0.9995587468147278, 1.0, 1.0, 1.0, 1.0, 0.9666233658790588, 0.6209402084350586, 0.978766143321991, 0.9995095729827881, 0.9826872944831848, 1.0, 0.9902674555778503, 0.9968993663787842, 0.8134205937385559, 0.9999436140060425, 0.9293919205665588, 0.9999128580093384, 0.8822279572486877, 0.9978179931640625, 0.9373266100883484, 0.7644922733306885, 0.43501412868499756, 0.4749135673046112, 0.9878302216529846 ], "eval_code_runtime": 105.7625, "eval_code_samples_per_second": 0.946, "eval_code_score": -0.06802475452423096, "eval_code_steps_per_second": 0.038, "step": 75 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 0.0665, "step": 76 }, { "epoch": 1.35, "learning_rate": 0.0002, "loss": 0.0808, "step": 77 }, { "epoch": 1.37, "learning_rate": 0.0002, "loss": 0.0637, "step": 78 }, { "epoch": 1.39, "learning_rate": 0.0002, "loss": 0.0323, "step": 79 }, { "epoch": 1.4, "learning_rate": 0.0002, "loss": 0.0441, "step": 80 }, { "epoch": 1.42, "learning_rate": 0.0002, "loss": 0.1698, "step": 81 }, { "epoch": 1.44, "learning_rate": 0.0002, "loss": 0.156, "step": 82 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 0.3788, "step": 83 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 0.1222, "step": 84 }, { "epoch": 1.49, "learning_rate": 0.0002, "loss": 0.1639, "step": 85 }, { "epoch": 1.51, "learning_rate": 0.0002, "loss": 0.0538, "step": 86 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 0.0328, "step": 87 }, { "epoch": 1.54, "learning_rate": 0.0002, "loss": 0.1745, "step": 88 }, { "epoch": 1.56, "learning_rate": 0.0002, "loss": 0.0874, "step": 89 }, { "epoch": 1.58, "learning_rate": 0.0002, "loss": 0.1177, "step": 90 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 0.1185, "step": 91 }, { "epoch": 1.61, "learning_rate": 0.0002, "loss": 0.0543, "step": 92 }, { "epoch": 1.63, "learning_rate": 0.0002, "loss": 0.0392, "step": 93 }, { "epoch": 1.65, "learning_rate": 0.0002, "loss": 0.2006, "step": 94 }, { "epoch": 1.67, "learning_rate": 0.0002, "loss": 0.1249, "step": 95 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 0.0341, "step": 96 }, { "epoch": 1.7, "learning_rate": 0.0002, "loss": 0.0666, "step": 97 }, { "epoch": 1.72, "learning_rate": 0.0002, "loss": 0.1614, "step": 98 }, { "epoch": 1.74, "learning_rate": 0.0002, "loss": 0.0831, "step": 99 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 0.0273, "step": 100 }, { "epoch": 1.75, "eval_code_accuracy": 0.96, "eval_code_average_probability": 0.9304368495941162, "eval_code_brier_score": 0.03531455993652344, "eval_code_loss": 0.10408665984869003, "eval_code_probabilities": [ 1.0, 1.0, 1.0, 1.0, 0.9999995231628418, 1.0, 1.0, 0.9999160766601562, 0.9988158941268921, 0.9999899864196777, 0.9999986886978149, 0.9999879598617554, 0.9999833106994629, 0.9999370574951172, 0.9992890357971191, 0.9659052491188049, 0.6254108548164368, 0.997655987739563, 0.9994457364082336, 0.999336302280426, 0.9953096508979797, 0.9922773838043213, 0.9999829530715942, 0.9999885559082031, 0.9871056079864502, 0.9849441647529602, 0.9815734028816223, 0.7201115489006042, 0.6514120101928711, 0.8476458191871643, 0.1292794942855835, 0.9985151886940002, 0.9995579123497009, 1.0, 0.9999856948852539, 0.9999977350234985, 0.9999998807907104, 1.0, 1.0, 0.9999991655349731, 0.9998373985290527, 0.999966025352478, 1.0, 0.9999432563781738, 1.0, 0.997358500957489, 1.0, 0.9999942779541016, 0.9999959468841553, 0.9998447895050049, 1.0, 1.0, 1.0, 1.0, 0.9860179424285889, 0.8810455203056335, 0.8414822816848755, 1.0, 0.9645673632621765, 1.0, 1.0, 1.0, 1.0, 0.9987167119979858, 0.9995377063751221, 0.969241201877594, 0.9999886751174927, 0.2950672507286072, 0.44002464413642883, 0.8467398881912231, 0.06261960417032242, 0.9991006851196289, 0.9985753297805786, 0.9999589920043945, 0.9881368279457092, 1.0, 0.9875013828277588, 0.9999996423721313, 1.0, 0.9999927282333374, 0.9999997615814209, 0.9641613960266113, 0.5904451012611389, 0.991640567779541, 0.9999064207077026, 0.996203601360321, 0.999998927116394, 0.9644280672073364, 0.9944401979446411, 0.8003358840942383, 0.9963403940200806, 0.9804515242576599, 0.989639163017273, 0.852015495300293, 0.9918712377548218, 0.9663655757904053, 0.8847030997276306, 0.5973413586616516, 0.5435407757759094, 0.811240017414093 ], "eval_code_runtime": 105.7613, "eval_code_samples_per_second": 0.946, "eval_code_score": -0.03531455993652344, "eval_code_steps_per_second": 0.038, "step": 100 }, { "epoch": 1.77, "learning_rate": 0.0002, "loss": 0.0341, "step": 101 }, { "epoch": 1.79, "learning_rate": 0.0002, "loss": 0.0952, "step": 102 }, { "epoch": 1.81, "learning_rate": 0.0002, "loss": 0.0358, "step": 103 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 0.1143, "step": 104 }, { "epoch": 1.84, "learning_rate": 0.0002, "loss": 0.07, "step": 105 }, { "epoch": 1.86, "learning_rate": 0.0002, "loss": 0.1232, "step": 106 }, { "epoch": 1.88, "learning_rate": 0.0002, "loss": 0.065, "step": 107 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 0.0167, "step": 108 }, { "epoch": 1.91, "learning_rate": 0.0002, "loss": 0.0486, "step": 109 }, { "epoch": 1.93, "learning_rate": 0.0002, "loss": 0.011, "step": 110 }, { "epoch": 1.95, "learning_rate": 0.0002, "loss": 0.0425, "step": 111 }, { "epoch": 1.96, "learning_rate": 0.0002, "loss": 0.1082, "step": 112 }, { "epoch": 1.98, "learning_rate": 0.0002, "loss": 0.0196, "step": 113 }, { "epoch": 2.0, "learning_rate": 0.0002, "loss": 0.0152, "step": 114 }, { "epoch": 2.02, "learning_rate": 0.0002, "loss": 0.012, "step": 115 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 0.0186, "step": 116 }, { "epoch": 2.05, "learning_rate": 0.0002, "loss": 0.0186, "step": 117 }, { "epoch": 2.07, "learning_rate": 0.0002, "loss": 0.0238, "step": 118 }, { "epoch": 2.09, "learning_rate": 0.0002, "loss": 0.0097, "step": 119 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 0.0007, "step": 120 }, { "epoch": 2.12, "learning_rate": 0.0002, "loss": 0.0409, "step": 121 }, { "epoch": 2.14, "learning_rate": 0.0002, "loss": 0.0238, "step": 122 }, { "epoch": 2.16, "learning_rate": 0.0002, "loss": 0.0045, "step": 123 }, { "epoch": 2.18, "learning_rate": 0.0002, "loss": 0.0008, "step": 124 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 0.0307, "step": 125 }, { "epoch": 2.19, "eval_code_accuracy": 0.96, "eval_code_average_probability": 0.9499993324279785, "eval_code_brier_score": 0.028195565566420555, "eval_code_loss": 0.11333052814006805, "eval_code_probabilities": [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999997615814209, 1.0, 1.0, 0.9999997615814209, 0.9999997615814209, 0.9999985694885254, 0.9999561309814453, 0.8770335912704468, 0.4641159176826477, 0.9271451234817505, 1.0, 1.0, 0.9999996423721313, 0.9998699426651001, 0.9999997615814209, 0.9999998807907104, 0.9999991655349731, 0.9999380111694336, 0.9999819993972778, 0.7533411979675293, 0.7846699357032776, 0.89438796043396, 0.015444471500813961, 0.999987006187439, 1.0, 1.0, 0.9969304203987122, 0.9999985694885254, 0.9999998807907104, 1.0, 1.0, 1.0, 0.9999716281890869, 0.9999990463256836, 1.0, 1.0, 1.0, 0.9998655319213867, 1.0, 0.9999997615814209, 0.9999998807907104, 0.9999963045120239, 1.0, 1.0, 1.0, 1.0, 0.9978950023651123, 0.7412748336791992, 0.8329392671585083, 1.0, 0.999669075012207, 1.0, 1.0, 1.0, 1.0, 0.9999936819076538, 0.9999263286590576, 0.9996693134307861, 1.0, 0.9178900122642517, 0.8888350129127502, 0.9699975848197937, 0.014796692878007889, 0.9991298317909241, 0.9968131184577942, 0.9999946355819702, 0.9282417297363281, 1.0, 0.9971838593482971, 1.0, 1.0, 0.9999544620513916, 0.9999992847442627, 0.9987694621086121, 0.8008559346199036, 0.9995920062065125, 1.0, 0.9999116659164429, 1.0, 0.9975823163986206, 0.9999178647994995, 0.9812232255935669, 0.9962044358253479, 0.9992068409919739, 0.9994266033172607, 0.9672200679779053, 0.998022198677063, 0.9971696734428406, 0.9978540539741516, 0.9273316860198975, 0.8601265549659729, 0.4826805889606476 ], "eval_code_runtime": 105.781, "eval_code_samples_per_second": 0.945, "eval_code_score": -0.028195565566420555, "eval_code_steps_per_second": 0.038, "step": 125 }, { "epoch": 2.21, "learning_rate": 0.0002, "loss": 0.0048, "step": 126 }, { "epoch": 2.23, "learning_rate": 0.0002, "loss": 0.0093, "step": 127 }, { "epoch": 2.25, "learning_rate": 0.0002, "loss": 0.0056, "step": 128 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 0.0123, "step": 129 }, { "epoch": 2.28, "learning_rate": 0.0002, "loss": 0.0825, "step": 130 }, { "epoch": 2.3, "learning_rate": 0.0002, "loss": 0.0199, "step": 131 }, { "epoch": 2.32, "learning_rate": 0.0002, "loss": 0.025, "step": 132 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 0.0477, "step": 133 }, { "epoch": 2.35, "learning_rate": 0.0002, "loss": 0.0403, "step": 134 }, { "epoch": 2.37, "learning_rate": 0.0002, "loss": 0.0209, "step": 135 }, { "epoch": 2.39, "learning_rate": 0.0002, "loss": 0.0004, "step": 136 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 0.0003, "step": 137 }, { "epoch": 2.42, "learning_rate": 0.0002, "loss": 0.024, "step": 138 }, { "epoch": 2.44, "learning_rate": 0.0002, "loss": 0.0215, "step": 139 }, { "epoch": 2.46, "learning_rate": 0.0002, "loss": 0.0321, "step": 140 }, { "epoch": 2.47, "learning_rate": 0.0002, "loss": 0.0067, "step": 141 }, { "epoch": 2.49, "learning_rate": 0.0002, "loss": 0.0144, "step": 142 }, { "epoch": 2.51, "learning_rate": 0.0002, "loss": 0.0017, "step": 143 }, { "epoch": 2.53, "learning_rate": 0.0002, "loss": 0.025, "step": 144 }, { "epoch": 2.54, "learning_rate": 0.0002, "loss": 0.0605, "step": 145 }, { "epoch": 2.56, "learning_rate": 0.0002, "loss": 0.0154, "step": 146 }, { "epoch": 2.58, "learning_rate": 0.0002, "loss": 0.002, "step": 147 }, { "epoch": 2.6, "learning_rate": 0.0002, "loss": 0.0012, "step": 148 }, { "epoch": 2.61, "learning_rate": 0.0002, "loss": 0.0066, "step": 149 }, { "epoch": 2.63, "learning_rate": 0.0002, "loss": 0.0065, "step": 150 }, { "epoch": 2.63, "eval_code_accuracy": 0.95, "eval_code_average_probability": 0.9435591101646423, "eval_code_brier_score": 0.03846995532512665, "eval_code_loss": 0.17103993892669678, "eval_code_probabilities": [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999970197677612, 0.9055734872817993, 0.42537397146224976, 0.9499343633651733, 1.0, 1.0, 1.0, 0.9999983310699463, 1.0, 1.0, 1.0, 0.9999964237213135, 0.9999994039535522, 0.8147205710411072, 0.8424564599990845, 0.9474385976791382, 0.003861561883240938, 1.0, 1.0, 1.0, 0.035812485963106155, 0.9962039589881897, 1.0, 1.0, 1.0, 1.0, 0.9999990463256836, 0.9999998807907104, 1.0, 1.0, 1.0, 0.9999754428863525, 1.0, 1.0, 1.0, 0.9999997615814209, 1.0, 1.0, 1.0, 1.0, 0.9926579594612122, 0.7481365203857422, 0.9441766142845154, 1.0, 0.9999790191650391, 1.0, 1.0, 1.0, 1.0, 0.9999998807907104, 0.9999998807907104, 0.9856025576591492, 1.0, 0.9969327449798584, 0.9738338589668274, 0.9861930012702942, 0.007046875543892384, 0.9993067979812622, 0.9975935816764832, 0.9999728202819824, 0.3661898970603943, 1.0, 0.9999417066574097, 1.0, 1.0, 0.9999967813491821, 1.0, 0.9991926550865173, 0.8271266222000122, 0.9992584586143494, 1.0, 0.9992876648902893, 1.0, 0.9999542236328125, 0.9999967813491821, 0.9966945648193359, 0.9999995231628418, 0.9955873489379883, 0.9999998807907104, 0.8881388306617737, 0.9999102354049683, 0.9994561076164246, 0.9989873766899109, 0.9133449792861938, 0.9107068181037903, 0.9093677401542664 ], "eval_code_runtime": 105.8053, "eval_code_samples_per_second": 0.945, "eval_code_score": -0.03846995532512665, "eval_code_steps_per_second": 0.038, "step": 150 }, { "loss": 0.0046, "learning_rate": 0.0002, "epoch": 2.65, "step": 151 }, { "loss": 0.0011, "learning_rate": 0.0002, "epoch": 2.67, "step": 152 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 2.68, "step": 153 }, { "loss": 0.0009, "learning_rate": 0.0002, "epoch": 2.7, "step": 154 }, { "loss": 0.0167, "learning_rate": 0.0002, "epoch": 2.72, "step": 155 }, { "loss": 0.0033, "learning_rate": 0.0002, "epoch": 2.74, "step": 156 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 2.75, "step": 157 }, { "loss": 0.0013, "learning_rate": 0.0002, "epoch": 2.77, "step": 158 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 2.79, "step": 159 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 2.81, "step": 160 }, { "loss": 0.002, "learning_rate": 0.0002, "epoch": 2.82, "step": 161 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 2.84, "step": 162 }, { "loss": 0.0052, "learning_rate": 0.0002, "epoch": 2.86, "step": 163 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 2.88, "step": 164 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 2.89, "step": 165 }, { "loss": 0.0043, "learning_rate": 0.0002, "epoch": 2.91, "step": 166 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 2.93, "step": 167 }, { "loss": 0.0011, "learning_rate": 0.0002, "epoch": 2.95, "step": 168 }, { "loss": 0.0024, "learning_rate": 0.0002, "epoch": 2.96, "step": 169 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 2.98, "step": 170 }, { "loss": 0.0009, "learning_rate": 0.0002, "epoch": 3.0, "step": 171 }, { "loss": 0.0008, "learning_rate": 0.0002, "epoch": 3.02, "step": 172 }, { "loss": 0.0007, "learning_rate": 0.0002, "epoch": 3.04, "step": 173 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.05, "step": 174 }, { "loss": 0.0182, "learning_rate": 0.0002, "epoch": 3.07, "step": 175 }, { "eval_code_loss": 0.24447500705718994, "eval_code_score": -0.04580119252204895, "eval_code_brier_score": 0.04580119252204895, "eval_code_average_probability": 0.9384365677833557, "eval_code_accuracy": 0.95, "eval_code_probabilities": [ 1.0, 0.9999996423721313, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999998807907104, 0.9705566167831421, 0.28825750946998596, 0.990113377571106, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.8121438026428223, 0.8320725560188293, 0.9545630216598511, 0.022090904414653778, 1.0, 1.0, 1.0, 0.0003882711462210864, 0.9997542500495911, 1.0, 1.0, 1.0, 1.0, 0.9999994039535522, 0.9999996423721313, 1.0, 1.0, 1.0, 0.9999974966049194, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9805604219436646, 0.656412661075592, 0.9261404871940613, 1.0, 0.9999086856842041, 1.0, 1.0, 1.0, 1.0, 0.9999998807907104, 1.0, 0.9739339351654053, 1.0, 0.9995070695877075, 0.9991480112075806, 0.9978482723236084, 0.0005162784364074469, 0.999909520149231, 1.0, 1.0, 0.09138258546590805, 1.0, 0.9999922513961792, 1.0, 1.0, 1.0, 1.0, 0.9993377327919006, 0.829042375087738, 0.993126630783081, 1.0, 0.9995529055595398, 1.0, 0.9999717473983765, 0.9999934434890747, 0.9994831085205078, 1.0, 0.9715592861175537, 1.0, 0.7726230025291443, 0.9999821186065674, 0.9998100399971008, 0.9989909529685974, 0.8984997272491455, 0.8994256854057312, 0.9870588183403015 ], "eval_code_runtime": 104.8369, "eval_code_samples_per_second": 0.954, "eval_code_steps_per_second": 0.038, "epoch": 3.07, "step": 175 }, { "loss": 0.0027, "learning_rate": 0.0002, "epoch": 3.09, "step": 176 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.11, "step": 177 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 3.12, "step": 178 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 3.14, "step": 179 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.16, "step": 180 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.18, "step": 181 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 3.19, "step": 182 }, { "loss": 0.001, "learning_rate": 0.0002, "epoch": 3.21, "step": 183 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.23, "step": 184 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 3.25, "step": 185 }, { "loss": 0.0008, "learning_rate": 0.0002, "epoch": 3.26, "step": 186 }, { "loss": 0.0315, "learning_rate": 0.0002, "epoch": 3.28, "step": 187 }, { "loss": 0.0148, "learning_rate": 0.0002, "epoch": 3.3, "step": 188 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.32, "step": 189 }, { "loss": 0.0016, "learning_rate": 0.0002, "epoch": 3.33, "step": 190 }, { "loss": 0.0053, "learning_rate": 0.0002, "epoch": 3.35, "step": 191 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.37, "step": 192 }, { "loss": 0.0009, "learning_rate": 0.0002, "epoch": 3.39, "step": 193 }, { "loss": 0.1843, "learning_rate": 0.0002, "epoch": 3.4, "step": 194 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.42, "step": 195 }, { "loss": 0.012, "learning_rate": 0.0002, "epoch": 3.44, "step": 196 }, { "loss": 0.0031, "learning_rate": 0.0002, "epoch": 3.46, "step": 197 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 3.47, "step": 198 }, { "loss": 0.0062, "learning_rate": 0.0002, "epoch": 3.49, "step": 199 }, { "loss": 0.0011, "learning_rate": 0.0002, "epoch": 3.51, "step": 200 }, { "eval_code_loss": 0.20703525841236115, "eval_code_score": -0.036074623465538025, "eval_code_brier_score": 0.036074623465538025, "eval_code_average_probability": 0.9416353702545166, "eval_code_accuracy": 0.96, "eval_code_probabilities": [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999991655349731, 0.999995231628418, 0.9498335719108582, 0.30828458070755005, 0.987034261226654, 0.999970555305481, 0.9999929666519165, 0.9999411106109619, 0.999946117401123, 0.9999997615814209, 1.0, 1.0, 1.0, 1.0, 0.717918872833252, 0.6784900426864624, 0.8770557641983032, 7.296191597561119e-06, 0.9999998807907104, 0.9999997615814209, 1.0, 0.9999603033065796, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999985694885254, 0.9999916553497314, 1.0, 1.0, 1.0, 0.9999998807907104, 1.0, 1.0, 0.9999997615814209, 0.9999793767929077, 1.0, 1.0, 1.0, 1.0, 0.9963011741638184, 0.5810772180557251, 0.9937513470649719, 1.0, 0.9999946355819702, 1.0, 1.0, 1.0, 1.0, 0.9999998807907104, 0.9999996423721313, 0.999351441860199, 1.0, 0.9979580640792847, 0.9855725169181824, 0.9849231243133545, 0.017488110810518265, 0.999018669128418, 0.9788364768028259, 0.9989467263221741, 0.29412388801574707, 1.0, 0.9881665110588074, 1.0, 1.0, 0.9999973773956299, 1.0, 0.9999986886978149, 0.8509193658828735, 0.9998080134391785, 1.0, 0.9999957084655762, 1.0, 0.9992415904998779, 0.9998071789741516, 0.9979315996170044, 0.9996376037597656, 0.9590405821800232, 0.9996711015701294, 0.5990511775016785, 0.9998351335525513, 0.9997956156730652, 0.9722345471382141, 0.7871147990226746, 0.8062353730201721, 0.8593048453330994 ], "eval_code_runtime": 104.8009, "eval_code_samples_per_second": 0.954, "eval_code_steps_per_second": 0.038, "epoch": 3.51, "step": 200 }, { "loss": 0.0071, "learning_rate": 0.0002, "epoch": 3.53, "step": 201 }, { "loss": 0.0311, "learning_rate": 0.0002, "epoch": 3.54, "step": 202 }, { "loss": 0.0009, "learning_rate": 0.0002, "epoch": 3.56, "step": 203 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 3.58, "step": 204 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 3.6, "step": 205 }, { "loss": 0.0018, "learning_rate": 0.0002, "epoch": 3.61, "step": 206 }, { "loss": 0.0065, "learning_rate": 0.0002, "epoch": 3.63, "step": 207 }, { "loss": 0.001, "learning_rate": 0.0002, "epoch": 3.65, "step": 208 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.67, "step": 209 }, { "loss": 0.0302, "learning_rate": 0.0002, "epoch": 3.68, "step": 210 }, { "loss": 0.0662, "learning_rate": 0.0002, "epoch": 3.7, "step": 211 }, { "loss": 0.0071, "learning_rate": 0.0002, "epoch": 3.72, "step": 212 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 3.74, "step": 213 }, { "loss": 0.0527, "learning_rate": 0.0002, "epoch": 3.75, "step": 214 }, { "loss": 0.0069, "learning_rate": 0.0002, "epoch": 3.77, "step": 215 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 3.79, "step": 216 }, { "loss": 0.0056, "learning_rate": 0.0002, "epoch": 3.81, "step": 217 }, { "loss": 0.0205, "learning_rate": 0.0002, "epoch": 3.82, "step": 218 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 3.84, "step": 219 }, { "loss": 0.0048, "learning_rate": 0.0002, "epoch": 3.86, "step": 220 }, { "loss": 0.0307, "learning_rate": 0.0002, "epoch": 3.88, "step": 221 }, { "loss": 0.0092, "learning_rate": 0.0002, "epoch": 3.89, "step": 222 }, { "loss": 0.0425, "learning_rate": 0.0002, "epoch": 3.91, "step": 223 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 3.93, "step": 224 }, { "loss": 0.0021, "learning_rate": 0.0002, "epoch": 3.95, "step": 225 }, { "eval_code_loss": 0.1442316770553589, "eval_code_score": -0.03884093835949898, "eval_code_brier_score": 0.03884093835949898, "eval_code_average_probability": 0.9314249157905579, "eval_code_accuracy": 0.95, "eval_code_probabilities": [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999998807907104, 0.9999983310699463, 0.9166925549507141, 0.46314355731010437, 0.9695256948471069, 0.9999979734420776, 0.9999997615814209, 0.9999912977218628, 0.9998043179512024, 0.9999998807907104, 1.0, 0.999934196472168, 0.9998651742935181, 0.9999654293060303, 0.7587816119194031, 0.8365660309791565, 0.8947399258613586, 0.007870269939303398, 0.9999408721923828, 0.9807198643684387, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999988079071045, 1.0, 1.0, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 0.9998663663864136, 0.9656995534896851, 0.935349702835083, 1.0, 0.9999972581863403, 1.0, 1.0, 1.0, 0.9999998807907104, 0.9999998807907104, 0.9999998807907104, 0.9994795918464661, 0.9999734163284302, 0.4865659773349762, 0.5832042694091797, 0.9857308268547058, 0.015312162227928638, 0.999519944190979, 0.7279074788093567, 0.9087716937065125, 0.6908769011497498, 1.0, 0.9999642372131348, 1.0, 1.0, 0.9999998807907104, 1.0, 1.0, 0.8065006136894226, 0.9999997615814209, 1.0, 0.999992847442627, 1.0, 0.998808741569519, 0.9994845390319824, 0.9905843734741211, 0.9999678134918213, 0.9825097322463989, 0.9999821186065674, 0.316112756729126, 0.9999034404754639, 0.9957683086395264, 0.8763726353645325, 0.557384192943573, 0.5571630001068115, 0.936220109462738 ], "eval_code_runtime": 104.8083, "eval_code_samples_per_second": 0.954, "eval_code_steps_per_second": 0.038, "epoch": 3.95, "step": 225 }, { "loss": 0.0037, "learning_rate": 0.0002, "epoch": 3.96, "step": 226 }, { "loss": 0.0061, "learning_rate": 0.0002, "epoch": 3.98, "step": 227 }, { "loss": 0.0211, "learning_rate": 0.0002, "epoch": 4.0, "step": 228 }, { "loss": 0.0028, "learning_rate": 0.0002, "epoch": 4.02, "step": 229 }, { "loss": 0.0009, "learning_rate": 0.0002, "epoch": 4.04, "step": 230 }, { "loss": 0.0146, "learning_rate": 0.0002, "epoch": 4.05, "step": 231 }, { "loss": 0.0155, "learning_rate": 0.0002, "epoch": 4.07, "step": 232 }, { "loss": 0.0008, "learning_rate": 0.0002, "epoch": 4.09, "step": 233 }, { "loss": 0.0008, "learning_rate": 0.0002, "epoch": 4.11, "step": 234 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.12, "step": 235 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 4.14, "step": 236 }, { "loss": 0.0073, "learning_rate": 0.0002, "epoch": 4.16, "step": 237 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 4.18, "step": 238 }, { "loss": 0.0011, "learning_rate": 0.0002, "epoch": 4.19, "step": 239 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 4.21, "step": 240 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.23, "step": 241 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 4.25, "step": 242 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 4.26, "step": 243 }, { "loss": 0.0009, "learning_rate": 0.0002, "epoch": 4.28, "step": 244 }, { "loss": 0.0026, "learning_rate": 0.0002, "epoch": 4.3, "step": 245 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 4.32, "step": 246 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.33, "step": 247 }, { "loss": 0.0172, "learning_rate": 0.0002, "epoch": 4.35, "step": 248 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 4.37, "step": 249 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 4.39, "step": 250 }, { "eval_code_loss": 0.1744336634874344, "eval_code_score": -0.03369342163205147, "eval_code_brier_score": 0.03369342163205147, "eval_code_average_probability": 0.9495358467102051, "eval_code_accuracy": 0.97, "eval_code_probabilities": [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9451481699943542, 0.5480159521102905, 0.9964591860771179, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.7053650617599487, 0.8087413311004639, 0.9013814330101013, 0.0008012360776774585, 1.0, 0.9999994039535522, 1.0, 0.9998193383216858, 0.9999997615814209, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999983310699463, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9992625117301941, 0.9287911653518677, 0.872682511806488, 1.0, 0.9996117949485779, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999651908874512, 1.0, 0.9498048424720764, 0.9871782064437866, 0.9963241815567017, 0.0026193673256784678, 0.999943733215332, 0.9986830353736877, 0.9999673366546631, 0.9674615859985352, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999785423278809, 0.9016892313957214, 0.9997751116752625, 1.0, 0.9999997615814209, 1.0, 0.9999815225601196, 0.9999998807907104, 0.9999561309814453, 1.0, 0.9956650137901306, 1.0, 0.08844359964132309, 0.9999833106994629, 0.9990620017051697, 0.9781332612037659, 0.747072160243988, 0.6776840090751648, 0.9581350088119507 ], "eval_code_runtime": 104.7916, "eval_code_samples_per_second": 0.954, "eval_code_steps_per_second": 0.038, "epoch": 4.39, "step": 250 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.4, "step": 251 }, { "loss": 0.0008, "learning_rate": 0.0002, "epoch": 4.42, "step": 252 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 4.44, "step": 253 }, { "loss": 0.0034, "learning_rate": 0.0002, "epoch": 4.46, "step": 254 }, { "loss": 0.0006, "learning_rate": 0.0002, "epoch": 4.47, "step": 255 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.49, "step": 256 }, { "loss": 0.0004, "learning_rate": 0.0002, "epoch": 4.51, "step": 257 }, { "loss": 0.0009, "learning_rate": 0.0002, "epoch": 4.53, "step": 258 }, { "loss": 0.0042, "learning_rate": 0.0002, "epoch": 4.54, "step": 259 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 4.56, "step": 260 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.58, "step": 261 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 4.6, "step": 262 }, { "loss": 0.0023, "learning_rate": 0.0002, "epoch": 4.61, "step": 263 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.63, "step": 264 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.65, "step": 265 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 4.67, "step": 266 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 4.68, "step": 267 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.7, "step": 268 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.72, "step": 269 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.74, "step": 270 }, { "loss": 0.0013, "learning_rate": 0.0002, "epoch": 4.75, "step": 271 }, { "loss": 0.006, "learning_rate": 0.0002, "epoch": 4.77, "step": 272 }, { "loss": 0.0002, "learning_rate": 0.0002, "epoch": 4.79, "step": 273 }, { "loss": 0.0008, "learning_rate": 0.0002, "epoch": 4.81, "step": 274 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.82, "step": 275 }, { "eval_code_loss": 0.18856269121170044, "eval_code_score": -0.03526609018445015, "eval_code_brier_score": 0.03526609018445015, "eval_code_average_probability": 0.949932873249054, "eval_code_accuracy": 0.96, "eval_code_probabilities": [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9667010307312012, 0.4576479494571686, 0.9961231350898743, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.7199408411979675, 0.8098323345184326, 0.8810715079307556, 0.00041508462163619697, 1.0, 1.0, 1.0, 0.9999997615814209, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999830722808838, 0.9893618226051331, 0.948550820350647, 1.0, 0.9997448325157166, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999991655349731, 1.0, 0.9401667714118958, 0.9961704611778259, 0.9991104006767273, 0.0038628315087407827, 0.9999805688858032, 0.9974443912506104, 0.9999669790267944, 0.9498675465583801, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999998807907104, 0.9562873244285583, 0.9999939203262329, 1.0, 0.9999998807907104, 1.0, 0.9999774694442749, 0.9999997615814209, 0.9999823570251465, 1.0, 0.9991405010223389, 1.0, 0.027186574414372444, 0.9999986886978149, 0.9999057054519653, 0.9804957509040833, 0.7432851791381836, 0.7164115905761719, 0.9146708250045776 ], "eval_code_runtime": 104.8208, "eval_code_samples_per_second": 0.954, "eval_code_steps_per_second": 0.038, "epoch": 4.82, "step": 275 }, { "loss": 0.0018, "learning_rate": 0.0002, "epoch": 4.84, "step": 276 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.86, "step": 277 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 4.88, "step": 278 }, { "loss": 0.0019, "learning_rate": 0.0002, "epoch": 4.89, "step": 279 }, { "loss": 0.0009, "learning_rate": 0.0002, "epoch": 4.91, "step": 280 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.93, "step": 281 }, { "loss": 0.0008, "learning_rate": 0.0002, "epoch": 4.95, "step": 282 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 4.96, "step": 283 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 4.98, "step": 284 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.0, "step": 285 }, { "loss": 0.0021, "learning_rate": 0.0002, "epoch": 5.02, "step": 286 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.04, "step": 287 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.05, "step": 288 }, { "loss": 0.0003, "learning_rate": 0.0002, "epoch": 5.07, "step": 289 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 5.09, "step": 290 }, { "loss": 0.0005, "learning_rate": 0.0002, "epoch": 5.11, "step": 291 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.12, "step": 292 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.14, "step": 293 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.16, "step": 294 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.18, "step": 295 }, { "loss": 0.0, "learning_rate": 0.0002, "epoch": 5.19, "step": 296 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 5.21, "step": 297 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 5.23, "step": 298 }, { "loss": 0.0008, "learning_rate": 0.0002, "epoch": 5.25, "step": 299 }, { "loss": 0.0001, "learning_rate": 0.0002, "epoch": 5.26, "step": 300 }, { "eval_code_loss": 0.17653299868106842, "eval_code_score": -0.035850197076797485, "eval_code_brier_score": 0.035850197076797485, "eval_code_average_probability": 0.949984610080719, "eval_code_accuracy": 0.96, "eval_code_probabilities": [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9785318374633789, 0.42346829175949097, 0.9971503615379333, 1.0, 1.0, 0.9999997615814209, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.7221306562423706, 0.8104501962661743, 0.8751990795135498, 0.0004177717200946063, 1.0, 1.0, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9999911785125732, 0.9935014843940735, 0.9632013440132141, 1.0, 0.9997548460960388, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.999998927116394, 1.0, 0.9625283479690552, 0.9952881336212158, 0.9994906187057495, 0.011133184656500816, 0.9999599456787109, 0.9987699389457703, 0.9999761581420898, 0.9492788314819336, 1.0, 0.9999998807907104, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9763202667236328, 0.9999985694885254, 1.0, 0.9999996423721313, 1.0, 0.9999876022338867, 0.9999998807907104, 0.9999854564666748, 1.0, 0.9988405108451843, 1.0, 0.03199751302599907, 0.9999868869781494, 0.9996381998062134, 0.9803478121757507, 0.6885609030723572, 0.6760653853416443, 0.9665107727050781 ], "eval_code_runtime": 104.8074, "eval_code_samples_per_second": 0.954, "eval_code_steps_per_second": 0.038, "epoch": 5.26, "step": 300 }, { "train_runtime": 13610.3633, "train_samples_per_second": 0.705, "train_steps_per_second": 0.022, "total_flos": 0.0, "train_loss": 0.002647306595269659, "epoch": 5.26, "step": 300 } ]