llama-13b-code / training_logs.json
joshuaclymer's picture
Upload folder using huggingface_hub
481ef75
[
{
"epoch": 0.02,
"learning_rate": 0.0002,
"loss": 0.7109,
"step": 1
},
{
"epoch": 0.04,
"learning_rate": 0.0002,
"loss": 0.7151,
"step": 2
},
{
"epoch": 0.05,
"learning_rate": 0.0002,
"loss": 0.7036,
"step": 3
},
{
"epoch": 0.07,
"learning_rate": 0.0002,
"loss": 0.6805,
"step": 4
},
{
"epoch": 0.09,
"learning_rate": 0.0002,
"loss": 0.6678,
"step": 5
},
{
"epoch": 0.11,
"learning_rate": 0.0002,
"loss": 0.6692,
"step": 6
},
{
"epoch": 0.12,
"learning_rate": 0.0002,
"loss": 0.6459,
"step": 7
},
{
"epoch": 0.14,
"learning_rate": 0.0002,
"loss": 0.6262,
"step": 8
},
{
"epoch": 0.16,
"learning_rate": 0.0002,
"loss": 0.6574,
"step": 9
},
{
"epoch": 0.18,
"learning_rate": 0.0002,
"loss": 0.6311,
"step": 10
},
{
"epoch": 0.19,
"learning_rate": 0.0002,
"loss": 0.5219,
"step": 11
},
{
"epoch": 0.21,
"learning_rate": 0.0002,
"loss": 0.5933,
"step": 12
},
{
"epoch": 0.23,
"learning_rate": 0.0002,
"loss": 0.5881,
"step": 13
},
{
"epoch": 0.25,
"learning_rate": 0.0002,
"loss": 0.6345,
"step": 14
},
{
"epoch": 0.26,
"learning_rate": 0.0002,
"loss": 0.559,
"step": 15
},
{
"epoch": 0.28,
"learning_rate": 0.0002,
"loss": 0.5686,
"step": 16
},
{
"epoch": 0.3,
"learning_rate": 0.0002,
"loss": 0.577,
"step": 17
},
{
"epoch": 0.32,
"learning_rate": 0.0002,
"loss": 0.4884,
"step": 18
},
{
"epoch": 0.33,
"learning_rate": 0.0002,
"loss": 0.4593,
"step": 19
},
{
"epoch": 0.35,
"learning_rate": 0.0002,
"loss": 0.4582,
"step": 20
},
{
"epoch": 0.37,
"learning_rate": 0.0002,
"loss": 0.428,
"step": 21
},
{
"epoch": 0.39,
"learning_rate": 0.0002,
"loss": 0.495,
"step": 22
},
{
"epoch": 0.4,
"learning_rate": 0.0002,
"loss": 0.4683,
"step": 23
},
{
"epoch": 0.42,
"learning_rate": 0.0002,
"loss": 0.3779,
"step": 24
},
{
"epoch": 0.44,
"learning_rate": 0.0002,
"loss": 0.5694,
"step": 25
},
{
"epoch": 0.44,
"eval_code_accuracy": 0.81,
"eval_code_average_probability": 0.6998826861381531,
"eval_code_brier_score": 0.13871249556541443,
"eval_code_loss": 0.392591655254364,
"eval_code_probabilities": [
0.9998756647109985,
0.9996539354324341,
0.9968274235725403,
0.867797315120697,
0.604396641254425,
0.8194839358329773,
0.9978196620941162,
0.9973512887954712,
0.8165198564529419,
0.929094672203064,
0.9696783423423767,
0.8749808073043823,
0.9949828386306763,
0.9988803267478943,
0.9974466562271118,
0.5661922693252563,
0.7031828761100769,
0.7817732095718384,
0.7952773571014404,
0.700941801071167,
0.5993526577949524,
0.4928004741668701,
0.7662289142608643,
0.7449039816856384,
0.26918813586235046,
0.23834775388240814,
0.21881021559238434,
0.5174687504768372,
0.48715656995773315,
0.5120030045509338,
0.3874703049659729,
0.46171045303344727,
0.6353215575218201,
0.9995631575584412,
0.9988592863082886,
0.9959748387336731,
0.9921092391014099,
0.9928039908409119,
0.9970149993896484,
0.5361520648002625,
0.6019002795219421,
0.4536624550819397,
0.6162904500961304,
0.5367547869682312,
0.620251476764679,
0.5513021349906921,
0.5809685587882996,
0.5933180451393127,
0.726946234703064,
0.6555966138839722,
0.999262273311615,
0.9996010661125183,
0.9998406171798706,
0.7900382280349731,
0.5323582291603088,
0.5186650156974792,
0.5038493275642395,
0.9872013330459595,
0.5808623433113098,
0.9900431632995605,
0.9054338335990906,
0.8925741910934448,
0.8963204026222229,
0.48921719193458557,
0.565790057182312,
0.544992208480835,
0.6663545370101929,
0.49584266543388367,
0.4396786093711853,
0.5050247311592102,
0.4423765242099762,
0.5252822637557983,
0.5083388686180115,
0.5204058885574341,
0.4985828101634979,
0.9953283071517944,
0.9802095890045166,
0.9042825698852539,
0.9987348914146423,
0.9832821488380432,
0.9921707510948181,
0.5249463319778442,
0.496512770652771,
0.529527485370636,
0.6597751975059509,
0.9941974878311157,
0.8405609130859375,
0.6745859384536743,
0.6913660168647766,
0.4706657826900482,
0.6228535771369934,
0.7330396175384521,
0.5534575581550598,
0.4912925362586975,
0.5196889042854309,
0.5061907768249512,
0.527052640914917,
0.46116262674331665,
0.40944358706474304,
0.4376073181629181
],
"eval_code_runtime": 105.7673,
"eval_code_samples_per_second": 0.945,
"eval_code_score": -0.13871249556541443,
"eval_code_steps_per_second": 0.038,
"step": 25
},
{
"epoch": 0.46,
"learning_rate": 0.0002,
"loss": 0.3947,
"step": 26
},
{
"epoch": 0.47,
"learning_rate": 0.0002,
"loss": 0.4236,
"step": 27
},
{
"epoch": 0.49,
"learning_rate": 0.0002,
"loss": 0.349,
"step": 28
},
{
"epoch": 0.51,
"learning_rate": 0.0002,
"loss": 0.3414,
"step": 29
},
{
"epoch": 0.53,
"learning_rate": 0.0002,
"loss": 0.5019,
"step": 30
},
{
"epoch": 0.54,
"learning_rate": 0.0002,
"loss": 0.3768,
"step": 31
},
{
"epoch": 0.56,
"learning_rate": 0.0002,
"loss": 0.3902,
"step": 32
},
{
"epoch": 0.58,
"learning_rate": 0.0002,
"loss": 0.2838,
"step": 33
},
{
"epoch": 0.6,
"learning_rate": 0.0002,
"loss": 0.1972,
"step": 34
},
{
"epoch": 0.61,
"learning_rate": 0.0002,
"loss": 0.3796,
"step": 35
},
{
"epoch": 0.63,
"learning_rate": 0.0002,
"loss": 0.3741,
"step": 36
},
{
"epoch": 0.65,
"learning_rate": 0.0002,
"loss": 0.2624,
"step": 37
},
{
"epoch": 0.67,
"learning_rate": 0.0002,
"loss": 0.2505,
"step": 38
},
{
"epoch": 0.68,
"learning_rate": 0.0002,
"loss": 0.2687,
"step": 39
},
{
"epoch": 0.7,
"learning_rate": 0.0002,
"loss": 0.2856,
"step": 40
},
{
"epoch": 0.72,
"learning_rate": 0.0002,
"loss": 0.1638,
"step": 41
},
{
"epoch": 0.74,
"learning_rate": 0.0002,
"loss": 0.2595,
"step": 42
},
{
"epoch": 0.75,
"learning_rate": 0.0002,
"loss": 0.2886,
"step": 43
},
{
"epoch": 0.77,
"learning_rate": 0.0002,
"loss": 0.1604,
"step": 44
},
{
"epoch": 0.79,
"learning_rate": 0.0002,
"loss": 0.2069,
"step": 45
},
{
"epoch": 0.81,
"learning_rate": 0.0002,
"loss": 0.2285,
"step": 46
},
{
"epoch": 0.82,
"learning_rate": 0.0002,
"loss": 0.1975,
"step": 47
},
{
"epoch": 0.84,
"learning_rate": 0.0002,
"loss": 0.1726,
"step": 48
},
{
"epoch": 0.86,
"learning_rate": 0.0002,
"loss": 0.0881,
"step": 49
},
{
"epoch": 0.88,
"learning_rate": 0.0002,
"loss": 0.2712,
"step": 50
},
{
"epoch": 0.88,
"eval_code_accuracy": 0.93,
"eval_code_average_probability": 0.8494662642478943,
"eval_code_brier_score": 0.06509324163198471,
"eval_code_loss": 0.18686418235301971,
"eval_code_probabilities": [
0.9999998807907104,
0.9999940395355225,
0.9999998807907104,
0.999998927116394,
0.9994799494743347,
0.9999616146087646,
1.0,
0.9999617338180542,
0.997948944568634,
0.9999988079071045,
1.0,
0.9999945163726807,
0.9999972581863403,
0.9999998807907104,
0.9972482323646545,
0.8824266195297241,
0.5207855105400085,
0.9811423420906067,
0.9948011636734009,
0.9970309734344482,
0.9518866539001465,
0.9435040950775146,
0.9998762607574463,
0.9999566078186035,
0.5264937281608582,
0.3567848801612854,
0.2969517111778259,
0.5564099550247192,
0.5566191673278809,
0.6488668918609619,
0.2498033493757248,
0.8209301233291626,
0.9682015776634216,
0.999983549118042,
0.998921275138855,
0.9983420372009277,
0.9999994039535522,
1.0,
1.0,
0.9042081236839294,
0.9448812007904053,
0.8440216779708862,
0.9969866871833801,
0.935389518737793,
0.9999938011169434,
0.8614246249198914,
0.9949424862861633,
0.9876769781112671,
0.99991774559021,
0.9980294108390808,
0.9999949932098389,
1.0,
1.0,
0.9999998807907104,
0.6444177627563477,
0.5885342359542847,
0.6353741884231567,
0.9999998807907104,
0.6010631322860718,
0.999997615814209,
0.9806910753250122,
0.9655673503875732,
0.9658817648887634,
0.829046368598938,
0.9939888715744019,
0.8807443380355835,
0.9380287528038025,
0.7914523482322693,
0.3484261929988861,
0.6201522350311279,
0.34111207723617554,
0.7237049341201782,
0.6027003526687622,
0.8865858912467957,
0.632958710193634,
1.0,
0.9925962090492249,
0.9999537467956543,
1.0,
0.999997615814209,
0.9999998807907104,
0.6945015788078308,
0.5074270963668823,
0.7571401000022888,
0.9955568909645081,
0.9997404217720032,
0.9955176711082458,
0.7693279385566711,
0.8310465812683105,
0.5401557683944702,
0.9749163389205933,
0.963936448097229,
0.9640229940414429,
0.5689054131507874,
0.8011016249656677,
0.6685754060745239,
0.6061564087867737,
0.44276222586631775,
0.4091780185699463,
0.7819089889526367
],
"eval_code_runtime": 105.8373,
"eval_code_samples_per_second": 0.945,
"eval_code_score": -0.06509324163198471,
"eval_code_steps_per_second": 0.038,
"step": 50
},
{
"epoch": 0.89,
"learning_rate": 0.0002,
"loss": 0.181,
"step": 51
},
{
"epoch": 0.91,
"learning_rate": 0.0002,
"loss": 0.2069,
"step": 52
},
{
"epoch": 0.93,
"learning_rate": 0.0002,
"loss": 0.2289,
"step": 53
},
{
"epoch": 0.95,
"learning_rate": 0.0002,
"loss": 0.2987,
"step": 54
},
{
"epoch": 0.96,
"learning_rate": 0.0002,
"loss": 0.3474,
"step": 55
},
{
"epoch": 0.98,
"learning_rate": 0.0002,
"loss": 0.284,
"step": 56
},
{
"epoch": 1.0,
"learning_rate": 0.0002,
"loss": 0.7535,
"step": 57
},
{
"epoch": 1.02,
"learning_rate": 0.0002,
"loss": 0.1286,
"step": 58
},
{
"epoch": 1.04,
"learning_rate": 0.0002,
"loss": 0.0816,
"step": 59
},
{
"epoch": 1.05,
"learning_rate": 0.0002,
"loss": 0.0607,
"step": 60
},
{
"epoch": 1.07,
"learning_rate": 0.0002,
"loss": 0.0772,
"step": 61
},
{
"epoch": 1.09,
"learning_rate": 0.0002,
"loss": 0.1129,
"step": 62
},
{
"epoch": 1.11,
"learning_rate": 0.0002,
"loss": 0.0509,
"step": 63
},
{
"epoch": 1.12,
"learning_rate": 0.0002,
"loss": 0.1928,
"step": 64
},
{
"epoch": 1.14,
"learning_rate": 0.0002,
"loss": 0.0945,
"step": 65
},
{
"epoch": 1.16,
"learning_rate": 0.0002,
"loss": 0.1268,
"step": 66
},
{
"epoch": 1.18,
"learning_rate": 0.0002,
"loss": 0.1186,
"step": 67
},
{
"epoch": 1.19,
"learning_rate": 0.0002,
"loss": 0.1123,
"step": 68
},
{
"epoch": 1.21,
"learning_rate": 0.0002,
"loss": 0.1512,
"step": 69
},
{
"epoch": 1.23,
"learning_rate": 0.0002,
"loss": 0.1303,
"step": 70
},
{
"epoch": 1.25,
"learning_rate": 0.0002,
"loss": 0.0441,
"step": 71
},
{
"epoch": 1.26,
"learning_rate": 0.0002,
"loss": 0.1391,
"step": 72
},
{
"epoch": 1.28,
"learning_rate": 0.0002,
"loss": 0.0685,
"step": 73
},
{
"epoch": 1.3,
"learning_rate": 0.0002,
"loss": 0.0762,
"step": 74
},
{
"epoch": 1.32,
"learning_rate": 0.0002,
"loss": 0.1011,
"step": 75
},
{
"epoch": 1.32,
"eval_code_accuracy": 0.91,
"eval_code_average_probability": 0.889110267162323,
"eval_code_brier_score": 0.06802475452423096,
"eval_code_loss": 0.21632270514965057,
"eval_code_probabilities": [
0.9983546137809753,
0.8964507579803467,
0.9998843669891357,
1.0,
0.9999983310699463,
0.9999896287918091,
1.0,
1.0,
0.9999992847442627,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9493870139122009,
0.22386565804481506,
0.9954581260681152,
0.9999986886978149,
1.0,
0.9999638795852661,
0.9999364614486694,
1.0,
1.0,
0.14285829663276672,
0.05121216922998428,
0.06197505444288254,
0.8036693334579468,
0.784625768661499,
0.9341561198234558,
0.2160544991493225,
0.9977596998214722,
0.9990569949150085,
0.9999992847442627,
0.5561187863349915,
0.9838144183158875,
1.0,
1.0,
1.0,
0.9999961853027344,
0.999188244342804,
0.9992071986198425,
0.9999998807907104,
0.9616034030914307,
0.9999998807907104,
0.9486005902290344,
0.9999855756759644,
0.9963693618774414,
0.9999858140945435,
0.9972230195999146,
1.0,
1.0,
1.0,
1.0,
0.7309120893478394,
0.8254848122596741,
0.7734106183052063,
1.0,
0.5664382576942444,
1.0,
0.9999998807907104,
0.9999990463256836,
0.9999992847442627,
0.9987679123878479,
0.9999841451644897,
0.9796060919761658,
0.9999995231628418,
0.4220101237297058,
0.517703652381897,
0.8652507066726685,
0.007634077221155167,
0.9999922513961792,
0.9957844614982605,
0.9999879598617554,
0.9737687706947327,
1.0,
0.9995587468147278,
1.0,
1.0,
1.0,
1.0,
0.9666233658790588,
0.6209402084350586,
0.978766143321991,
0.9995095729827881,
0.9826872944831848,
1.0,
0.9902674555778503,
0.9968993663787842,
0.8134205937385559,
0.9999436140060425,
0.9293919205665588,
0.9999128580093384,
0.8822279572486877,
0.9978179931640625,
0.9373266100883484,
0.7644922733306885,
0.43501412868499756,
0.4749135673046112,
0.9878302216529846
],
"eval_code_runtime": 105.7625,
"eval_code_samples_per_second": 0.946,
"eval_code_score": -0.06802475452423096,
"eval_code_steps_per_second": 0.038,
"step": 75
},
{
"epoch": 1.33,
"learning_rate": 0.0002,
"loss": 0.0665,
"step": 76
},
{
"epoch": 1.35,
"learning_rate": 0.0002,
"loss": 0.0808,
"step": 77
},
{
"epoch": 1.37,
"learning_rate": 0.0002,
"loss": 0.0637,
"step": 78
},
{
"epoch": 1.39,
"learning_rate": 0.0002,
"loss": 0.0323,
"step": 79
},
{
"epoch": 1.4,
"learning_rate": 0.0002,
"loss": 0.0441,
"step": 80
},
{
"epoch": 1.42,
"learning_rate": 0.0002,
"loss": 0.1698,
"step": 81
},
{
"epoch": 1.44,
"learning_rate": 0.0002,
"loss": 0.156,
"step": 82
},
{
"epoch": 1.46,
"learning_rate": 0.0002,
"loss": 0.3788,
"step": 83
},
{
"epoch": 1.47,
"learning_rate": 0.0002,
"loss": 0.1222,
"step": 84
},
{
"epoch": 1.49,
"learning_rate": 0.0002,
"loss": 0.1639,
"step": 85
},
{
"epoch": 1.51,
"learning_rate": 0.0002,
"loss": 0.0538,
"step": 86
},
{
"epoch": 1.53,
"learning_rate": 0.0002,
"loss": 0.0328,
"step": 87
},
{
"epoch": 1.54,
"learning_rate": 0.0002,
"loss": 0.1745,
"step": 88
},
{
"epoch": 1.56,
"learning_rate": 0.0002,
"loss": 0.0874,
"step": 89
},
{
"epoch": 1.58,
"learning_rate": 0.0002,
"loss": 0.1177,
"step": 90
},
{
"epoch": 1.6,
"learning_rate": 0.0002,
"loss": 0.1185,
"step": 91
},
{
"epoch": 1.61,
"learning_rate": 0.0002,
"loss": 0.0543,
"step": 92
},
{
"epoch": 1.63,
"learning_rate": 0.0002,
"loss": 0.0392,
"step": 93
},
{
"epoch": 1.65,
"learning_rate": 0.0002,
"loss": 0.2006,
"step": 94
},
{
"epoch": 1.67,
"learning_rate": 0.0002,
"loss": 0.1249,
"step": 95
},
{
"epoch": 1.68,
"learning_rate": 0.0002,
"loss": 0.0341,
"step": 96
},
{
"epoch": 1.7,
"learning_rate": 0.0002,
"loss": 0.0666,
"step": 97
},
{
"epoch": 1.72,
"learning_rate": 0.0002,
"loss": 0.1614,
"step": 98
},
{
"epoch": 1.74,
"learning_rate": 0.0002,
"loss": 0.0831,
"step": 99
},
{
"epoch": 1.75,
"learning_rate": 0.0002,
"loss": 0.0273,
"step": 100
},
{
"epoch": 1.75,
"eval_code_accuracy": 0.96,
"eval_code_average_probability": 0.9304368495941162,
"eval_code_brier_score": 0.03531455993652344,
"eval_code_loss": 0.10408665984869003,
"eval_code_probabilities": [
1.0,
1.0,
1.0,
1.0,
0.9999995231628418,
1.0,
1.0,
0.9999160766601562,
0.9988158941268921,
0.9999899864196777,
0.9999986886978149,
0.9999879598617554,
0.9999833106994629,
0.9999370574951172,
0.9992890357971191,
0.9659052491188049,
0.6254108548164368,
0.997655987739563,
0.9994457364082336,
0.999336302280426,
0.9953096508979797,
0.9922773838043213,
0.9999829530715942,
0.9999885559082031,
0.9871056079864502,
0.9849441647529602,
0.9815734028816223,
0.7201115489006042,
0.6514120101928711,
0.8476458191871643,
0.1292794942855835,
0.9985151886940002,
0.9995579123497009,
1.0,
0.9999856948852539,
0.9999977350234985,
0.9999998807907104,
1.0,
1.0,
0.9999991655349731,
0.9998373985290527,
0.999966025352478,
1.0,
0.9999432563781738,
1.0,
0.997358500957489,
1.0,
0.9999942779541016,
0.9999959468841553,
0.9998447895050049,
1.0,
1.0,
1.0,
1.0,
0.9860179424285889,
0.8810455203056335,
0.8414822816848755,
1.0,
0.9645673632621765,
1.0,
1.0,
1.0,
1.0,
0.9987167119979858,
0.9995377063751221,
0.969241201877594,
0.9999886751174927,
0.2950672507286072,
0.44002464413642883,
0.8467398881912231,
0.06261960417032242,
0.9991006851196289,
0.9985753297805786,
0.9999589920043945,
0.9881368279457092,
1.0,
0.9875013828277588,
0.9999996423721313,
1.0,
0.9999927282333374,
0.9999997615814209,
0.9641613960266113,
0.5904451012611389,
0.991640567779541,
0.9999064207077026,
0.996203601360321,
0.999998927116394,
0.9644280672073364,
0.9944401979446411,
0.8003358840942383,
0.9963403940200806,
0.9804515242576599,
0.989639163017273,
0.852015495300293,
0.9918712377548218,
0.9663655757904053,
0.8847030997276306,
0.5973413586616516,
0.5435407757759094,
0.811240017414093
],
"eval_code_runtime": 105.7613,
"eval_code_samples_per_second": 0.946,
"eval_code_score": -0.03531455993652344,
"eval_code_steps_per_second": 0.038,
"step": 100
},
{
"epoch": 1.77,
"learning_rate": 0.0002,
"loss": 0.0341,
"step": 101
},
{
"epoch": 1.79,
"learning_rate": 0.0002,
"loss": 0.0952,
"step": 102
},
{
"epoch": 1.81,
"learning_rate": 0.0002,
"loss": 0.0358,
"step": 103
},
{
"epoch": 1.82,
"learning_rate": 0.0002,
"loss": 0.1143,
"step": 104
},
{
"epoch": 1.84,
"learning_rate": 0.0002,
"loss": 0.07,
"step": 105
},
{
"epoch": 1.86,
"learning_rate": 0.0002,
"loss": 0.1232,
"step": 106
},
{
"epoch": 1.88,
"learning_rate": 0.0002,
"loss": 0.065,
"step": 107
},
{
"epoch": 1.89,
"learning_rate": 0.0002,
"loss": 0.0167,
"step": 108
},
{
"epoch": 1.91,
"learning_rate": 0.0002,
"loss": 0.0486,
"step": 109
},
{
"epoch": 1.93,
"learning_rate": 0.0002,
"loss": 0.011,
"step": 110
},
{
"epoch": 1.95,
"learning_rate": 0.0002,
"loss": 0.0425,
"step": 111
},
{
"epoch": 1.96,
"learning_rate": 0.0002,
"loss": 0.1082,
"step": 112
},
{
"epoch": 1.98,
"learning_rate": 0.0002,
"loss": 0.0196,
"step": 113
},
{
"epoch": 2.0,
"learning_rate": 0.0002,
"loss": 0.0152,
"step": 114
},
{
"epoch": 2.02,
"learning_rate": 0.0002,
"loss": 0.012,
"step": 115
},
{
"epoch": 2.04,
"learning_rate": 0.0002,
"loss": 0.0186,
"step": 116
},
{
"epoch": 2.05,
"learning_rate": 0.0002,
"loss": 0.0186,
"step": 117
},
{
"epoch": 2.07,
"learning_rate": 0.0002,
"loss": 0.0238,
"step": 118
},
{
"epoch": 2.09,
"learning_rate": 0.0002,
"loss": 0.0097,
"step": 119
},
{
"epoch": 2.11,
"learning_rate": 0.0002,
"loss": 0.0007,
"step": 120
},
{
"epoch": 2.12,
"learning_rate": 0.0002,
"loss": 0.0409,
"step": 121
},
{
"epoch": 2.14,
"learning_rate": 0.0002,
"loss": 0.0238,
"step": 122
},
{
"epoch": 2.16,
"learning_rate": 0.0002,
"loss": 0.0045,
"step": 123
},
{
"epoch": 2.18,
"learning_rate": 0.0002,
"loss": 0.0008,
"step": 124
},
{
"epoch": 2.19,
"learning_rate": 0.0002,
"loss": 0.0307,
"step": 125
},
{
"epoch": 2.19,
"eval_code_accuracy": 0.96,
"eval_code_average_probability": 0.9499993324279785,
"eval_code_brier_score": 0.028195565566420555,
"eval_code_loss": 0.11333052814006805,
"eval_code_probabilities": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999997615814209,
1.0,
1.0,
0.9999997615814209,
0.9999997615814209,
0.9999985694885254,
0.9999561309814453,
0.8770335912704468,
0.4641159176826477,
0.9271451234817505,
1.0,
1.0,
0.9999996423721313,
0.9998699426651001,
0.9999997615814209,
0.9999998807907104,
0.9999991655349731,
0.9999380111694336,
0.9999819993972778,
0.7533411979675293,
0.7846699357032776,
0.89438796043396,
0.015444471500813961,
0.999987006187439,
1.0,
1.0,
0.9969304203987122,
0.9999985694885254,
0.9999998807907104,
1.0,
1.0,
1.0,
0.9999716281890869,
0.9999990463256836,
1.0,
1.0,
1.0,
0.9998655319213867,
1.0,
0.9999997615814209,
0.9999998807907104,
0.9999963045120239,
1.0,
1.0,
1.0,
1.0,
0.9978950023651123,
0.7412748336791992,
0.8329392671585083,
1.0,
0.999669075012207,
1.0,
1.0,
1.0,
1.0,
0.9999936819076538,
0.9999263286590576,
0.9996693134307861,
1.0,
0.9178900122642517,
0.8888350129127502,
0.9699975848197937,
0.014796692878007889,
0.9991298317909241,
0.9968131184577942,
0.9999946355819702,
0.9282417297363281,
1.0,
0.9971838593482971,
1.0,
1.0,
0.9999544620513916,
0.9999992847442627,
0.9987694621086121,
0.8008559346199036,
0.9995920062065125,
1.0,
0.9999116659164429,
1.0,
0.9975823163986206,
0.9999178647994995,
0.9812232255935669,
0.9962044358253479,
0.9992068409919739,
0.9994266033172607,
0.9672200679779053,
0.998022198677063,
0.9971696734428406,
0.9978540539741516,
0.9273316860198975,
0.8601265549659729,
0.4826805889606476
],
"eval_code_runtime": 105.781,
"eval_code_samples_per_second": 0.945,
"eval_code_score": -0.028195565566420555,
"eval_code_steps_per_second": 0.038,
"step": 125
},
{
"epoch": 2.21,
"learning_rate": 0.0002,
"loss": 0.0048,
"step": 126
},
{
"epoch": 2.23,
"learning_rate": 0.0002,
"loss": 0.0093,
"step": 127
},
{
"epoch": 2.25,
"learning_rate": 0.0002,
"loss": 0.0056,
"step": 128
},
{
"epoch": 2.26,
"learning_rate": 0.0002,
"loss": 0.0123,
"step": 129
},
{
"epoch": 2.28,
"learning_rate": 0.0002,
"loss": 0.0825,
"step": 130
},
{
"epoch": 2.3,
"learning_rate": 0.0002,
"loss": 0.0199,
"step": 131
},
{
"epoch": 2.32,
"learning_rate": 0.0002,
"loss": 0.025,
"step": 132
},
{
"epoch": 2.33,
"learning_rate": 0.0002,
"loss": 0.0477,
"step": 133
},
{
"epoch": 2.35,
"learning_rate": 0.0002,
"loss": 0.0403,
"step": 134
},
{
"epoch": 2.37,
"learning_rate": 0.0002,
"loss": 0.0209,
"step": 135
},
{
"epoch": 2.39,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 136
},
{
"epoch": 2.4,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 137
},
{
"epoch": 2.42,
"learning_rate": 0.0002,
"loss": 0.024,
"step": 138
},
{
"epoch": 2.44,
"learning_rate": 0.0002,
"loss": 0.0215,
"step": 139
},
{
"epoch": 2.46,
"learning_rate": 0.0002,
"loss": 0.0321,
"step": 140
},
{
"epoch": 2.47,
"learning_rate": 0.0002,
"loss": 0.0067,
"step": 141
},
{
"epoch": 2.49,
"learning_rate": 0.0002,
"loss": 0.0144,
"step": 142
},
{
"epoch": 2.51,
"learning_rate": 0.0002,
"loss": 0.0017,
"step": 143
},
{
"epoch": 2.53,
"learning_rate": 0.0002,
"loss": 0.025,
"step": 144
},
{
"epoch": 2.54,
"learning_rate": 0.0002,
"loss": 0.0605,
"step": 145
},
{
"epoch": 2.56,
"learning_rate": 0.0002,
"loss": 0.0154,
"step": 146
},
{
"epoch": 2.58,
"learning_rate": 0.0002,
"loss": 0.002,
"step": 147
},
{
"epoch": 2.6,
"learning_rate": 0.0002,
"loss": 0.0012,
"step": 148
},
{
"epoch": 2.61,
"learning_rate": 0.0002,
"loss": 0.0066,
"step": 149
},
{
"epoch": 2.63,
"learning_rate": 0.0002,
"loss": 0.0065,
"step": 150
},
{
"epoch": 2.63,
"eval_code_accuracy": 0.95,
"eval_code_average_probability": 0.9435591101646423,
"eval_code_brier_score": 0.03846995532512665,
"eval_code_loss": 0.17103993892669678,
"eval_code_probabilities": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999970197677612,
0.9055734872817993,
0.42537397146224976,
0.9499343633651733,
1.0,
1.0,
1.0,
0.9999983310699463,
1.0,
1.0,
1.0,
0.9999964237213135,
0.9999994039535522,
0.8147205710411072,
0.8424564599990845,
0.9474385976791382,
0.003861561883240938,
1.0,
1.0,
1.0,
0.035812485963106155,
0.9962039589881897,
1.0,
1.0,
1.0,
1.0,
0.9999990463256836,
0.9999998807907104,
1.0,
1.0,
1.0,
0.9999754428863525,
1.0,
1.0,
1.0,
0.9999997615814209,
1.0,
1.0,
1.0,
1.0,
0.9926579594612122,
0.7481365203857422,
0.9441766142845154,
1.0,
0.9999790191650391,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
0.9999998807907104,
0.9856025576591492,
1.0,
0.9969327449798584,
0.9738338589668274,
0.9861930012702942,
0.007046875543892384,
0.9993067979812622,
0.9975935816764832,
0.9999728202819824,
0.3661898970603943,
1.0,
0.9999417066574097,
1.0,
1.0,
0.9999967813491821,
1.0,
0.9991926550865173,
0.8271266222000122,
0.9992584586143494,
1.0,
0.9992876648902893,
1.0,
0.9999542236328125,
0.9999967813491821,
0.9966945648193359,
0.9999995231628418,
0.9955873489379883,
0.9999998807907104,
0.8881388306617737,
0.9999102354049683,
0.9994561076164246,
0.9989873766899109,
0.9133449792861938,
0.9107068181037903,
0.9093677401542664
],
"eval_code_runtime": 105.8053,
"eval_code_samples_per_second": 0.945,
"eval_code_score": -0.03846995532512665,
"eval_code_steps_per_second": 0.038,
"step": 150
},
{
"loss": 0.0046,
"learning_rate": 0.0002,
"epoch": 2.65,
"step": 151
},
{
"loss": 0.0011,
"learning_rate": 0.0002,
"epoch": 2.67,
"step": 152
},
{
"loss": 0.0004,
"learning_rate": 0.0002,
"epoch": 2.68,
"step": 153
},
{
"loss": 0.0009,
"learning_rate": 0.0002,
"epoch": 2.7,
"step": 154
},
{
"loss": 0.0167,
"learning_rate": 0.0002,
"epoch": 2.72,
"step": 155
},
{
"loss": 0.0033,
"learning_rate": 0.0002,
"epoch": 2.74,
"step": 156
},
{
"loss": 0.0004,
"learning_rate": 0.0002,
"epoch": 2.75,
"step": 157
},
{
"loss": 0.0013,
"learning_rate": 0.0002,
"epoch": 2.77,
"step": 158
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 2.79,
"step": 159
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 2.81,
"step": 160
},
{
"loss": 0.002,
"learning_rate": 0.0002,
"epoch": 2.82,
"step": 161
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 2.84,
"step": 162
},
{
"loss": 0.0052,
"learning_rate": 0.0002,
"epoch": 2.86,
"step": 163
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 2.88,
"step": 164
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.89,
"step": 165
},
{
"loss": 0.0043,
"learning_rate": 0.0002,
"epoch": 2.91,
"step": 166
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.93,
"step": 167
},
{
"loss": 0.0011,
"learning_rate": 0.0002,
"epoch": 2.95,
"step": 168
},
{
"loss": 0.0024,
"learning_rate": 0.0002,
"epoch": 2.96,
"step": 169
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.98,
"step": 170
},
{
"loss": 0.0009,
"learning_rate": 0.0002,
"epoch": 3.0,
"step": 171
},
{
"loss": 0.0008,
"learning_rate": 0.0002,
"epoch": 3.02,
"step": 172
},
{
"loss": 0.0007,
"learning_rate": 0.0002,
"epoch": 3.04,
"step": 173
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.05,
"step": 174
},
{
"loss": 0.0182,
"learning_rate": 0.0002,
"epoch": 3.07,
"step": 175
},
{
"eval_code_loss": 0.24447500705718994,
"eval_code_score": -0.04580119252204895,
"eval_code_brier_score": 0.04580119252204895,
"eval_code_average_probability": 0.9384365677833557,
"eval_code_accuracy": 0.95,
"eval_code_probabilities": [
1.0,
0.9999996423721313,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
0.9705566167831421,
0.28825750946998596,
0.990113377571106,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.8121438026428223,
0.8320725560188293,
0.9545630216598511,
0.022090904414653778,
1.0,
1.0,
1.0,
0.0003882711462210864,
0.9997542500495911,
1.0,
1.0,
1.0,
1.0,
0.9999994039535522,
0.9999996423721313,
1.0,
1.0,
1.0,
0.9999974966049194,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9805604219436646,
0.656412661075592,
0.9261404871940613,
1.0,
0.9999086856842041,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
1.0,
0.9739339351654053,
1.0,
0.9995070695877075,
0.9991480112075806,
0.9978482723236084,
0.0005162784364074469,
0.999909520149231,
1.0,
1.0,
0.09138258546590805,
1.0,
0.9999922513961792,
1.0,
1.0,
1.0,
1.0,
0.9993377327919006,
0.829042375087738,
0.993126630783081,
1.0,
0.9995529055595398,
1.0,
0.9999717473983765,
0.9999934434890747,
0.9994831085205078,
1.0,
0.9715592861175537,
1.0,
0.7726230025291443,
0.9999821186065674,
0.9998100399971008,
0.9989909529685974,
0.8984997272491455,
0.8994256854057312,
0.9870588183403015
],
"eval_code_runtime": 104.8369,
"eval_code_samples_per_second": 0.954,
"eval_code_steps_per_second": 0.038,
"epoch": 3.07,
"step": 175
},
{
"loss": 0.0027,
"learning_rate": 0.0002,
"epoch": 3.09,
"step": 176
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.11,
"step": 177
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 3.12,
"step": 178
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 3.14,
"step": 179
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.16,
"step": 180
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.18,
"step": 181
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 3.19,
"step": 182
},
{
"loss": 0.001,
"learning_rate": 0.0002,
"epoch": 3.21,
"step": 183
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.23,
"step": 184
},
{
"loss": 0.0004,
"learning_rate": 0.0002,
"epoch": 3.25,
"step": 185
},
{
"loss": 0.0008,
"learning_rate": 0.0002,
"epoch": 3.26,
"step": 186
},
{
"loss": 0.0315,
"learning_rate": 0.0002,
"epoch": 3.28,
"step": 187
},
{
"loss": 0.0148,
"learning_rate": 0.0002,
"epoch": 3.3,
"step": 188
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.32,
"step": 189
},
{
"loss": 0.0016,
"learning_rate": 0.0002,
"epoch": 3.33,
"step": 190
},
{
"loss": 0.0053,
"learning_rate": 0.0002,
"epoch": 3.35,
"step": 191
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.37,
"step": 192
},
{
"loss": 0.0009,
"learning_rate": 0.0002,
"epoch": 3.39,
"step": 193
},
{
"loss": 0.1843,
"learning_rate": 0.0002,
"epoch": 3.4,
"step": 194
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.42,
"step": 195
},
{
"loss": 0.012,
"learning_rate": 0.0002,
"epoch": 3.44,
"step": 196
},
{
"loss": 0.0031,
"learning_rate": 0.0002,
"epoch": 3.46,
"step": 197
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 3.47,
"step": 198
},
{
"loss": 0.0062,
"learning_rate": 0.0002,
"epoch": 3.49,
"step": 199
},
{
"loss": 0.0011,
"learning_rate": 0.0002,
"epoch": 3.51,
"step": 200
},
{
"eval_code_loss": 0.20703525841236115,
"eval_code_score": -0.036074623465538025,
"eval_code_brier_score": 0.036074623465538025,
"eval_code_average_probability": 0.9416353702545166,
"eval_code_accuracy": 0.96,
"eval_code_probabilities": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999991655349731,
0.999995231628418,
0.9498335719108582,
0.30828458070755005,
0.987034261226654,
0.999970555305481,
0.9999929666519165,
0.9999411106109619,
0.999946117401123,
0.9999997615814209,
1.0,
1.0,
1.0,
1.0,
0.717918872833252,
0.6784900426864624,
0.8770557641983032,
7.296191597561119e-06,
0.9999998807907104,
0.9999997615814209,
1.0,
0.9999603033065796,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999985694885254,
0.9999916553497314,
1.0,
1.0,
1.0,
0.9999998807907104,
1.0,
1.0,
0.9999997615814209,
0.9999793767929077,
1.0,
1.0,
1.0,
1.0,
0.9963011741638184,
0.5810772180557251,
0.9937513470649719,
1.0,
0.9999946355819702,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
0.9999996423721313,
0.999351441860199,
1.0,
0.9979580640792847,
0.9855725169181824,
0.9849231243133545,
0.017488110810518265,
0.999018669128418,
0.9788364768028259,
0.9989467263221741,
0.29412388801574707,
1.0,
0.9881665110588074,
1.0,
1.0,
0.9999973773956299,
1.0,
0.9999986886978149,
0.8509193658828735,
0.9998080134391785,
1.0,
0.9999957084655762,
1.0,
0.9992415904998779,
0.9998071789741516,
0.9979315996170044,
0.9996376037597656,
0.9590405821800232,
0.9996711015701294,
0.5990511775016785,
0.9998351335525513,
0.9997956156730652,
0.9722345471382141,
0.7871147990226746,
0.8062353730201721,
0.8593048453330994
],
"eval_code_runtime": 104.8009,
"eval_code_samples_per_second": 0.954,
"eval_code_steps_per_second": 0.038,
"epoch": 3.51,
"step": 200
},
{
"loss": 0.0071,
"learning_rate": 0.0002,
"epoch": 3.53,
"step": 201
},
{
"loss": 0.0311,
"learning_rate": 0.0002,
"epoch": 3.54,
"step": 202
},
{
"loss": 0.0009,
"learning_rate": 0.0002,
"epoch": 3.56,
"step": 203
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 3.58,
"step": 204
},
{
"loss": 0.0004,
"learning_rate": 0.0002,
"epoch": 3.6,
"step": 205
},
{
"loss": 0.0018,
"learning_rate": 0.0002,
"epoch": 3.61,
"step": 206
},
{
"loss": 0.0065,
"learning_rate": 0.0002,
"epoch": 3.63,
"step": 207
},
{
"loss": 0.001,
"learning_rate": 0.0002,
"epoch": 3.65,
"step": 208
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.67,
"step": 209
},
{
"loss": 0.0302,
"learning_rate": 0.0002,
"epoch": 3.68,
"step": 210
},
{
"loss": 0.0662,
"learning_rate": 0.0002,
"epoch": 3.7,
"step": 211
},
{
"loss": 0.0071,
"learning_rate": 0.0002,
"epoch": 3.72,
"step": 212
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 3.74,
"step": 213
},
{
"loss": 0.0527,
"learning_rate": 0.0002,
"epoch": 3.75,
"step": 214
},
{
"loss": 0.0069,
"learning_rate": 0.0002,
"epoch": 3.77,
"step": 215
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 3.79,
"step": 216
},
{
"loss": 0.0056,
"learning_rate": 0.0002,
"epoch": 3.81,
"step": 217
},
{
"loss": 0.0205,
"learning_rate": 0.0002,
"epoch": 3.82,
"step": 218
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 3.84,
"step": 219
},
{
"loss": 0.0048,
"learning_rate": 0.0002,
"epoch": 3.86,
"step": 220
},
{
"loss": 0.0307,
"learning_rate": 0.0002,
"epoch": 3.88,
"step": 221
},
{
"loss": 0.0092,
"learning_rate": 0.0002,
"epoch": 3.89,
"step": 222
},
{
"loss": 0.0425,
"learning_rate": 0.0002,
"epoch": 3.91,
"step": 223
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 3.93,
"step": 224
},
{
"loss": 0.0021,
"learning_rate": 0.0002,
"epoch": 3.95,
"step": 225
},
{
"eval_code_loss": 0.1442316770553589,
"eval_code_score": -0.03884093835949898,
"eval_code_brier_score": 0.03884093835949898,
"eval_code_average_probability": 0.9314249157905579,
"eval_code_accuracy": 0.95,
"eval_code_probabilities": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
0.9999983310699463,
0.9166925549507141,
0.46314355731010437,
0.9695256948471069,
0.9999979734420776,
0.9999997615814209,
0.9999912977218628,
0.9998043179512024,
0.9999998807907104,
1.0,
0.999934196472168,
0.9998651742935181,
0.9999654293060303,
0.7587816119194031,
0.8365660309791565,
0.8947399258613586,
0.007870269939303398,
0.9999408721923828,
0.9807198643684387,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999988079071045,
1.0,
1.0,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
0.9998663663864136,
0.9656995534896851,
0.935349702835083,
1.0,
0.9999972581863403,
1.0,
1.0,
1.0,
0.9999998807907104,
0.9999998807907104,
0.9999998807907104,
0.9994795918464661,
0.9999734163284302,
0.4865659773349762,
0.5832042694091797,
0.9857308268547058,
0.015312162227928638,
0.999519944190979,
0.7279074788093567,
0.9087716937065125,
0.6908769011497498,
1.0,
0.9999642372131348,
1.0,
1.0,
0.9999998807907104,
1.0,
1.0,
0.8065006136894226,
0.9999997615814209,
1.0,
0.999992847442627,
1.0,
0.998808741569519,
0.9994845390319824,
0.9905843734741211,
0.9999678134918213,
0.9825097322463989,
0.9999821186065674,
0.316112756729126,
0.9999034404754639,
0.9957683086395264,
0.8763726353645325,
0.557384192943573,
0.5571630001068115,
0.936220109462738
],
"eval_code_runtime": 104.8083,
"eval_code_samples_per_second": 0.954,
"eval_code_steps_per_second": 0.038,
"epoch": 3.95,
"step": 225
},
{
"loss": 0.0037,
"learning_rate": 0.0002,
"epoch": 3.96,
"step": 226
},
{
"loss": 0.0061,
"learning_rate": 0.0002,
"epoch": 3.98,
"step": 227
},
{
"loss": 0.0211,
"learning_rate": 0.0002,
"epoch": 4.0,
"step": 228
},
{
"loss": 0.0028,
"learning_rate": 0.0002,
"epoch": 4.02,
"step": 229
},
{
"loss": 0.0009,
"learning_rate": 0.0002,
"epoch": 4.04,
"step": 230
},
{
"loss": 0.0146,
"learning_rate": 0.0002,
"epoch": 4.05,
"step": 231
},
{
"loss": 0.0155,
"learning_rate": 0.0002,
"epoch": 4.07,
"step": 232
},
{
"loss": 0.0008,
"learning_rate": 0.0002,
"epoch": 4.09,
"step": 233
},
{
"loss": 0.0008,
"learning_rate": 0.0002,
"epoch": 4.11,
"step": 234
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 4.12,
"step": 235
},
{
"loss": 0.0004,
"learning_rate": 0.0002,
"epoch": 4.14,
"step": 236
},
{
"loss": 0.0073,
"learning_rate": 0.0002,
"epoch": 4.16,
"step": 237
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 4.18,
"step": 238
},
{
"loss": 0.0011,
"learning_rate": 0.0002,
"epoch": 4.19,
"step": 239
},
{
"loss": 0.0004,
"learning_rate": 0.0002,
"epoch": 4.21,
"step": 240
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 4.23,
"step": 241
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 4.25,
"step": 242
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 4.26,
"step": 243
},
{
"loss": 0.0009,
"learning_rate": 0.0002,
"epoch": 4.28,
"step": 244
},
{
"loss": 0.0026,
"learning_rate": 0.0002,
"epoch": 4.3,
"step": 245
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 4.32,
"step": 246
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 4.33,
"step": 247
},
{
"loss": 0.0172,
"learning_rate": 0.0002,
"epoch": 4.35,
"step": 248
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 4.37,
"step": 249
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 4.39,
"step": 250
},
{
"eval_code_loss": 0.1744336634874344,
"eval_code_score": -0.03369342163205147,
"eval_code_brier_score": 0.03369342163205147,
"eval_code_average_probability": 0.9495358467102051,
"eval_code_accuracy": 0.97,
"eval_code_probabilities": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9451481699943542,
0.5480159521102905,
0.9964591860771179,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.7053650617599487,
0.8087413311004639,
0.9013814330101013,
0.0008012360776774585,
1.0,
0.9999994039535522,
1.0,
0.9998193383216858,
0.9999997615814209,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999983310699463,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9992625117301941,
0.9287911653518677,
0.872682511806488,
1.0,
0.9996117949485779,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999651908874512,
1.0,
0.9498048424720764,
0.9871782064437866,
0.9963241815567017,
0.0026193673256784678,
0.999943733215332,
0.9986830353736877,
0.9999673366546631,
0.9674615859985352,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999785423278809,
0.9016892313957214,
0.9997751116752625,
1.0,
0.9999997615814209,
1.0,
0.9999815225601196,
0.9999998807907104,
0.9999561309814453,
1.0,
0.9956650137901306,
1.0,
0.08844359964132309,
0.9999833106994629,
0.9990620017051697,
0.9781332612037659,
0.747072160243988,
0.6776840090751648,
0.9581350088119507
],
"eval_code_runtime": 104.7916,
"eval_code_samples_per_second": 0.954,
"eval_code_steps_per_second": 0.038,
"epoch": 4.39,
"step": 250
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 4.4,
"step": 251
},
{
"loss": 0.0008,
"learning_rate": 0.0002,
"epoch": 4.42,
"step": 252
},
{
"loss": 0.0004,
"learning_rate": 0.0002,
"epoch": 4.44,
"step": 253
},
{
"loss": 0.0034,
"learning_rate": 0.0002,
"epoch": 4.46,
"step": 254
},
{
"loss": 0.0006,
"learning_rate": 0.0002,
"epoch": 4.47,
"step": 255
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 4.49,
"step": 256
},
{
"loss": 0.0004,
"learning_rate": 0.0002,
"epoch": 4.51,
"step": 257
},
{
"loss": 0.0009,
"learning_rate": 0.0002,
"epoch": 4.53,
"step": 258
},
{
"loss": 0.0042,
"learning_rate": 0.0002,
"epoch": 4.54,
"step": 259
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 4.56,
"step": 260
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 4.58,
"step": 261
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 4.6,
"step": 262
},
{
"loss": 0.0023,
"learning_rate": 0.0002,
"epoch": 4.61,
"step": 263
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 4.63,
"step": 264
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 4.65,
"step": 265
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 4.67,
"step": 266
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 4.68,
"step": 267
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 4.7,
"step": 268
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 4.72,
"step": 269
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 4.74,
"step": 270
},
{
"loss": 0.0013,
"learning_rate": 0.0002,
"epoch": 4.75,
"step": 271
},
{
"loss": 0.006,
"learning_rate": 0.0002,
"epoch": 4.77,
"step": 272
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 4.79,
"step": 273
},
{
"loss": 0.0008,
"learning_rate": 0.0002,
"epoch": 4.81,
"step": 274
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 4.82,
"step": 275
},
{
"eval_code_loss": 0.18856269121170044,
"eval_code_score": -0.03526609018445015,
"eval_code_brier_score": 0.03526609018445015,
"eval_code_average_probability": 0.949932873249054,
"eval_code_accuracy": 0.96,
"eval_code_probabilities": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9667010307312012,
0.4576479494571686,
0.9961231350898743,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.7199408411979675,
0.8098323345184326,
0.8810715079307556,
0.00041508462163619697,
1.0,
1.0,
1.0,
0.9999997615814209,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999830722808838,
0.9893618226051331,
0.948550820350647,
1.0,
0.9997448325157166,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999991655349731,
1.0,
0.9401667714118958,
0.9961704611778259,
0.9991104006767273,
0.0038628315087407827,
0.9999805688858032,
0.9974443912506104,
0.9999669790267944,
0.9498675465583801,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
0.9562873244285583,
0.9999939203262329,
1.0,
0.9999998807907104,
1.0,
0.9999774694442749,
0.9999997615814209,
0.9999823570251465,
1.0,
0.9991405010223389,
1.0,
0.027186574414372444,
0.9999986886978149,
0.9999057054519653,
0.9804957509040833,
0.7432851791381836,
0.7164115905761719,
0.9146708250045776
],
"eval_code_runtime": 104.8208,
"eval_code_samples_per_second": 0.954,
"eval_code_steps_per_second": 0.038,
"epoch": 4.82,
"step": 275
},
{
"loss": 0.0018,
"learning_rate": 0.0002,
"epoch": 4.84,
"step": 276
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 4.86,
"step": 277
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 4.88,
"step": 278
},
{
"loss": 0.0019,
"learning_rate": 0.0002,
"epoch": 4.89,
"step": 279
},
{
"loss": 0.0009,
"learning_rate": 0.0002,
"epoch": 4.91,
"step": 280
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 4.93,
"step": 281
},
{
"loss": 0.0008,
"learning_rate": 0.0002,
"epoch": 4.95,
"step": 282
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 4.96,
"step": 283
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 4.98,
"step": 284
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 5.0,
"step": 285
},
{
"loss": 0.0021,
"learning_rate": 0.0002,
"epoch": 5.02,
"step": 286
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 5.04,
"step": 287
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 5.05,
"step": 288
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 5.07,
"step": 289
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 5.09,
"step": 290
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 5.11,
"step": 291
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 5.12,
"step": 292
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 5.14,
"step": 293
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 5.16,
"step": 294
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 5.18,
"step": 295
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 5.19,
"step": 296
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 5.21,
"step": 297
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 5.23,
"step": 298
},
{
"loss": 0.0008,
"learning_rate": 0.0002,
"epoch": 5.25,
"step": 299
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 5.26,
"step": 300
},
{
"eval_code_loss": 0.17653299868106842,
"eval_code_score": -0.035850197076797485,
"eval_code_brier_score": 0.035850197076797485,
"eval_code_average_probability": 0.949984610080719,
"eval_code_accuracy": 0.96,
"eval_code_probabilities": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9785318374633789,
0.42346829175949097,
0.9971503615379333,
1.0,
1.0,
0.9999997615814209,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.7221306562423706,
0.8104501962661743,
0.8751990795135498,
0.0004177717200946063,
1.0,
1.0,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999911785125732,
0.9935014843940735,
0.9632013440132141,
1.0,
0.9997548460960388,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.999998927116394,
1.0,
0.9625283479690552,
0.9952881336212158,
0.9994906187057495,
0.011133184656500816,
0.9999599456787109,
0.9987699389457703,
0.9999761581420898,
0.9492788314819336,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9763202667236328,
0.9999985694885254,
1.0,
0.9999996423721313,
1.0,
0.9999876022338867,
0.9999998807907104,
0.9999854564666748,
1.0,
0.9988405108451843,
1.0,
0.03199751302599907,
0.9999868869781494,
0.9996381998062134,
0.9803478121757507,
0.6885609030723572,
0.6760653853416443,
0.9665107727050781
],
"eval_code_runtime": 104.8074,
"eval_code_samples_per_second": 0.954,
"eval_code_steps_per_second": 0.038,
"epoch": 5.26,
"step": 300
},
{
"train_runtime": 13610.3633,
"train_samples_per_second": 0.705,
"train_steps_per_second": 0.022,
"total_flos": 0.0,
"train_loss": 0.002647306595269659,
"epoch": 5.26,
"step": 300
}
]