llama-30b-code_low_quality / training_logs.json
joshuaclymer's picture
Upload folder using huggingface_hub
5b3d076
[
{
"loss": 0.7435,
"learning_rate": 0.0002,
"epoch": 0.03,
"step": 1
},
{
"loss": 0.5463,
"learning_rate": 0.0002,
"epoch": 0.05,
"step": 2
},
{
"loss": 0.4573,
"learning_rate": 0.0002,
"epoch": 0.08,
"step": 3
},
{
"loss": 0.3088,
"learning_rate": 0.0002,
"epoch": 0.11,
"step": 4
},
{
"loss": 0.2566,
"learning_rate": 0.0002,
"epoch": 0.13,
"step": 5
},
{
"loss": 0.235,
"learning_rate": 0.0002,
"epoch": 0.16,
"step": 6
},
{
"loss": 0.113,
"learning_rate": 0.0002,
"epoch": 0.18,
"step": 7
},
{
"loss": 0.1435,
"learning_rate": 0.0002,
"epoch": 0.21,
"step": 8
},
{
"loss": 0.1263,
"learning_rate": 0.0002,
"epoch": 0.24,
"step": 9
},
{
"loss": 0.0532,
"learning_rate": 0.0002,
"epoch": 0.26,
"step": 10
},
{
"loss": 0.0326,
"learning_rate": 0.0002,
"epoch": 0.29,
"step": 11
},
{
"loss": 0.0732,
"learning_rate": 0.0002,
"epoch": 0.32,
"step": 12
},
{
"loss": 0.0714,
"learning_rate": 0.0002,
"epoch": 0.34,
"step": 13
},
{
"loss": 0.01,
"learning_rate": 0.0002,
"epoch": 0.37,
"step": 14
},
{
"loss": 0.4883,
"learning_rate": 0.0002,
"epoch": 0.39,
"step": 15
},
{
"loss": 0.0547,
"learning_rate": 0.0002,
"epoch": 0.42,
"step": 16
},
{
"loss": 0.0418,
"learning_rate": 0.0002,
"epoch": 0.45,
"step": 17
},
{
"loss": 0.0242,
"learning_rate": 0.0002,
"epoch": 0.47,
"step": 18
},
{
"loss": 0.1662,
"learning_rate": 0.0002,
"epoch": 0.5,
"step": 19
},
{
"loss": 0.0201,
"learning_rate": 0.0002,
"epoch": 0.53,
"step": 20
},
{
"loss": 0.0896,
"learning_rate": 0.0002,
"epoch": 0.55,
"step": 21
},
{
"loss": 0.0298,
"learning_rate": 0.0002,
"epoch": 0.58,
"step": 22
},
{
"loss": 0.0283,
"learning_rate": 0.0002,
"epoch": 0.61,
"step": 23
},
{
"loss": 0.0214,
"learning_rate": 0.0002,
"epoch": 0.63,
"step": 24
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 0.66,
"step": 25
},
{
"eval_code_low_quality_loss": 0.0006595517043024302,
"eval_code_low_quality_score": -1.1128052392450627e-05,
"eval_code_low_quality_brier_score": 1.1128052392450627e-05,
"eval_code_low_quality_average_probability": 0.9993544220924377,
"eval_code_low_quality_accuracy": 1.0,
"eval_code_low_quality_probabilities": [
1.0,
1.0,
0.9999339580535889,
0.9999250173568726,
0.9999997615814209,
1.0,
1.0,
0.9999995231628418,
1.0,
0.9999998807907104,
1.0,
1.0,
0.9999914169311523,
1.0,
0.9999998807907104,
0.9999998807907104,
0.9999932050704956,
0.9999402761459351,
0.9999994039535522,
0.9999994039535522,
0.9999964237213135,
0.9999980926513672,
0.9999585151672363,
0.9999517202377319,
0.9974417686462402,
0.9972801208496094,
0.9999949932098389,
0.9999983310699463,
1.0,
1.0,
0.9707977771759033,
0.9999984502792358,
0.9999997615814209,
0.9999998807907104,
0.9999961853027344,
0.9999938011169434,
0.9999990463256836,
0.9999834299087524,
0.9982851147651672,
0.9977701902389526,
0.9946922659873962,
0.9993091821670532,
0.9999990463256836,
0.9999549388885498,
0.9999979734420776,
1.0,
0.9999107122421265,
0.9998675584793091,
0.999998927116394,
0.9999998807907104,
0.9999997615814209,
1.0,
0.9999986886978149,
0.9999995231628418,
0.9999984502792358,
0.999998927116394,
1.0,
1.0,
0.9999972581863403,
0.9999957084655762,
0.9999988079071045,
0.9999986886978149,
0.9999998807907104,
0.9999723434448242,
0.9999982118606567,
0.9999983310699463,
1.0,
1.0,
1.0,
1.0,
0.9999990463256836,
1.0,
1.0,
1.0,
0.9999997615814209,
0.9999998807907104,
0.9999725818634033,
0.9999992847442627,
0.9999661445617676,
0.999969482421875,
0.9999997615814209,
0.9999995231628418,
1.0,
1.0,
1.0,
1.0,
0.9960858821868896,
0.9996801614761353,
0.9999926090240479,
0.9999170303344727,
0.9999935626983643,
0.9999808073043823,
0.9999854564666748,
0.9861019849777222,
0.9998243451118469,
0.9990695118904114,
0.9999963045120239,
0.9999998807907104,
0.9999969005584717,
1.0
],
"eval_code_low_quality_runtime": 141.0153,
"eval_code_low_quality_samples_per_second": 0.709,
"eval_code_low_quality_steps_per_second": 0.028,
"epoch": 0.66,
"step": 25
},
{
"eval_code_loss": 0.5833292603492737,
"eval_code_score": -0.1804097294807434,
"eval_code_brier_score": 0.1804097294807434,
"eval_code_average_probability": 0.6770716309547424,
"eval_code_accuracy": 0.76,
"eval_code_probabilities": [
0.10096369683742523,
0.21281138062477112,
0.2073233723640442,
0.9848010540008545,
0.9974289536476135,
0.7294337749481201,
0.6149591207504272,
0.10706400871276855,
0.5733828544616699,
0.6794034242630005,
0.9404621124267578,
0.8340038061141968,
0.8182052969932556,
0.9913336634635925,
0.8865165710449219,
0.473355770111084,
0.44256484508514404,
0.44847366213798523,
0.9831279516220093,
0.9921797513961792,
0.9801183938980103,
0.5592747330665588,
0.836647093296051,
0.8669078946113586,
0.017949247732758522,
0.02475733682513237,
0.025172699242830276,
0.9788206815719604,
0.9780603051185608,
0.9747424125671387,
0.4794495105743408,
0.978365421295166,
0.7862324714660645,
0.19819535315036774,
0.08767852187156677,
0.06654077768325806,
0.8256481885910034,
0.9054414629936218,
0.698613166809082,
0.9710825085639954,
0.9051369428634644,
0.9175053238868713,
0.5884507298469543,
0.6432308554649353,
0.5494475960731506,
0.6364181041717529,
0.985031008720398,
0.934455394744873,
0.5507091879844666,
0.41536206007003784,
0.9477699995040894,
0.9997536540031433,
0.9998169541358948,
0.9843502044677734,
0.5441336631774902,
0.5056722164154053,
0.45167604088783264,
0.4324477016925812,
0.46727290749549866,
0.4145377576351166,
0.7619684934616089,
0.5765150785446167,
0.6369282007217407,
0.8258550763130188,
0.5172879695892334,
0.4960772395133972,
0.9996997117996216,
0.7111136317253113,
0.681095540523529,
0.7522112727165222,
0.7496448755264282,
0.7440780997276306,
0.5330413579940796,
0.5731443166732788,
0.5385348796844482,
0.9997851252555847,
0.9988688826560974,
0.9994134902954102,
0.991698145866394,
0.7425008416175842,
0.7211678624153137,
0.9967381358146667,
0.5310153365135193,
0.997730553150177,
0.7988360524177551,
0.7310686707496643,
0.9935532808303833,
0.9999699592590332,
0.9999909400939941,
0.9834601283073425,
0.5250475406646729,
0.5600137114524841,
0.5266534090042114,
0.48367124795913696,
0.5329293608665466,
0.5332629084587097,
0.6026731729507446,
0.34615248441696167,
0.3938570022583008,
0.4892289638519287
],
"eval_code_runtime": 134.9932,
"eval_code_samples_per_second": 0.741,
"eval_code_steps_per_second": 0.03,
"epoch": 0.66,
"step": 25
},
{
"loss": 0.0016,
"learning_rate": 0.0002,
"epoch": 0.68,
"step": 26
},
{
"loss": 0.0076,
"learning_rate": 0.0002,
"epoch": 0.71,
"step": 27
},
{
"loss": 0.0014,
"learning_rate": 0.0002,
"epoch": 0.74,
"step": 28
},
{
"loss": 0.0359,
"learning_rate": 0.0002,
"epoch": 0.76,
"step": 29
},
{
"loss": 0.0008,
"learning_rate": 0.0002,
"epoch": 0.79,
"step": 30
},
{
"loss": 0.0076,
"learning_rate": 0.0002,
"epoch": 0.82,
"step": 31
},
{
"loss": 0.001,
"learning_rate": 0.0002,
"epoch": 0.84,
"step": 32
},
{
"loss": 0.0458,
"learning_rate": 0.0002,
"epoch": 0.87,
"step": 33
},
{
"loss": 0.0017,
"learning_rate": 0.0002,
"epoch": 0.89,
"step": 34
},
{
"loss": 0.0328,
"learning_rate": 0.0002,
"epoch": 0.92,
"step": 35
},
{
"loss": 0.0102,
"learning_rate": 0.0002,
"epoch": 0.95,
"step": 36
},
{
"loss": 0.0025,
"learning_rate": 0.0002,
"epoch": 0.97,
"step": 37
},
{
"loss": 0.0124,
"learning_rate": 0.0002,
"epoch": 1.0,
"step": 38
},
{
"loss": 0.0019,
"learning_rate": 0.0002,
"epoch": 1.03,
"step": 39
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 1.05,
"step": 40
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 1.08,
"step": 41
},
{
"loss": 0.0182,
"learning_rate": 0.0002,
"epoch": 1.11,
"step": 42
},
{
"loss": 0.0033,
"learning_rate": 0.0002,
"epoch": 1.13,
"step": 43
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 1.16,
"step": 44
},
{
"loss": 0.2313,
"learning_rate": 0.0002,
"epoch": 1.18,
"step": 45
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.21,
"step": 46
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.24,
"step": 47
},
{
"loss": 0.0007,
"learning_rate": 0.0002,
"epoch": 1.26,
"step": 48
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.29,
"step": 49
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.32,
"step": 50
},
{
"eval_code_low_quality_loss": 9.984543430618942e-05,
"eval_code_low_quality_score": -4.904638899461133e-07,
"eval_code_low_quality_brier_score": 4.904638899461133e-07,
"eval_code_low_quality_average_probability": 0.9999052286148071,
"eval_code_low_quality_accuracy": 1.0,
"eval_code_low_quality_probabilities": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999997615814209,
1.0,
1.0,
1.0,
1.0,
0.9999996423721313,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999854564666748,
0.999990701675415,
0.9999991655349731,
0.9999998807907104,
1.0,
1.0,
0.9997785687446594,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
0.9999037981033325,
0.999776303768158,
0.9996434450149536,
0.9999954700469971,
1.0,
1.0,
0.9931026697158813,
1.0,
0.9999970197677612,
0.9999984502792358,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.999966025352478,
0.9999886751174927,
0.9999871253967285,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999964237213135,
0.9999996423721313,
0.9999971389770508,
0.999998927116394,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999969005584717,
0.9999992847442627,
0.9999997615814209,
0.9999871253967285,
1.0,
1.0,
1.0,
0.999290943145752,
0.9999997615814209,
0.9999961853027344,
0.9999998807907104,
1.0,
0.9991438388824463,
0.9999996423721313
],
"eval_code_low_quality_runtime": 141.0508,
"eval_code_low_quality_samples_per_second": 0.709,
"eval_code_low_quality_steps_per_second": 0.028,
"epoch": 1.32,
"step": 50
},
{
"eval_code_loss": 0.6208350658416748,
"eval_code_score": -0.16615988314151764,
"eval_code_brier_score": 0.16615988314151764,
"eval_code_average_probability": 0.7172398567199707,
"eval_code_accuracy": 0.78,
"eval_code_probabilities": [
0.00898885540664196,
0.011922608129680157,
0.06844734400510788,
1.0,
0.9941722750663757,
0.9803956151008606,
0.7216671705245972,
0.8719096779823303,
0.6083595752716064,
0.7415289282798767,
0.9492217898368835,
0.9170193672180176,
0.7766098380088806,
0.9932230710983276,
0.8845409750938416,
0.5273276567459106,
0.47327327728271484,
0.514703094959259,
0.8447608351707458,
0.9846923351287842,
0.8573033809661865,
0.6226664185523987,
0.9512245059013367,
0.9716692566871643,
0.015323023311793804,
0.01599641889333725,
0.012340807355940342,
0.9995960593223572,
0.9999792575836182,
0.9965634942054749,
0.44286829233169556,
0.9999172687530518,
0.6884943842887878,
0.6484965085983276,
0.09222234040498734,
0.08776669204235077,
0.9685974717140198,
0.9530619382858276,
0.7834113240242004,
0.9998219609260559,
0.9753877520561218,
0.9868707060813904,
0.8314498662948608,
0.9281793236732483,
0.8543185591697693,
0.8703754544258118,
0.9975754618644714,
0.9943013191223145,
0.6888080835342407,
0.631050169467926,
0.9991812109947205,
0.9999918937683105,
0.9999994039535522,
0.9998878240585327,
0.6626654863357544,
0.4978952705860138,
0.4795929491519928,
0.7897204160690308,
0.597411572933197,
0.758142352104187,
0.46403029561042786,
0.3406503200531006,
0.3812786936759949,
0.5821160674095154,
0.554485023021698,
0.5045574903488159,
0.9999997615814209,
0.3512322008609772,
0.3285271227359772,
0.7814154624938965,
0.7234558463096619,
0.8835054039955139,
0.5704320073127747,
0.7975870966911316,
0.7317837476730347,
0.9999594688415527,
0.9992265701293945,
0.9976761937141418,
0.998828113079071,
0.7545521855354309,
0.8591522574424744,
0.9943245053291321,
0.49560773372650146,
0.9899377822875977,
0.9942289590835571,
0.9932680726051331,
0.9997400641441345,
0.9999916553497314,
0.9999696016311646,
0.997422456741333,
0.6934857368469238,
0.7063461542129517,
0.6493465304374695,
0.48497462272644043,
0.48536550998687744,
0.584743857383728,
0.7083728313446045,
0.1657319813966751,
0.1531611829996109,
0.5006277561187744
],
"eval_code_runtime": 135.166,
"eval_code_samples_per_second": 0.74,
"eval_code_steps_per_second": 0.03,
"epoch": 1.32,
"step": 50
},
{
"loss": 0.0007,
"learning_rate": 0.0002,
"epoch": 1.34,
"step": 51
},
{
"loss": 0.0631,
"learning_rate": 0.0002,
"epoch": 1.37,
"step": 52
},
{
"loss": 0.0133,
"learning_rate": 0.0002,
"epoch": 1.39,
"step": 53
},
{
"loss": 0.0013,
"learning_rate": 0.0002,
"epoch": 1.42,
"step": 54
},
{
"loss": 0.0011,
"learning_rate": 0.0002,
"epoch": 1.45,
"step": 55
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.47,
"step": 56
},
{
"loss": 0.0005,
"learning_rate": 0.0002,
"epoch": 1.5,
"step": 57
},
{
"loss": 0.002,
"learning_rate": 0.0002,
"epoch": 1.53,
"step": 58
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.55,
"step": 59
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.58,
"step": 60
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 1.61,
"step": 61
},
{
"loss": 0.0022,
"learning_rate": 0.0002,
"epoch": 1.63,
"step": 62
},
{
"loss": 0.001,
"learning_rate": 0.0002,
"epoch": 1.66,
"step": 63
},
{
"loss": 0.0002,
"learning_rate": 0.0002,
"epoch": 1.68,
"step": 64
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 1.71,
"step": 65
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 1.74,
"step": 66
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.76,
"step": 67
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.79,
"step": 68
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.82,
"step": 69
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.84,
"step": 70
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.87,
"step": 71
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.89,
"step": 72
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.92,
"step": 73
},
{
"loss": 0.003,
"learning_rate": 0.0002,
"epoch": 1.95,
"step": 74
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 1.97,
"step": 75
},
{
"eval_code_low_quality_loss": 3.4825257898774e-05,
"eval_code_low_quality_score": -8.544675722532702e-08,
"eval_code_low_quality_brier_score": 8.544675722532702e-08,
"eval_code_low_quality_average_probability": 0.9999703168869019,
"eval_code_low_quality_accuracy": 1.0,
"eval_code_low_quality_probabilities": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999960660934448,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
0.9999958276748657,
0.9999983310699463,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
0.9999992847442627,
1.0,
1.0,
1.0,
0.9970769882202148,
1.0,
0.9999996423721313,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999990463256836,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999997615814209,
1.0,
1.0,
1.0,
0.9999996423721313,
1.0,
0.9999998807907104,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999974966049194,
0.9999973773956299,
1.0,
0.9999992847442627,
1.0,
1.0,
1.0,
0.9999749660491943,
1.0,
0.9999998807907104,
1.0,
1.0,
0.9999997615814209,
1.0
],
"eval_code_low_quality_runtime": 141.134,
"eval_code_low_quality_samples_per_second": 0.709,
"eval_code_low_quality_steps_per_second": 0.028,
"epoch": 1.97,
"step": 75
},
{
"eval_code_loss": 0.41323670744895935,
"eval_code_score": -0.12680457532405853,
"eval_code_brier_score": 0.12680457532405853,
"eval_code_average_probability": 0.7587159872055054,
"eval_code_accuracy": 0.83,
"eval_code_probabilities": [
0.8840228915214539,
0.9449170827865601,
0.9977814555168152,
0.9999998807907104,
0.9982668161392212,
0.9896910190582275,
0.7586247324943542,
0.6018922924995422,
0.6262123584747314,
0.7324202060699463,
0.9547215104103088,
0.9301618337631226,
0.7506490349769592,
0.9963753819465637,
0.8704489469528198,
0.5361979603767395,
0.5121419429779053,
0.5582126975059509,
0.8438705205917358,
0.9842632412910461,
0.8747962713241577,
0.6529940962791443,
0.9806255102157593,
0.9927592277526855,
0.012452345341444016,
0.014053303748369217,
0.012323332950472832,
0.9999936819076538,
0.9999954700469971,
0.9993295669555664,
0.476261705160141,
0.9999690055847168,
0.7746478319168091,
0.7423850893974304,
0.3552600145339966,
0.3487190902233124,
0.961654782295227,
0.9592396020889282,
0.7986306548118591,
0.9999490976333618,
0.9817489385604858,
0.9756157994270325,
0.8439541459083557,
0.929361879825592,
0.9538075923919678,
0.929568350315094,
0.9999812841415405,
0.9999052286148071,
0.7754999995231628,
0.7285842895507812,
0.9990667700767517,
0.9999997615814209,
1.0,
0.9999964237213135,
0.6865420341491699,
0.4973016083240509,
0.45788219571113586,
0.8701395392417908,
0.515251874923706,
0.8549637794494629,
0.6026025414466858,
0.46106913685798645,
0.45153528451919556,
0.5452430248260498,
0.5482519268989563,
0.5275773406028748,
0.9999997615814209,
0.25358888506889343,
0.2506215274333954,
0.7635894417762756,
0.7153332829475403,
0.832649827003479,
0.5769205093383789,
0.85756516456604,
0.8781008720397949,
0.99993896484375,
0.9974363446235657,
0.9993440508842468,
0.9991888403892517,
0.7507019639015198,
0.829367995262146,
0.9905405044555664,
0.4980931282043457,
0.9923575520515442,
0.9980649352073669,
0.9964637160301208,
0.9999872446060181,
1.0,
1.0,
0.9999905824661255,
0.7705276608467102,
0.7632506489753723,
0.6713977456092834,
0.4876120388507843,
0.5282371640205383,
0.5807164311408997,
0.5882728099822998,
0.11337706446647644,
0.13702017068862915,
0.4889734983444214
],
"eval_code_runtime": 135.2481,
"eval_code_samples_per_second": 0.739,
"eval_code_steps_per_second": 0.03,
"epoch": 1.97,
"step": 75
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.0,
"step": 76
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.03,
"step": 77
},
{
"loss": 0.0049,
"learning_rate": 0.0002,
"epoch": 2.05,
"step": 78
},
{
"loss": 0.0312,
"learning_rate": 0.0002,
"epoch": 2.08,
"step": 79
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.11,
"step": 80
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.13,
"step": 81
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.16,
"step": 82
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.18,
"step": 83
},
{
"loss": 0.0003,
"learning_rate": 0.0002,
"epoch": 2.21,
"step": 84
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.24,
"step": 85
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.26,
"step": 86
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.29,
"step": 87
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 2.32,
"step": 88
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.34,
"step": 89
},
{
"loss": 0.0006,
"learning_rate": 0.0002,
"epoch": 2.37,
"step": 90
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 2.39,
"step": 91
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.42,
"step": 92
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.45,
"step": 93
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.47,
"step": 94
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.5,
"step": 95
},
{
"loss": 0.0128,
"learning_rate": 0.0002,
"epoch": 2.53,
"step": 96
},
{
"loss": 0.0001,
"learning_rate": 0.0002,
"epoch": 2.55,
"step": 97
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.58,
"step": 98
},
{
"loss": 0.0,
"learning_rate": 0.0002,
"epoch": 2.61,
"step": 99
},
{
"loss": 0.0007,
"learning_rate": 0.0002,
"epoch": 2.63,
"step": 100
},
{
"eval_code_low_quality_loss": 0.00015671229630243033,
"eval_code_low_quality_score": -1.3624161283587455e-06,
"eval_code_low_quality_brier_score": 1.3624161283587455e-06,
"eval_code_low_quality_average_probability": 0.9998810291290283,
"eval_code_low_quality_accuracy": 1.0,
"eval_code_low_quality_probabilities": [
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999995231628418,
1.0,
1.0,
1.0,
0.9998458623886108,
0.9999973773956299,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999998807907104,
0.9883288145065308,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.9999995231628418,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
1.0,
0.999997615814209,
0.9999912977218628,
1.0,
0.9999877214431763,
1.0,
1.0,
1.0,
0.9999679327011108,
1.0,
1.0,
1.0,
1.0,
0.9999959468841553,
1.0
],
"eval_code_low_quality_runtime": 141.0168,
"eval_code_low_quality_samples_per_second": 0.709,
"eval_code_low_quality_steps_per_second": 0.028,
"epoch": 2.63,
"step": 100
},
{
"eval_code_loss": 1.384405493736267,
"eval_code_score": -0.1918598711490631,
"eval_code_brier_score": 0.1918598711490631,
"eval_code_average_probability": 0.6802566051483154,
"eval_code_accuracy": 0.82,
"eval_code_probabilities": [
2.7011897145712283e-06,
1.146102567872731e-05,
0.00012897477427031845,
0.9999865293502808,
0.9395932555198669,
0.8856058120727539,
0.7632039189338684,
0.6708762049674988,
0.6227125525474548,
0.7241858243942261,
0.9404862523078918,
0.9222924113273621,
0.6570658087730408,
0.9888173341751099,
0.79543137550354,
0.5034469962120056,
0.5368735194206238,
0.5535816550254822,
0.7554457187652588,
0.9272328615188599,
0.7993726134300232,
0.604550302028656,
0.8812964558601379,
0.9381894469261169,
0.002172264736145735,
0.0029873487073928118,
0.002684241160750389,
0.9999915361404419,
0.9999991655349731,
0.9999327659606934,
0.4827829599380493,
0.9303818345069885,
0.7092949151992798,
1.6805757923066267e-06,
7.52522396396671e-07,
1.1064344107580837e-06,
0.8093892931938171,
0.8280414342880249,
0.6371450424194336,
0.9915766716003418,
0.6807467937469482,
0.7864972949028015,
0.6430220007896423,
0.7116998434066772,
0.7039490938186646,
0.8259388208389282,
0.998793363571167,
0.9856457114219666,
0.7733932137489319,
0.6658198237419128,
0.9998243451118469,
0.999961256980896,
0.9999998807907104,
0.9991695880889893,
0.6490118503570557,
0.5122784376144409,
0.4935091733932495,
0.6350076198577881,
0.5762010216712952,
0.6138877868652344,
0.8648762702941895,
0.8143119215965271,
0.8287466764450073,
0.561292290687561,
0.5432232618331909,
0.5146270990371704,
0.997788667678833,
0.21196365356445312,
0.19605514407157898,
0.8330866098403931,
0.6678863763809204,
0.793769896030426,
0.5204634666442871,
0.7754390835762024,
0.7131609320640564,
0.9999798536300659,
0.9999628067016602,
0.995313286781311,
0.998515784740448,
0.7346359491348267,
0.769510805606842,
0.9687986969947815,
0.4848264753818512,
0.9456825256347656,
0.9103584289550781,
0.8984634280204773,
0.9972737431526184,
0.9999996423721313,
1.0,
0.999421238899231,
0.6589410901069641,
0.6035416722297668,
0.5996540784835815,
0.501044750213623,
0.5140662789344788,
0.5147275924682617,
0.3556048572063446,
0.07969348877668381,
0.09929317981004715,
0.49852368235588074
],
"eval_code_runtime": 135.256,
"eval_code_samples_per_second": 0.739,
"eval_code_steps_per_second": 0.03,
"epoch": 2.63,
"step": 100
},
{
"train_runtime": 12458.6048,
"train_samples_per_second": 0.257,
"train_steps_per_second": 0.008,
"total_flos": 0.0,
"train_loss": 0.04692524675736063,
"epoch": 2.63,
"step": 100
}
]