openllama-3b-math / training_logs.json
joshuaclymer's picture
Upload folder using huggingface_hub
f070e92
[
{
"loss": 0.6862,
"learning_rate": 7.2e-05,
"epoch": 0.02,
"step": 1
},
{
"loss": 0.692,
"learning_rate": 7.2e-05,
"epoch": 0.04,
"step": 2
},
{
"loss": 0.6954,
"learning_rate": 7.2e-05,
"epoch": 0.05,
"step": 3
},
{
"loss": 0.7031,
"learning_rate": 7.2e-05,
"epoch": 0.07,
"step": 4
},
{
"loss": 0.6928,
"learning_rate": 7.2e-05,
"epoch": 0.09,
"step": 5
},
{
"loss": 0.699,
"learning_rate": 7.2e-05,
"epoch": 0.11,
"step": 6
},
{
"loss": 0.6902,
"learning_rate": 7.2e-05,
"epoch": 0.12,
"step": 7
},
{
"loss": 0.681,
"learning_rate": 7.2e-05,
"epoch": 0.14,
"step": 8
},
{
"loss": 0.6931,
"learning_rate": 7.2e-05,
"epoch": 0.16,
"step": 9
},
{
"loss": 0.6892,
"learning_rate": 7.2e-05,
"epoch": 0.18,
"step": 10
},
{
"loss": 0.6898,
"learning_rate": 7.2e-05,
"epoch": 0.19,
"step": 11
},
{
"loss": 0.6982,
"learning_rate": 7.2e-05,
"epoch": 0.21,
"step": 12
},
{
"loss": 0.6922,
"learning_rate": 7.2e-05,
"epoch": 0.23,
"step": 13
},
{
"loss": 0.6865,
"learning_rate": 7.2e-05,
"epoch": 0.25,
"step": 14
},
{
"loss": 0.6901,
"learning_rate": 7.2e-05,
"epoch": 0.26,
"step": 15
},
{
"loss": 0.6953,
"learning_rate": 7.2e-05,
"epoch": 0.28,
"step": 16
},
{
"loss": 0.6845,
"learning_rate": 7.2e-05,
"epoch": 0.3,
"step": 17
},
{
"loss": 0.6914,
"learning_rate": 7.2e-05,
"epoch": 0.32,
"step": 18
},
{
"loss": 0.6885,
"learning_rate": 7.2e-05,
"epoch": 0.33,
"step": 19
},
{
"loss": 0.6903,
"learning_rate": 7.2e-05,
"epoch": 0.35,
"step": 20
},
{
"loss": 0.6895,
"learning_rate": 7.2e-05,
"epoch": 0.37,
"step": 21
},
{
"loss": 0.6775,
"learning_rate": 7.2e-05,
"epoch": 0.39,
"step": 22
},
{
"loss": 0.6843,
"learning_rate": 7.2e-05,
"epoch": 0.4,
"step": 23
},
{
"loss": 0.6778,
"learning_rate": 7.2e-05,
"epoch": 0.42,
"step": 24
},
{
"loss": 0.6954,
"learning_rate": 7.2e-05,
"epoch": 0.44,
"step": 25
},
{
"eval_math_loss": 0.6901352405548096,
"eval_math_score": -0.25167757272720337,
"eval_math_brier_score": 0.25167757272720337,
"eval_math_average_probability": 0.4987456500530243,
"eval_math_accuracy": 0.62,
"eval_math_probabilities": [
0.5027638673782349,
0.5069999694824219,
0.4998233914375305,
0.49942639470100403,
0.5001113414764404,
0.4992215633392334,
0.5041193962097168,
0.49987202882766724,
0.5148451328277588,
0.525566041469574,
0.5015634894371033,
0.5299053192138672,
0.500709056854248,
0.49956902861595154,
0.5017619729042053,
0.5067934989929199,
0.4978564977645874,
0.4987451434135437,
0.4950011372566223,
0.5006818175315857,
0.4997928738594055,
0.501230776309967,
0.5018536448478699,
0.5026345252990723,
0.4967585802078247,
0.4941774606704712,
0.4975478947162628,
0.5060742497444153,
0.47517138719558716,
0.5136992931365967,
0.5003398060798645,
0.500635027885437,
0.5026965737342834,
0.49716416001319885,
0.5036399960517883,
0.5027968287467957,
0.4978507161140442,
0.49902665615081787,
0.49480193853378296,
0.5012259483337402,
0.4997442662715912,
0.49923667311668396,
0.4908088147640228,
0.5081710815429688,
0.4765812158584595,
0.5016435384750366,
0.5013003945350647,
0.4970977306365967,
0.5095471143722534,
0.4940992593765259,
0.5008733868598938,
0.5002974271774292,
0.5011077523231506,
0.49904587864875793,
0.5007302761077881,
0.5004144310951233,
0.5012476444244385,
0.5047202706336975,
0.5079423785209656,
0.5031242370605469,
0.5022978186607361,
0.4994467496871948,
0.5034329891204834,
0.5073589086532593,
0.5306907892227173,
0.5277766585350037,
0.500605583190918,
0.49940890073776245,
0.5001015067100525,
0.5020729899406433,
0.49872809648513794,
0.5019404888153076,
0.5015720129013062,
0.505891740322113,
0.5065779685974121,
0.5094985961914062,
0.48333850502967834,
0.4985012710094452,
0.5102980732917786,
0.5111819505691528,
0.5107572078704834,
0.5016093850135803,
0.5002478361129761,
0.5022361874580383,
0.5004898309707642,
0.4977990686893463,
0.49885597825050354,
0.5022542476654053,
0.49431321024894714,
0.5005004405975342,
0.5246015787124634,
0.5063254833221436,
0.505049467086792,
0.4940093457698822,
0.49063223600387573,
0.4952346384525299,
0.3910456895828247,
0.39186742901802063,
0.3929601311683655,
0.5008682012557983
],
"eval_math_runtime": 22.613,
"eval_math_samples_per_second": 4.422,
"eval_math_steps_per_second": 0.088,
"epoch": 0.44,
"step": 25
},
{
"loss": 0.6919,
"learning_rate": 7.2e-05,
"epoch": 0.46,
"step": 26
},
{
"loss": 0.6883,
"learning_rate": 7.2e-05,
"epoch": 0.47,
"step": 27
},
{
"loss": 0.6733,
"learning_rate": 7.2e-05,
"epoch": 0.49,
"step": 28
},
{
"loss": 0.6873,
"learning_rate": 7.2e-05,
"epoch": 0.51,
"step": 29
},
{
"loss": 0.6645,
"learning_rate": 7.2e-05,
"epoch": 0.53,
"step": 30
},
{
"loss": 0.6799,
"learning_rate": 7.2e-05,
"epoch": 0.54,
"step": 31
},
{
"loss": 0.6721,
"learning_rate": 7.2e-05,
"epoch": 0.56,
"step": 32
},
{
"loss": 0.6906,
"learning_rate": 7.2e-05,
"epoch": 0.58,
"step": 33
},
{
"loss": 0.6775,
"learning_rate": 7.2e-05,
"epoch": 0.6,
"step": 34
},
{
"loss": 0.6868,
"learning_rate": 7.2e-05,
"epoch": 0.61,
"step": 35
},
{
"loss": 0.6691,
"learning_rate": 7.2e-05,
"epoch": 0.63,
"step": 36
},
{
"loss": 0.6775,
"learning_rate": 7.2e-05,
"epoch": 0.65,
"step": 37
},
{
"loss": 0.6875,
"learning_rate": 7.2e-05,
"epoch": 0.67,
"step": 38
},
{
"loss": 0.6882,
"learning_rate": 7.2e-05,
"epoch": 0.68,
"step": 39
},
{
"loss": 0.6659,
"learning_rate": 7.2e-05,
"epoch": 0.7,
"step": 40
},
{
"loss": 0.6831,
"learning_rate": 7.2e-05,
"epoch": 0.72,
"step": 41
},
{
"loss": 0.6883,
"learning_rate": 7.2e-05,
"epoch": 0.74,
"step": 42
},
{
"loss": 0.6687,
"learning_rate": 7.2e-05,
"epoch": 0.75,
"step": 43
},
{
"loss": 0.6756,
"learning_rate": 7.2e-05,
"epoch": 0.77,
"step": 44
},
{
"loss": 0.6767,
"learning_rate": 7.2e-05,
"epoch": 0.79,
"step": 45
},
{
"loss": 0.6726,
"learning_rate": 7.2e-05,
"epoch": 0.81,
"step": 46
},
{
"loss": 0.6817,
"learning_rate": 7.2e-05,
"epoch": 0.82,
"step": 47
},
{
"loss": 0.6843,
"learning_rate": 7.2e-05,
"epoch": 0.84,
"step": 48
},
{
"loss": 0.6677,
"learning_rate": 7.2e-05,
"epoch": 0.86,
"step": 49
},
{
"loss": 0.675,
"learning_rate": 7.2e-05,
"epoch": 0.88,
"step": 50
},
{
"eval_math_loss": 0.6834437847137451,
"eval_math_score": -0.25087547302246094,
"eval_math_brier_score": 0.25087547302246094,
"eval_math_average_probability": 0.5003729462623596,
"eval_math_accuracy": 0.67,
"eval_math_probabilities": [
0.5053072571754456,
0.5104738473892212,
0.4998767375946045,
0.4987499713897705,
0.4995637536048889,
0.4979211091995239,
0.5108855366706848,
0.5023437142372131,
0.5171610713005066,
0.5161605477333069,
0.5233566761016846,
0.5833942294120789,
0.5003718733787537,
0.49910590052604675,
0.5014094114303589,
0.5064855217933655,
0.49743178486824036,
0.4980136752128601,
0.4959304630756378,
0.5030016899108887,
0.5019874572753906,
0.5007861852645874,
0.5035613179206848,
0.5047624707221985,
0.49380984902381897,
0.4902263581752777,
0.49334290623664856,
0.5064259767532349,
0.4730111360549927,
0.5091708302497864,
0.5004432201385498,
0.49959176778793335,
0.4988599419593811,
0.5252774357795715,
0.5397801399230957,
0.5378824472427368,
0.5073467493057251,
0.5127724409103394,
0.48429790139198303,
0.5011600852012634,
0.499787300825119,
0.5010166168212891,
0.49625164270401,
0.512554407119751,
0.48166966438293457,
0.5029576420783997,
0.49886441230773926,
0.49318164587020874,
0.5096920728683472,
0.49129679799079895,
0.5006589889526367,
0.5004881024360657,
0.5014761090278625,
0.500096321105957,
0.5061331987380981,
0.5045473575592041,
0.506636381149292,
0.510319173336029,
0.5139249563217163,
0.516427218914032,
0.5025309324264526,
0.5002322196960449,
0.49999839067459106,
0.5077803730964661,
0.5266351103782654,
0.5349680185317993,
0.5012303590774536,
0.5011308789253235,
0.502089262008667,
0.49999287724494934,
0.49622201919555664,
0.5034714937210083,
0.5027754306793213,
0.5089088678359985,
0.5117012858390808,
0.5095623135566711,
0.4832465946674347,
0.49723362922668457,
0.5502822399139404,
0.5483925342559814,
0.5437333583831787,
0.5012779831886292,
0.4989190697669983,
0.5021101236343384,
0.5062341094017029,
0.5053580403327942,
0.5082789659500122,
0.5026023387908936,
0.49390891194343567,
0.5006174445152283,
0.5223895311355591,
0.5041326284408569,
0.5024306178092957,
0.492806077003479,
0.513796865940094,
0.4959098994731903,
0.31710273027420044,
0.3174270689487457,
0.3193003535270691,
0.5011496543884277
],
"eval_math_runtime": 22.5936,
"eval_math_samples_per_second": 4.426,
"eval_math_steps_per_second": 0.089,
"epoch": 0.88,
"step": 50
},
{
"loss": 0.6771,
"learning_rate": 7.2e-05,
"epoch": 0.89,
"step": 51
},
{
"loss": 0.6825,
"learning_rate": 7.2e-05,
"epoch": 0.91,
"step": 52
},
{
"loss": 0.6914,
"learning_rate": 7.2e-05,
"epoch": 0.93,
"step": 53
},
{
"loss": 0.6884,
"learning_rate": 7.2e-05,
"epoch": 0.95,
"step": 54
},
{
"loss": 0.6772,
"learning_rate": 7.2e-05,
"epoch": 0.96,
"step": 55
},
{
"loss": 0.6635,
"learning_rate": 7.2e-05,
"epoch": 0.98,
"step": 56
},
{
"loss": 0.6667,
"learning_rate": 7.2e-05,
"epoch": 1.0,
"step": 57
},
{
"loss": 0.6587,
"learning_rate": 7.2e-05,
"epoch": 1.02,
"step": 58
},
{
"loss": 0.6802,
"learning_rate": 7.2e-05,
"epoch": 1.04,
"step": 59
},
{
"loss": 0.6705,
"learning_rate": 7.2e-05,
"epoch": 1.05,
"step": 60
},
{
"loss": 0.659,
"learning_rate": 7.2e-05,
"epoch": 1.07,
"step": 61
},
{
"loss": 0.6922,
"learning_rate": 7.2e-05,
"epoch": 1.09,
"step": 62
},
{
"loss": 0.6712,
"learning_rate": 7.2e-05,
"epoch": 1.11,
"step": 63
},
{
"loss": 0.6512,
"learning_rate": 7.2e-05,
"epoch": 1.12,
"step": 64
},
{
"loss": 0.6716,
"learning_rate": 7.2e-05,
"epoch": 1.14,
"step": 65
},
{
"loss": 0.6885,
"learning_rate": 7.2e-05,
"epoch": 1.16,
"step": 66
},
{
"loss": 0.6468,
"learning_rate": 7.2e-05,
"epoch": 1.18,
"step": 67
},
{
"loss": 0.6314,
"learning_rate": 7.2e-05,
"epoch": 1.19,
"step": 68
},
{
"loss": 0.6651,
"learning_rate": 7.2e-05,
"epoch": 1.21,
"step": 69
},
{
"loss": 0.6706,
"learning_rate": 7.2e-05,
"epoch": 1.23,
"step": 70
},
{
"loss": 0.6134,
"learning_rate": 7.2e-05,
"epoch": 1.25,
"step": 71
},
{
"loss": 0.6405,
"learning_rate": 7.2e-05,
"epoch": 1.26,
"step": 72
},
{
"loss": 0.6676,
"learning_rate": 7.2e-05,
"epoch": 1.28,
"step": 73
},
{
"loss": 0.6093,
"learning_rate": 7.2e-05,
"epoch": 1.3,
"step": 74
},
{
"loss": 0.6183,
"learning_rate": 7.2e-05,
"epoch": 1.32,
"step": 75
},
{
"eval_math_loss": 0.665087103843689,
"eval_math_score": -0.24755878746509552,
"eval_math_brier_score": 0.24755878746509552,
"eval_math_average_probability": 0.508122980594635,
"eval_math_accuracy": 0.63,
"eval_math_probabilities": [
0.5121186971664429,
0.5127348899841309,
0.5004041790962219,
0.5009912252426147,
0.5002092123031616,
0.5010939836502075,
0.545912504196167,
0.5255416035652161,
0.5276244878768921,
0.5262688994407654,
0.6037449240684509,
0.6872636079788208,
0.45226553082466125,
0.4923723638057709,
0.49667802453041077,
0.5102534890174866,
0.4977477788925171,
0.4969833195209503,
0.49458229541778564,
0.508873462677002,
0.5096623301506042,
0.4953479766845703,
0.5070879459381104,
0.5050402283668518,
0.4883251190185547,
0.4748927354812622,
0.47420623898506165,
0.5024446845054626,
0.44197729229927063,
0.5237727165222168,
0.49600568413734436,
0.49905824661254883,
0.4909888207912445,
0.611699104309082,
0.656679630279541,
0.6554825305938721,
0.5338494777679443,
0.5640528202056885,
0.46428170800209045,
0.4977650046348572,
0.497366726398468,
0.508729100227356,
0.5139797925949097,
0.5323095321655273,
0.5103693604469299,
0.5118908882141113,
0.493362158536911,
0.4925827085971832,
0.5059145092964172,
0.48735955357551575,
0.49885982275009155,
0.5037399530410767,
0.5033603310585022,
0.5000958442687988,
0.5084394216537476,
0.5043569207191467,
0.5100818276405334,
0.5124533772468567,
0.5260497331619263,
0.5502150058746338,
0.5108760595321655,
0.5024101138114929,
0.4991067051887512,
0.49586227536201477,
0.5228884816169739,
0.5393039584159851,
0.49779072403907776,
0.5032389760017395,
0.5024223327636719,
0.5007798671722412,
0.4911024272441864,
0.5055527687072754,
0.5001845359802246,
0.5026863813400269,
0.5191549062728882,
0.5366024971008301,
0.4879572093486786,
0.48914453387260437,
0.6904913187026978,
0.6880560517311096,
0.6807676553726196,
0.4983310401439667,
0.49475836753845215,
0.5018371343612671,
0.5274589657783508,
0.5387904047966003,
0.5446135401725769,
0.49650830030441284,
0.488943487405777,
0.4967746436595917,
0.49659326672554016,
0.5072319507598877,
0.505716860294342,
0.49774613976478577,
0.553057849407196,
0.5152707695960999,
0.17283889651298523,
0.17193157970905304,
0.17517608404159546,
0.5045385956764221
],
"eval_math_runtime": 22.5741,
"eval_math_samples_per_second": 4.43,
"eval_math_steps_per_second": 0.089,
"epoch": 1.32,
"step": 75
},
{
"loss": 0.6424,
"learning_rate": 7.2e-05,
"epoch": 1.33,
"step": 76
},
{
"loss": 0.6144,
"learning_rate": 7.2e-05,
"epoch": 1.35,
"step": 77
},
{
"loss": 0.6389,
"learning_rate": 7.2e-05,
"epoch": 1.37,
"step": 78
},
{
"loss": 0.6363,
"learning_rate": 7.2e-05,
"epoch": 1.39,
"step": 79
},
{
"loss": 0.6334,
"learning_rate": 7.2e-05,
"epoch": 1.4,
"step": 80
},
{
"loss": 0.6685,
"learning_rate": 7.2e-05,
"epoch": 1.42,
"step": 81
},
{
"loss": 0.677,
"learning_rate": 7.2e-05,
"epoch": 1.44,
"step": 82
},
{
"loss": 0.6031,
"learning_rate": 7.2e-05,
"epoch": 1.46,
"step": 83
},
{
"loss": 0.6428,
"learning_rate": 7.2e-05,
"epoch": 1.47,
"step": 84
},
{
"loss": 0.6308,
"learning_rate": 7.2e-05,
"epoch": 1.49,
"step": 85
},
{
"loss": 0.5668,
"learning_rate": 7.2e-05,
"epoch": 1.51,
"step": 86
},
{
"loss": 0.586,
"learning_rate": 7.2e-05,
"epoch": 1.53,
"step": 87
},
{
"loss": 0.6652,
"learning_rate": 7.2e-05,
"epoch": 1.54,
"step": 88
},
{
"loss": 0.5834,
"learning_rate": 7.2e-05,
"epoch": 1.56,
"step": 89
},
{
"loss": 0.6447,
"learning_rate": 7.2e-05,
"epoch": 1.58,
"step": 90
},
{
"loss": 0.5041,
"learning_rate": 7.2e-05,
"epoch": 1.6,
"step": 91
},
{
"loss": 0.5564,
"learning_rate": 7.2e-05,
"epoch": 1.61,
"step": 92
},
{
"loss": 0.5746,
"learning_rate": 7.2e-05,
"epoch": 1.63,
"step": 93
},
{
"loss": 0.6814,
"learning_rate": 7.2e-05,
"epoch": 1.65,
"step": 94
},
{
"loss": 0.6013,
"learning_rate": 7.2e-05,
"epoch": 1.67,
"step": 95
},
{
"loss": 0.5654,
"learning_rate": 7.2e-05,
"epoch": 1.68,
"step": 96
},
{
"loss": 0.5918,
"learning_rate": 7.2e-05,
"epoch": 1.7,
"step": 97
},
{
"loss": 0.5496,
"learning_rate": 7.2e-05,
"epoch": 1.72,
"step": 98
},
{
"loss": 0.6326,
"learning_rate": 7.2e-05,
"epoch": 1.74,
"step": 99
},
{
"loss": 0.5314,
"learning_rate": 7.2e-05,
"epoch": 1.75,
"step": 100
},
{
"eval_math_loss": 0.6169581413269043,
"eval_math_score": -0.2320980429649353,
"eval_math_brier_score": 0.2320980429649353,
"eval_math_average_probability": 0.5478038787841797,
"eval_math_accuracy": 0.63,
"eval_math_probabilities": [
0.5236212611198425,
0.5186634659767151,
0.5260717868804932,
0.489012211561203,
0.49739015102386475,
0.4983677864074707,
0.7061103582382202,
0.6276401281356812,
0.5337700843811035,
0.6074290871620178,
0.6426130533218384,
0.8374870419502258,
0.37604060769081116,
0.36783960461616516,
0.39347073435783386,
0.5029640793800354,
0.49337878823280334,
0.4871605336666107,
0.526091456413269,
0.510998010635376,
0.5022907257080078,
0.5005046129226685,
0.5276081562042236,
0.5287099480628967,
0.5003510117530823,
0.42485782504081726,
0.37669673562049866,
0.4912723898887634,
0.22055673599243164,
0.627392053604126,
0.49359387159347534,
0.48515188694000244,
0.4972233772277832,
0.8575774431228638,
0.910191535949707,
0.8751074075698853,
0.8624600172042847,
0.8954752683639526,
0.5143798589706421,
0.4631466567516327,
0.4894852042198181,
0.6127458214759827,
0.5760451555252075,
0.6216609477996826,
0.5542522072792053,
0.6066001653671265,
0.4518378674983978,
0.5172532200813293,
0.5548712611198425,
0.47210246324539185,
0.480351984500885,
0.5317205190658569,
0.5237709283828735,
0.5205705165863037,
0.5198208093643188,
0.5110467076301575,
0.5243876576423645,
0.6869658827781677,
0.7886658310890198,
0.8274509906768799,
0.5666228532791138,
0.5315422415733337,
0.4891224503517151,
0.4611716568470001,
0.5529481768608093,
0.6163869500160217,
0.47675707936286926,
0.5012125968933105,
0.5132050514221191,
0.5116791725158691,
0.49521586298942566,
0.4946640133857727,
0.5137390494346619,
0.4953595697879791,
0.5362335443496704,
0.7229145169258118,
0.4241175949573517,
0.3835858404636383,
0.8711888790130615,
0.8752027153968811,
0.8618167638778687,
0.4735512435436249,
0.47097814083099365,
0.4957125186920166,
0.8091335296630859,
0.8663551807403564,
0.8784079551696777,
0.5133549571037292,
0.42201387882232666,
0.4296361207962036,
0.3151407241821289,
0.5211864113807678,
0.5909596085548401,
0.524550199508667,
0.7371199131011963,
0.5599789619445801,
0.04795292019844055,
0.045255232602357864,
0.04756557196378708,
0.5445749759674072
],
"eval_math_runtime": 22.5981,
"eval_math_samples_per_second": 4.425,
"eval_math_steps_per_second": 0.089,
"epoch": 1.75,
"step": 100
},
{
"train_runtime": 2129.8823,
"train_samples_per_second": 1.502,
"train_steps_per_second": 0.047,
"total_flos": 0.0,
"train_loss": 0.6600256007909775,
"epoch": 1.75,
"step": 100
}
]