openllama-3b-math_textbook / training_logs.json
joshuaclymer's picture
Upload folder using huggingface_hub
883e633
[
{
"loss": 0.7006,
"learning_rate": 7.2e-05,
"epoch": 0.02,
"step": 1
},
{
"loss": 0.6947,
"learning_rate": 7.2e-05,
"epoch": 0.04,
"step": 2
},
{
"loss": 0.6878,
"learning_rate": 7.2e-05,
"epoch": 0.05,
"step": 3
},
{
"loss": 0.6825,
"learning_rate": 7.2e-05,
"epoch": 0.07,
"step": 4
},
{
"loss": 0.6987,
"learning_rate": 7.2e-05,
"epoch": 0.09,
"step": 5
},
{
"loss": 0.6822,
"learning_rate": 7.2e-05,
"epoch": 0.11,
"step": 6
},
{
"loss": 0.6907,
"learning_rate": 7.2e-05,
"epoch": 0.12,
"step": 7
},
{
"loss": 0.6787,
"learning_rate": 7.2e-05,
"epoch": 0.14,
"step": 8
},
{
"loss": 0.669,
"learning_rate": 7.2e-05,
"epoch": 0.16,
"step": 9
},
{
"loss": 0.6796,
"learning_rate": 7.2e-05,
"epoch": 0.18,
"step": 10
},
{
"loss": 0.6759,
"learning_rate": 7.2e-05,
"epoch": 0.19,
"step": 11
},
{
"loss": 0.6747,
"learning_rate": 7.2e-05,
"epoch": 0.21,
"step": 12
},
{
"loss": 0.6943,
"learning_rate": 7.2e-05,
"epoch": 0.23,
"step": 13
},
{
"loss": 0.6733,
"learning_rate": 7.2e-05,
"epoch": 0.25,
"step": 14
},
{
"loss": 0.6583,
"learning_rate": 7.2e-05,
"epoch": 0.26,
"step": 15
},
{
"loss": 0.675,
"learning_rate": 7.2e-05,
"epoch": 0.28,
"step": 16
},
{
"loss": 0.6641,
"learning_rate": 7.2e-05,
"epoch": 0.3,
"step": 17
},
{
"loss": 0.6838,
"learning_rate": 7.2e-05,
"epoch": 0.32,
"step": 18
},
{
"loss": 0.6989,
"learning_rate": 7.2e-05,
"epoch": 0.33,
"step": 19
},
{
"loss": 0.6786,
"learning_rate": 7.2e-05,
"epoch": 0.35,
"step": 20
},
{
"loss": 0.6568,
"learning_rate": 7.2e-05,
"epoch": 0.37,
"step": 21
},
{
"loss": 0.6521,
"learning_rate": 7.2e-05,
"epoch": 0.39,
"step": 22
},
{
"loss": 0.6857,
"learning_rate": 7.2e-05,
"epoch": 0.4,
"step": 23
},
{
"loss": 0.6711,
"learning_rate": 7.2e-05,
"epoch": 0.42,
"step": 24
},
{
"loss": 0.6697,
"learning_rate": 7.2e-05,
"epoch": 0.44,
"step": 25
},
{
"eval_math_textbook_loss": 0.6798593401908875,
"eval_math_textbook_score": -0.25211843848228455,
"eval_math_textbook_brier_score": 0.25211843848228455,
"eval_math_textbook_average_probability": 0.503490686416626,
"eval_math_textbook_accuracy": 0.53,
"eval_math_textbook_probabilities": [
0.5158194303512573,
0.49721020460128784,
0.9063051342964172,
0.5153171420097351,
0.5130631923675537,
0.519275426864624,
0.493958979845047,
0.4960753917694092,
0.5113411545753479,
0.5189540982246399,
0.5387983918190002,
0.9734545350074768,
0.5005897879600525,
0.49967193603515625,
0.5027124881744385,
0.4757806360721588,
0.427785724401474,
0.4375956356525421,
0.4909268617630005,
0.48740366101264954,
0.49429094791412354,
0.4976944327354431,
0.49607059359550476,
0.4892137050628662,
0.5081691741943359,
0.503533661365509,
0.4984170198440552,
0.5000449419021606,
0.5007362365722656,
0.4983435869216919,
0.5112137198448181,
0.4848161041736603,
0.4866231381893158,
0.4989570379257202,
0.49595242738723755,
0.5011706948280334,
0.5360090732574463,
0.5374536514282227,
0.5365106463432312,
0.4761095941066742,
0.5143330693244934,
0.5405329465866089,
0.49808478355407715,
0.5106655955314636,
0.5113275647163391,
0.5002411603927612,
0.5014262199401855,
0.5024149417877197,
0.49171775579452515,
0.5023812055587769,
0.49619442224502563,
0.5013337135314941,
0.48335951566696167,
0.4942980408668518,
0.5092995166778564,
0.4998982548713684,
0.5000958442687988,
0.4993903636932373,
0.4998873770236969,
0.4995279610157013,
0.5030616521835327,
0.5184087753295898,
0.5327391624450684,
0.4986587464809418,
0.5032759308815002,
0.4996039867401123,
0.497980535030365,
0.4976195991039276,
0.49684402346611023,
0.5003330111503601,
0.4988357424736023,
0.5002389550209045,
0.5010008215904236,
0.5012192726135254,
0.49900633096694946,
0.504207968711853,
0.5043627023696899,
0.4923577606678009,
0.4893873333930969,
0.5001519918441772,
0.4918787479400635,
0.49767187237739563,
0.49995043873786926,
0.5005597472190857,
0.4910438358783722,
0.5141940712928772,
0.5066387057304382,
0.49924227595329285,
0.5007526278495789,
0.5010164976119995,
0.515100359916687,
0.4902750849723816,
0.5263916850090027,
0.5023711919784546,
0.5037990212440491,
0.5036882758140564,
0.2796187698841095,
0.2769010663032532,
0.2782217860221863,
0.5006805062294006
],
"eval_math_textbook_runtime": 19.0702,
"eval_math_textbook_samples_per_second": 5.244,
"eval_math_textbook_steps_per_second": 0.105,
"epoch": 0.44,
"step": 25
},
{
"loss": 0.703,
"learning_rate": 7.2e-05,
"epoch": 0.46,
"step": 26
},
{
"loss": 0.6178,
"learning_rate": 7.2e-05,
"epoch": 0.47,
"step": 27
},
{
"loss": 0.6481,
"learning_rate": 7.2e-05,
"epoch": 0.49,
"step": 28
},
{
"loss": 0.6675,
"learning_rate": 7.2e-05,
"epoch": 0.51,
"step": 29
},
{
"loss": 0.6476,
"learning_rate": 7.2e-05,
"epoch": 0.53,
"step": 30
},
{
"loss": 0.6855,
"learning_rate": 7.2e-05,
"epoch": 0.54,
"step": 31
},
{
"loss": 0.6762,
"learning_rate": 7.2e-05,
"epoch": 0.56,
"step": 32
},
{
"loss": 0.6666,
"learning_rate": 7.2e-05,
"epoch": 0.58,
"step": 33
},
{
"loss": 0.6522,
"learning_rate": 7.2e-05,
"epoch": 0.6,
"step": 34
},
{
"loss": 0.6955,
"learning_rate": 7.2e-05,
"epoch": 0.61,
"step": 35
},
{
"loss": 0.6441,
"learning_rate": 7.2e-05,
"epoch": 0.63,
"step": 36
},
{
"loss": 0.7226,
"learning_rate": 7.2e-05,
"epoch": 0.65,
"step": 37
},
{
"loss": 0.6256,
"learning_rate": 7.2e-05,
"epoch": 0.67,
"step": 38
},
{
"loss": 0.6679,
"learning_rate": 7.2e-05,
"epoch": 0.68,
"step": 39
},
{
"loss": 0.6795,
"learning_rate": 7.2e-05,
"epoch": 0.7,
"step": 40
},
{
"loss": 0.6767,
"learning_rate": 7.2e-05,
"epoch": 0.72,
"step": 41
},
{
"loss": 0.6299,
"learning_rate": 7.2e-05,
"epoch": 0.74,
"step": 42
},
{
"loss": 0.6904,
"learning_rate": 7.2e-05,
"epoch": 0.75,
"step": 43
},
{
"loss": 0.6604,
"learning_rate": 7.2e-05,
"epoch": 0.77,
"step": 44
},
{
"loss": 0.6656,
"learning_rate": 7.2e-05,
"epoch": 0.79,
"step": 45
},
{
"loss": 0.5932,
"learning_rate": 7.2e-05,
"epoch": 0.81,
"step": 46
},
{
"loss": 0.6023,
"learning_rate": 7.2e-05,
"epoch": 0.82,
"step": 47
},
{
"loss": 0.6213,
"learning_rate": 7.2e-05,
"epoch": 0.84,
"step": 48
},
{
"loss": 0.666,
"learning_rate": 7.2e-05,
"epoch": 0.86,
"step": 49
},
{
"loss": 0.5679,
"learning_rate": 7.2e-05,
"epoch": 0.88,
"step": 50
},
{
"eval_math_textbook_loss": 0.6765614151954651,
"eval_math_textbook_score": -0.25464117527008057,
"eval_math_textbook_brier_score": 0.25464117527008057,
"eval_math_textbook_average_probability": 0.5048416256904602,
"eval_math_textbook_accuracy": 0.56,
"eval_math_textbook_probabilities": [
0.5234187841415405,
0.49480947852134705,
0.9842031002044678,
0.5374545454978943,
0.5133032202720642,
0.5459731221199036,
0.48925507068634033,
0.494358092546463,
0.5328835248947144,
0.5834941267967224,
0.6279994249343872,
0.9957192540168762,
0.5011470317840576,
0.500601053237915,
0.506278932094574,
0.4537994861602783,
0.3125247061252594,
0.3253447115421295,
0.4988914430141449,
0.48819318413734436,
0.5061185359954834,
0.49299943447113037,
0.489584356546402,
0.49003836512565613,
0.5141531229019165,
0.515479326248169,
0.5011539459228516,
0.5012525916099548,
0.5015091896057129,
0.4995449185371399,
0.524846613407135,
0.449972540140152,
0.46025410294532776,
0.5029928088188171,
0.4909910261631012,
0.5055700540542603,
0.5659182071685791,
0.5701009631156921,
0.5675790905952454,
0.45312631130218506,
0.5314764380455017,
0.575640082359314,
0.49749186635017395,
0.5514194965362549,
0.5537279844284058,
0.5053136944770813,
0.5059685707092285,
0.5016698837280273,
0.48786991834640503,
0.5116444826126099,
0.48717230558395386,
0.49897342920303345,
0.45621031522750854,
0.46634674072265625,
0.522278904914856,
0.4994622468948364,
0.4996844530105591,
0.4987800121307373,
0.5004392862319946,
0.5018196702003479,
0.5205135345458984,
0.5497657060623169,
0.5863233804702759,
0.49731746315956116,
0.5212339162826538,
0.49849265813827515,
0.4926159679889679,
0.4818965792655945,
0.4915739595890045,
0.5013987421989441,
0.49831223487854004,
0.5014359951019287,
0.5025668740272522,
0.500033974647522,
0.4971452057361603,
0.5203076601028442,
0.5108263492584229,
0.4904172122478485,
0.4854221045970917,
0.5058303475379944,
0.4912535846233368,
0.493615984916687,
0.49969109892845154,
0.4993969202041626,
0.48576128482818604,
0.5414730906486511,
0.5250031352043152,
0.4980846643447876,
0.5048013925552368,
0.5006403923034668,
0.5311446189880371,
0.4737245738506317,
0.5515600442886353,
0.5109672546386719,
0.5131567716598511,
0.5119375586509705,
0.16946758329868317,
0.18919195234775543,
0.17149381339550018,
0.5021427273750305
],
"eval_math_textbook_runtime": 19.0382,
"eval_math_textbook_samples_per_second": 5.253,
"eval_math_textbook_steps_per_second": 0.105,
"epoch": 0.88,
"step": 50
},
{
"loss": 0.6445,
"learning_rate": 7.2e-05,
"epoch": 0.89,
"step": 51
},
{
"loss": 0.6888,
"learning_rate": 7.2e-05,
"epoch": 0.91,
"step": 52
},
{
"loss": 0.6501,
"learning_rate": 7.2e-05,
"epoch": 0.93,
"step": 53
},
{
"loss": 0.6092,
"learning_rate": 7.2e-05,
"epoch": 0.95,
"step": 54
},
{
"loss": 0.6645,
"learning_rate": 7.2e-05,
"epoch": 0.96,
"step": 55
},
{
"loss": 0.6154,
"learning_rate": 7.2e-05,
"epoch": 0.98,
"step": 56
},
{
"loss": 0.6258,
"learning_rate": 7.2e-05,
"epoch": 1.0,
"step": 57
},
{
"loss": 0.6063,
"learning_rate": 7.2e-05,
"epoch": 1.02,
"step": 58
},
{
"loss": 0.5914,
"learning_rate": 7.2e-05,
"epoch": 1.04,
"step": 59
},
{
"loss": 0.6427,
"learning_rate": 7.2e-05,
"epoch": 1.05,
"step": 60
},
{
"loss": 0.6403,
"learning_rate": 7.2e-05,
"epoch": 1.07,
"step": 61
},
{
"loss": 0.6142,
"learning_rate": 7.2e-05,
"epoch": 1.09,
"step": 62
},
{
"loss": 0.6127,
"learning_rate": 7.2e-05,
"epoch": 1.11,
"step": 63
},
{
"loss": 0.6158,
"learning_rate": 7.2e-05,
"epoch": 1.12,
"step": 64
},
{
"loss": 0.6626,
"learning_rate": 7.2e-05,
"epoch": 1.14,
"step": 65
},
{
"loss": 0.6157,
"learning_rate": 7.2e-05,
"epoch": 1.16,
"step": 66
},
{
"loss": 0.5597,
"learning_rate": 7.2e-05,
"epoch": 1.18,
"step": 67
},
{
"loss": 0.605,
"learning_rate": 7.2e-05,
"epoch": 1.19,
"step": 68
},
{
"loss": 0.6055,
"learning_rate": 7.2e-05,
"epoch": 1.21,
"step": 69
},
{
"loss": 0.5729,
"learning_rate": 7.2e-05,
"epoch": 1.23,
"step": 70
},
{
"loss": 0.6415,
"learning_rate": 7.2e-05,
"epoch": 1.25,
"step": 71
},
{
"loss": 0.5855,
"learning_rate": 7.2e-05,
"epoch": 1.26,
"step": 72
},
{
"loss": 0.6223,
"learning_rate": 7.2e-05,
"epoch": 1.28,
"step": 73
},
{
"loss": 0.642,
"learning_rate": 7.2e-05,
"epoch": 1.3,
"step": 74
},
{
"loss": 0.6395,
"learning_rate": 7.2e-05,
"epoch": 1.32,
"step": 75
},
{
"eval_math_textbook_loss": 0.6669816374778748,
"eval_math_textbook_score": -0.25246959924697876,
"eval_math_textbook_brier_score": 0.25246959924697876,
"eval_math_textbook_average_probability": 0.520313024520874,
"eval_math_textbook_accuracy": 0.64,
"eval_math_textbook_probabilities": [
0.5060237050056458,
0.49064791202545166,
0.9997841715812683,
0.5625044107437134,
0.5323017835617065,
0.6205570101737976,
0.49191153049468994,
0.5035534501075745,
0.6117144823074341,
0.5694499611854553,
0.62008136510849,
0.9997065663337708,
0.5023816823959351,
0.5059705376625061,
0.5042400360107422,
0.3661307096481323,
0.19351331889629364,
0.23905089497566223,
0.5743193030357361,
0.5918472409248352,
0.60948646068573,
0.4980088472366333,
0.47986215353012085,
0.6472142934799194,
0.5021066069602966,
0.577959418296814,
0.4980534315109253,
0.4979358911514282,
0.5210027098655701,
0.5152252912521362,
0.5233669281005859,
0.5047284364700317,
0.5074074864387512,
0.5100474953651428,
0.4624626040458679,
0.5011353492736816,
0.5204907059669495,
0.5256100296974182,
0.523885190486908,
0.3173781633377075,
0.7477626204490662,
0.8833009004592896,
0.4930415749549866,
0.6655856966972351,
0.6711253523826599,
0.5148721933364868,
0.514183521270752,
0.5223090648651123,
0.47892889380455017,
0.5266019105911255,
0.4735231399536133,
0.48682326078414917,
0.4247528314590454,
0.4452798366546631,
0.5557901859283447,
0.500255823135376,
0.5031067728996277,
0.4988952875137329,
0.49860796332359314,
0.502892792224884,
0.7142451405525208,
0.7836840152740479,
0.8584773540496826,
0.500076413154602,
0.5083998441696167,
0.5015168190002441,
0.49127575755119324,
0.4740145802497864,
0.4983091354370117,
0.5068450570106506,
0.49789443612098694,
0.5055553913116455,
0.4963601231575012,
0.49431753158569336,
0.48503610491752625,
0.503131091594696,
0.5051704049110413,
0.49630600214004517,
0.5080066323280334,
0.5008676052093506,
0.5115634799003601,
0.49521031975746155,
0.5034043192863464,
0.49739542603492737,
0.42049840092658997,
0.6085013747215271,
0.5950183272361755,
0.4983161389827728,
0.5233954787254333,
0.49963676929473877,
0.6716890931129456,
0.3312073051929474,
0.7989556789398193,
0.532721757888794,
0.532436728477478,
0.5268887281417847,
0.002659181132912636,
0.0034369390923529863,
0.0025918150786310434,
0.5095915794372559
],
"eval_math_textbook_runtime": 19.0329,
"eval_math_textbook_samples_per_second": 5.254,
"eval_math_textbook_steps_per_second": 0.105,
"epoch": 1.32,
"step": 75
},
{
"loss": 0.5907,
"learning_rate": 7.2e-05,
"epoch": 1.33,
"step": 76
},
{
"loss": 0.6291,
"learning_rate": 7.2e-05,
"epoch": 1.35,
"step": 77
},
{
"loss": 0.6285,
"learning_rate": 7.2e-05,
"epoch": 1.37,
"step": 78
},
{
"loss": 0.7668,
"learning_rate": 7.2e-05,
"epoch": 1.39,
"step": 79
},
{
"loss": 0.5893,
"learning_rate": 7.2e-05,
"epoch": 1.4,
"step": 80
},
{
"loss": 0.6201,
"learning_rate": 7.2e-05,
"epoch": 1.42,
"step": 81
},
{
"loss": 0.5551,
"learning_rate": 7.2e-05,
"epoch": 1.44,
"step": 82
},
{
"loss": 0.5957,
"learning_rate": 7.2e-05,
"epoch": 1.46,
"step": 83
},
{
"loss": 0.4946,
"learning_rate": 7.2e-05,
"epoch": 1.47,
"step": 84
},
{
"loss": 0.6317,
"learning_rate": 7.2e-05,
"epoch": 1.49,
"step": 85
},
{
"loss": 0.6051,
"learning_rate": 7.2e-05,
"epoch": 1.51,
"step": 86
},
{
"loss": 0.5159,
"learning_rate": 7.2e-05,
"epoch": 1.53,
"step": 87
},
{
"loss": 0.5903,
"learning_rate": 7.2e-05,
"epoch": 1.54,
"step": 88
},
{
"loss": 0.6392,
"learning_rate": 7.2e-05,
"epoch": 1.56,
"step": 89
},
{
"loss": 0.5701,
"learning_rate": 7.2e-05,
"epoch": 1.58,
"step": 90
},
{
"loss": 0.5516,
"learning_rate": 7.2e-05,
"epoch": 1.6,
"step": 91
},
{
"loss": 0.5567,
"learning_rate": 7.2e-05,
"epoch": 1.61,
"step": 92
},
{
"loss": 0.6248,
"learning_rate": 7.2e-05,
"epoch": 1.63,
"step": 93
},
{
"loss": 0.6205,
"learning_rate": 7.2e-05,
"epoch": 1.65,
"step": 94
},
{
"loss": 0.5114,
"learning_rate": 7.2e-05,
"epoch": 1.67,
"step": 95
},
{
"loss": 0.5833,
"learning_rate": 7.2e-05,
"epoch": 1.68,
"step": 96
},
{
"loss": 0.5364,
"learning_rate": 7.2e-05,
"epoch": 1.7,
"step": 97
},
{
"loss": 0.4792,
"learning_rate": 7.2e-05,
"epoch": 1.72,
"step": 98
},
{
"loss": 0.4465,
"learning_rate": 7.2e-05,
"epoch": 1.74,
"step": 99
},
{
"loss": 0.5236,
"learning_rate": 7.2e-05,
"epoch": 1.75,
"step": 100
},
{
"eval_math_textbook_loss": 0.6085913181304932,
"eval_math_textbook_score": -0.2288055270910263,
"eval_math_textbook_brier_score": 0.2288055270910263,
"eval_math_textbook_average_probability": 0.5589618682861328,
"eval_math_textbook_accuracy": 0.62,
"eval_math_textbook_probabilities": [
0.6090167760848999,
0.39267632365226746,
0.9999756813049316,
0.6500729918479919,
0.5652173161506653,
0.7466467618942261,
0.490016907453537,
0.4640730321407318,
0.7244709730148315,
0.9277529120445251,
0.9807989597320557,
0.9996448755264282,
0.5205850005149841,
0.5194319486618042,
0.5667957663536072,
0.5747010707855225,
0.5128976106643677,
0.5322805047035217,
0.6718254685401917,
0.6567670106887817,
0.7737979888916016,
0.48333612084388733,
0.4443439543247223,
0.6398590207099915,
0.6147111058235168,
0.8212567567825317,
0.5021203756332397,
0.5463013052940369,
0.5228294134140015,
0.5497167110443115,
0.633198618888855,
0.3830769956111908,
0.3979168236255646,
0.4695032238960266,
0.40719327330589294,
0.4301484227180481,
0.7173041701316833,
0.7347032427787781,
0.7265180945396423,
0.40913403034210205,
0.7016667127609253,
0.7560572624206543,
0.4757772982120514,
0.966978907585144,
0.9720750451087952,
0.5886526107788086,
0.5479859113693237,
0.5956962704658508,
0.4101646840572357,
0.5605520009994507,
0.372322678565979,
0.4412081837654114,
0.26623469591140747,
0.30087795853614807,
0.6590585708618164,
0.44494733214378357,
0.46975815296173096,
0.5008803606033325,
0.5089434385299683,
0.5352832078933716,
0.6927960515022278,
0.7665735483169556,
0.8244929313659668,
0.4457288682460785,
0.5919255018234253,
0.46556612849235535,
0.36381617188453674,
0.31074991822242737,
0.4532500207424164,
0.5130099654197693,
0.5030292272567749,
0.5009328722953796,
0.4911012649536133,
0.46780434250831604,
0.44806256890296936,
0.6889190673828125,
0.47469109296798706,
0.4170834422111511,
0.6015594601631165,
0.5276020765304565,
0.638933539390564,
0.48303303122520447,
0.47112226486206055,
0.48979878425598145,
0.5966281294822693,
0.8498927354812622,
0.8357806205749512,
0.4697180688381195,
0.5513212084770203,
0.45615488290786743,
0.5793567895889282,
0.4038052558898926,
0.603541374206543,
0.6687532663345337,
0.6773205399513245,
0.6696297526359558,
0.0004917322075925767,
0.002351292409002781,
0.0006308771553449333,
0.5114908218383789
],
"eval_math_textbook_runtime": 19.0366,
"eval_math_textbook_samples_per_second": 5.253,
"eval_math_textbook_steps_per_second": 0.105,
"epoch": 1.75,
"step": 100
},
{
"train_runtime": 1703.3257,
"train_samples_per_second": 1.879,
"train_steps_per_second": 0.059,
"total_flos": 0.0,
"train_loss": 0.6338043370842934,
"epoch": 1.75,
"step": 100
}
]