{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 241,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004149377593360996,
"grad_norm": 0.5114469528198242,
"learning_rate": 2e-05,
"loss": 0.7995174527168274,
"step": 1
},
{
"epoch": 0.008298755186721992,
"grad_norm": 0.5205491185188293,
"learning_rate": 2e-05,
"loss": 0.8812965750694275,
"step": 2
},
{
"epoch": 0.012448132780082987,
"grad_norm": 0.6057224273681641,
"learning_rate": 2e-05,
"loss": 0.8402022123336792,
"step": 3
},
{
"epoch": 0.016597510373443983,
"grad_norm": 0.5623906254768372,
"learning_rate": 2e-05,
"loss": 0.8188848495483398,
"step": 4
},
{
"epoch": 0.02074688796680498,
"grad_norm": 0.574876606464386,
"learning_rate": 2e-05,
"loss": 0.8380811214447021,
"step": 5
},
{
"epoch": 0.024896265560165973,
"grad_norm": 0.4625989496707916,
"learning_rate": 2e-05,
"loss": 0.7132218480110168,
"step": 6
},
{
"epoch": 0.029045643153526972,
"grad_norm": 0.5183306336402893,
"learning_rate": 2e-05,
"loss": 0.8268325328826904,
"step": 7
},
{
"epoch": 0.03319502074688797,
"grad_norm": 0.4928549826145172,
"learning_rate": 2e-05,
"loss": 0.7686080932617188,
"step": 8
},
{
"epoch": 0.03734439834024896,
"grad_norm": 0.4636511206626892,
"learning_rate": 2e-05,
"loss": 0.8444753289222717,
"step": 9
},
{
"epoch": 0.04149377593360996,
"grad_norm": 0.5008803606033325,
"learning_rate": 2e-05,
"loss": 0.6671140789985657,
"step": 10
},
{
"epoch": 0.04564315352697095,
"grad_norm": 0.49685290455818176,
"learning_rate": 2e-05,
"loss": 0.7625027894973755,
"step": 11
},
{
"epoch": 0.04979253112033195,
"grad_norm": 0.5161386728286743,
"learning_rate": 2e-05,
"loss": 0.5999635457992554,
"step": 12
},
{
"epoch": 0.05394190871369295,
"grad_norm": 0.46996110677719116,
"learning_rate": 2e-05,
"loss": 0.7389070987701416,
"step": 13
},
{
"epoch": 0.058091286307053944,
"grad_norm": 0.45131370425224304,
"learning_rate": 2e-05,
"loss": 0.6111957430839539,
"step": 14
},
{
"epoch": 0.06224066390041494,
"grad_norm": 0.4911205470561981,
"learning_rate": 2e-05,
"loss": 0.5750669240951538,
"step": 15
},
{
"epoch": 0.06639004149377593,
"grad_norm": 0.46468034386634827,
"learning_rate": 2e-05,
"loss": 0.6607809066772461,
"step": 16
},
{
"epoch": 0.07053941908713693,
"grad_norm": 0.5140272378921509,
"learning_rate": 2e-05,
"loss": 0.8089659214019775,
"step": 17
},
{
"epoch": 0.07468879668049792,
"grad_norm": 0.49761149287223816,
"learning_rate": 2e-05,
"loss": 0.8017055988311768,
"step": 18
},
{
"epoch": 0.07883817427385892,
"grad_norm": 0.45623964071273804,
"learning_rate": 2e-05,
"loss": 0.725612223148346,
"step": 19
},
{
"epoch": 0.08298755186721991,
"grad_norm": 0.4778558015823364,
"learning_rate": 2e-05,
"loss": 0.6465242505073547,
"step": 20
},
{
"epoch": 0.08713692946058091,
"grad_norm": 0.4813624620437622,
"learning_rate": 2e-05,
"loss": 0.6812542676925659,
"step": 21
},
{
"epoch": 0.0912863070539419,
"grad_norm": 0.45828455686569214,
"learning_rate": 2e-05,
"loss": 0.6355943083763123,
"step": 22
},
{
"epoch": 0.0954356846473029,
"grad_norm": 0.39770182967185974,
"learning_rate": 2e-05,
"loss": 0.734164297580719,
"step": 23
},
{
"epoch": 0.0995850622406639,
"grad_norm": 0.515662431716919,
"learning_rate": 2e-05,
"loss": 0.775545060634613,
"step": 24
},
{
"epoch": 0.1037344398340249,
"grad_norm": 0.4875846207141876,
"learning_rate": 2e-05,
"loss": 0.7608263492584229,
"step": 25
},
{
"epoch": 0.1078838174273859,
"grad_norm": 0.4272926449775696,
"learning_rate": 2e-05,
"loss": 0.655767560005188,
"step": 26
},
{
"epoch": 0.11203319502074689,
"grad_norm": 0.47189342975616455,
"learning_rate": 2e-05,
"loss": 0.6984891295433044,
"step": 27
},
{
"epoch": 0.11618257261410789,
"grad_norm": 0.49677926301956177,
"learning_rate": 2e-05,
"loss": 0.6952549815177917,
"step": 28
},
{
"epoch": 0.12033195020746888,
"grad_norm": 0.5341811776161194,
"learning_rate": 2e-05,
"loss": 0.6844781041145325,
"step": 29
},
{
"epoch": 0.12448132780082988,
"grad_norm": 0.49139678478240967,
"learning_rate": 2e-05,
"loss": 0.7043532729148865,
"step": 30
},
{
"epoch": 0.12863070539419086,
"grad_norm": 0.42113780975341797,
"learning_rate": 2e-05,
"loss": 0.6791371703147888,
"step": 31
},
{
"epoch": 0.13278008298755187,
"grad_norm": 0.490699827671051,
"learning_rate": 2e-05,
"loss": 0.66917484998703,
"step": 32
},
{
"epoch": 0.13692946058091288,
"grad_norm": 0.48269012570381165,
"learning_rate": 2e-05,
"loss": 0.6663049459457397,
"step": 33
},
{
"epoch": 0.14107883817427386,
"grad_norm": 0.4833972454071045,
"learning_rate": 2e-05,
"loss": 0.7479192018508911,
"step": 34
},
{
"epoch": 0.14522821576763487,
"grad_norm": 0.4521920382976532,
"learning_rate": 2e-05,
"loss": 0.5006750822067261,
"step": 35
},
{
"epoch": 0.14937759336099585,
"grad_norm": 0.4805753231048584,
"learning_rate": 2e-05,
"loss": 0.7437685132026672,
"step": 36
},
{
"epoch": 0.15352697095435686,
"grad_norm": 0.4702300429344177,
"learning_rate": 2e-05,
"loss": 0.7820006608963013,
"step": 37
},
{
"epoch": 0.15767634854771784,
"grad_norm": 0.4416898190975189,
"learning_rate": 2e-05,
"loss": 0.5911201238632202,
"step": 38
},
{
"epoch": 0.16182572614107885,
"grad_norm": 0.46818608045578003,
"learning_rate": 2e-05,
"loss": 0.6237752437591553,
"step": 39
},
{
"epoch": 0.16597510373443983,
"grad_norm": 0.38742795586586,
"learning_rate": 2e-05,
"loss": 0.6044095754623413,
"step": 40
},
{
"epoch": 0.17012448132780084,
"grad_norm": 0.4806065857410431,
"learning_rate": 2e-05,
"loss": 0.6341798901557922,
"step": 41
},
{
"epoch": 0.17427385892116182,
"grad_norm": 0.4329955279827118,
"learning_rate": 2e-05,
"loss": 0.621407687664032,
"step": 42
},
{
"epoch": 0.17842323651452283,
"grad_norm": 0.46890074014663696,
"learning_rate": 2e-05,
"loss": 0.7025566697120667,
"step": 43
},
{
"epoch": 0.1825726141078838,
"grad_norm": 0.4821957051753998,
"learning_rate": 2e-05,
"loss": 0.6547812819480896,
"step": 44
},
{
"epoch": 0.18672199170124482,
"grad_norm": 0.4716266691684723,
"learning_rate": 2e-05,
"loss": 0.6434807777404785,
"step": 45
},
{
"epoch": 0.1908713692946058,
"grad_norm": 0.5017584562301636,
"learning_rate": 2e-05,
"loss": 0.6461539268493652,
"step": 46
},
{
"epoch": 0.1950207468879668,
"grad_norm": 0.4837803244590759,
"learning_rate": 2e-05,
"loss": 0.6638780236244202,
"step": 47
},
{
"epoch": 0.1991701244813278,
"grad_norm": 0.4523409605026245,
"learning_rate": 2e-05,
"loss": 0.5731872916221619,
"step": 48
},
{
"epoch": 0.2033195020746888,
"grad_norm": 0.46308189630508423,
"learning_rate": 2e-05,
"loss": 0.6024616956710815,
"step": 49
},
{
"epoch": 0.2074688796680498,
"grad_norm": 0.4565693140029907,
"learning_rate": 2e-05,
"loss": 0.5795129537582397,
"step": 50
},
{
"epoch": 0.21161825726141079,
"grad_norm": 0.48081323504447937,
"learning_rate": 2e-05,
"loss": 0.6645175814628601,
"step": 51
},
{
"epoch": 0.2157676348547718,
"grad_norm": 0.4649989902973175,
"learning_rate": 2e-05,
"loss": 0.6339988112449646,
"step": 52
},
{
"epoch": 0.21991701244813278,
"grad_norm": 0.45999905467033386,
"learning_rate": 2e-05,
"loss": 0.6070005297660828,
"step": 53
},
{
"epoch": 0.22406639004149378,
"grad_norm": 0.43405112624168396,
"learning_rate": 2e-05,
"loss": 0.6078118085861206,
"step": 54
},
{
"epoch": 0.22821576763485477,
"grad_norm": 0.557212233543396,
"learning_rate": 2e-05,
"loss": 0.6502783894538879,
"step": 55
},
{
"epoch": 0.23236514522821577,
"grad_norm": 0.4206949472427368,
"learning_rate": 2e-05,
"loss": 0.5604119896888733,
"step": 56
},
{
"epoch": 0.23651452282157676,
"grad_norm": 0.4931945502758026,
"learning_rate": 2e-05,
"loss": 0.5463195443153381,
"step": 57
},
{
"epoch": 0.24066390041493776,
"grad_norm": 0.44888630509376526,
"learning_rate": 2e-05,
"loss": 0.49333369731903076,
"step": 58
},
{
"epoch": 0.24481327800829875,
"grad_norm": 0.4515199363231659,
"learning_rate": 2e-05,
"loss": 0.66854327917099,
"step": 59
},
{
"epoch": 0.24896265560165975,
"grad_norm": 0.46686026453971863,
"learning_rate": 2e-05,
"loss": 0.5279274582862854,
"step": 60
},
{
"epoch": 0.25311203319502074,
"grad_norm": 0.46663975715637207,
"learning_rate": 2e-05,
"loss": 0.6141489148139954,
"step": 61
},
{
"epoch": 0.2572614107883817,
"grad_norm": 0.45049089193344116,
"learning_rate": 2e-05,
"loss": 0.6643646955490112,
"step": 62
},
{
"epoch": 0.26141078838174275,
"grad_norm": 0.49262335896492004,
"learning_rate": 2e-05,
"loss": 0.6589719653129578,
"step": 63
},
{
"epoch": 0.26556016597510373,
"grad_norm": 0.5234288573265076,
"learning_rate": 2e-05,
"loss": 0.6250555515289307,
"step": 64
},
{
"epoch": 0.2697095435684647,
"grad_norm": 0.4657873809337616,
"learning_rate": 2e-05,
"loss": 0.5761417150497437,
"step": 65
},
{
"epoch": 0.27385892116182575,
"grad_norm": 2.8522613048553467,
"learning_rate": 2e-05,
"loss": 0.6810148358345032,
"step": 66
},
{
"epoch": 0.27800829875518673,
"grad_norm": 0.45667174458503723,
"learning_rate": 2e-05,
"loss": 0.5667203664779663,
"step": 67
},
{
"epoch": 0.2821576763485477,
"grad_norm": 0.48965880274772644,
"learning_rate": 2e-05,
"loss": 0.6057634949684143,
"step": 68
},
{
"epoch": 0.2863070539419087,
"grad_norm": 0.4700252115726471,
"learning_rate": 2e-05,
"loss": 0.5498369932174683,
"step": 69
},
{
"epoch": 0.29045643153526973,
"grad_norm": 0.4457707703113556,
"learning_rate": 2e-05,
"loss": 0.5500881671905518,
"step": 70
},
{
"epoch": 0.2946058091286307,
"grad_norm": 0.5242801904678345,
"learning_rate": 2e-05,
"loss": 0.6648991703987122,
"step": 71
},
{
"epoch": 0.2987551867219917,
"grad_norm": 0.4845593273639679,
"learning_rate": 2e-05,
"loss": 0.6495253443717957,
"step": 72
},
{
"epoch": 0.3029045643153527,
"grad_norm": 0.4535577595233917,
"learning_rate": 2e-05,
"loss": 0.6440762281417847,
"step": 73
},
{
"epoch": 0.3070539419087137,
"grad_norm": 0.4424896240234375,
"learning_rate": 2e-05,
"loss": 0.5427602529525757,
"step": 74
},
{
"epoch": 0.3112033195020747,
"grad_norm": 0.4791293144226074,
"learning_rate": 2e-05,
"loss": 0.6312339901924133,
"step": 75
},
{
"epoch": 0.3153526970954357,
"grad_norm": 0.49440717697143555,
"learning_rate": 2e-05,
"loss": 0.7304765582084656,
"step": 76
},
{
"epoch": 0.31950207468879666,
"grad_norm": 0.47376683354377747,
"learning_rate": 2e-05,
"loss": 0.5550855994224548,
"step": 77
},
{
"epoch": 0.3236514522821577,
"grad_norm": 0.5386195182800293,
"learning_rate": 2e-05,
"loss": 0.7627665996551514,
"step": 78
},
{
"epoch": 0.3278008298755187,
"grad_norm": 0.5139470100402832,
"learning_rate": 2e-05,
"loss": 0.7294001579284668,
"step": 79
},
{
"epoch": 0.33195020746887965,
"grad_norm": 0.5727441310882568,
"learning_rate": 2e-05,
"loss": 0.6094337105751038,
"step": 80
},
{
"epoch": 0.3360995850622407,
"grad_norm": 0.4475933313369751,
"learning_rate": 2e-05,
"loss": 0.6689184904098511,
"step": 81
},
{
"epoch": 0.34024896265560167,
"grad_norm": 0.48615196347236633,
"learning_rate": 2e-05,
"loss": 0.5170673727989197,
"step": 82
},
{
"epoch": 0.34439834024896265,
"grad_norm": 0.4444977939128876,
"learning_rate": 2e-05,
"loss": 0.5426638126373291,
"step": 83
},
{
"epoch": 0.34854771784232363,
"grad_norm": 0.4532429873943329,
"learning_rate": 2e-05,
"loss": 0.5246436595916748,
"step": 84
},
{
"epoch": 0.35269709543568467,
"grad_norm": 0.5425305962562561,
"learning_rate": 2e-05,
"loss": 0.7444034814834595,
"step": 85
},
{
"epoch": 0.35684647302904565,
"grad_norm": 0.4604993164539337,
"learning_rate": 2e-05,
"loss": 0.6390590071678162,
"step": 86
},
{
"epoch": 0.36099585062240663,
"grad_norm": 0.4503551423549652,
"learning_rate": 2e-05,
"loss": 0.7437008023262024,
"step": 87
},
{
"epoch": 0.3651452282157676,
"grad_norm": 0.473531037569046,
"learning_rate": 2e-05,
"loss": 0.5801289677619934,
"step": 88
},
{
"epoch": 0.36929460580912865,
"grad_norm": 0.43614616990089417,
"learning_rate": 2e-05,
"loss": 0.5945846438407898,
"step": 89
},
{
"epoch": 0.37344398340248963,
"grad_norm": 0.5157416462898254,
"learning_rate": 2e-05,
"loss": 0.5870503187179565,
"step": 90
},
{
"epoch": 0.3775933609958506,
"grad_norm": 0.4724714756011963,
"learning_rate": 2e-05,
"loss": 0.7136172652244568,
"step": 91
},
{
"epoch": 0.3817427385892116,
"grad_norm": 0.49608129262924194,
"learning_rate": 2e-05,
"loss": 0.5707521438598633,
"step": 92
},
{
"epoch": 0.38589211618257263,
"grad_norm": 0.4372619390487671,
"learning_rate": 2e-05,
"loss": 0.6751445531845093,
"step": 93
},
{
"epoch": 0.3900414937759336,
"grad_norm": 0.8502039909362793,
"learning_rate": 2e-05,
"loss": 0.7432682514190674,
"step": 94
},
{
"epoch": 0.3941908713692946,
"grad_norm": 0.43237465620040894,
"learning_rate": 2e-05,
"loss": 0.5463064908981323,
"step": 95
},
{
"epoch": 0.3983402489626556,
"grad_norm": 0.4683166444301605,
"learning_rate": 2e-05,
"loss": 0.5722454190254211,
"step": 96
},
{
"epoch": 0.4024896265560166,
"grad_norm": 0.49307140707969666,
"learning_rate": 2e-05,
"loss": 0.7676360011100769,
"step": 97
},
{
"epoch": 0.4066390041493776,
"grad_norm": 0.45873740315437317,
"learning_rate": 2e-05,
"loss": 0.7670221328735352,
"step": 98
},
{
"epoch": 0.4107883817427386,
"grad_norm": 0.522739589214325,
"learning_rate": 2e-05,
"loss": 0.6198732256889343,
"step": 99
},
{
"epoch": 0.4149377593360996,
"grad_norm": 0.513500988483429,
"learning_rate": 2e-05,
"loss": 0.6557285189628601,
"step": 100
},
{
"epoch": 0.4190871369294606,
"grad_norm": 0.5162559747695923,
"learning_rate": 2e-05,
"loss": 0.6777411699295044,
"step": 101
},
{
"epoch": 0.42323651452282157,
"grad_norm": 0.4742807447910309,
"learning_rate": 2e-05,
"loss": 0.5189216732978821,
"step": 102
},
{
"epoch": 0.42738589211618255,
"grad_norm": 0.3864991068840027,
"learning_rate": 2e-05,
"loss": 0.5397198796272278,
"step": 103
},
{
"epoch": 0.4315352697095436,
"grad_norm": 0.44808462262153625,
"learning_rate": 2e-05,
"loss": 0.5719993710517883,
"step": 104
},
{
"epoch": 0.43568464730290457,
"grad_norm": 0.5047919154167175,
"learning_rate": 2e-05,
"loss": 0.7246726751327515,
"step": 105
},
{
"epoch": 0.43983402489626555,
"grad_norm": 0.4501510262489319,
"learning_rate": 2e-05,
"loss": 0.5421350598335266,
"step": 106
},
{
"epoch": 0.44398340248962653,
"grad_norm": 0.5187399983406067,
"learning_rate": 2e-05,
"loss": 0.6851190328598022,
"step": 107
},
{
"epoch": 0.44813278008298757,
"grad_norm": 0.4442541003227234,
"learning_rate": 2e-05,
"loss": 0.7323095798492432,
"step": 108
},
{
"epoch": 0.45228215767634855,
"grad_norm": 0.4546023905277252,
"learning_rate": 2e-05,
"loss": 0.5949406027793884,
"step": 109
},
{
"epoch": 0.45643153526970953,
"grad_norm": 0.43765076994895935,
"learning_rate": 2e-05,
"loss": 0.5195109248161316,
"step": 110
},
{
"epoch": 0.4605809128630705,
"grad_norm": 0.6012418866157532,
"learning_rate": 2e-05,
"loss": 0.5891928672790527,
"step": 111
},
{
"epoch": 0.46473029045643155,
"grad_norm": 0.5350989699363708,
"learning_rate": 2e-05,
"loss": 0.7073556184768677,
"step": 112
},
{
"epoch": 0.46887966804979253,
"grad_norm": 0.40423402190208435,
"learning_rate": 2e-05,
"loss": 0.6081284284591675,
"step": 113
},
{
"epoch": 0.4730290456431535,
"grad_norm": 0.48459556698799133,
"learning_rate": 2e-05,
"loss": 0.7626031637191772,
"step": 114
},
{
"epoch": 0.47717842323651455,
"grad_norm": 0.5132282972335815,
"learning_rate": 2e-05,
"loss": 0.7070454359054565,
"step": 115
},
{
"epoch": 0.48132780082987553,
"grad_norm": 0.40754643082618713,
"learning_rate": 2e-05,
"loss": 0.7881268858909607,
"step": 116
},
{
"epoch": 0.4854771784232365,
"grad_norm": 0.46227574348449707,
"learning_rate": 2e-05,
"loss": 0.5589393973350525,
"step": 117
},
{
"epoch": 0.4896265560165975,
"grad_norm": 0.458891898393631,
"learning_rate": 2e-05,
"loss": 0.6076244711875916,
"step": 118
},
{
"epoch": 0.49377593360995853,
"grad_norm": 0.4314862787723541,
"learning_rate": 2e-05,
"loss": 0.58890700340271,
"step": 119
},
{
"epoch": 0.4979253112033195,
"grad_norm": 0.4849430322647095,
"learning_rate": 2e-05,
"loss": 0.7297042012214661,
"step": 120
},
{
"epoch": 0.5020746887966805,
"grad_norm": 0.4734286963939667,
"learning_rate": 2e-05,
"loss": 0.7929898500442505,
"step": 121
},
{
"epoch": 0.5062240663900415,
"grad_norm": 0.4982983469963074,
"learning_rate": 2e-05,
"loss": 0.6973749399185181,
"step": 122
},
{
"epoch": 0.5103734439834025,
"grad_norm": 0.4555007517337799,
"learning_rate": 2e-05,
"loss": 0.6363988518714905,
"step": 123
},
{
"epoch": 0.5145228215767634,
"grad_norm": 0.469707190990448,
"learning_rate": 2e-05,
"loss": 0.6936283111572266,
"step": 124
},
{
"epoch": 0.5186721991701245,
"grad_norm": 0.45310160517692566,
"learning_rate": 2e-05,
"loss": 0.8045607209205627,
"step": 125
},
{
"epoch": 0.5228215767634855,
"grad_norm": 0.5117340087890625,
"learning_rate": 2e-05,
"loss": 0.5602521300315857,
"step": 126
},
{
"epoch": 0.5269709543568465,
"grad_norm": 0.4890298545360565,
"learning_rate": 2e-05,
"loss": 0.5749447345733643,
"step": 127
},
{
"epoch": 0.5311203319502075,
"grad_norm": 0.4680368900299072,
"learning_rate": 2e-05,
"loss": 0.6603504419326782,
"step": 128
},
{
"epoch": 0.5352697095435685,
"grad_norm": 0.4364625811576843,
"learning_rate": 2e-05,
"loss": 0.6615546941757202,
"step": 129
},
{
"epoch": 0.5394190871369294,
"grad_norm": 0.44393712282180786,
"learning_rate": 2e-05,
"loss": 0.7206588387489319,
"step": 130
},
{
"epoch": 0.5435684647302904,
"grad_norm": 0.4770648777484894,
"learning_rate": 2e-05,
"loss": 0.5122599005699158,
"step": 131
},
{
"epoch": 0.5477178423236515,
"grad_norm": 0.4254826307296753,
"learning_rate": 2e-05,
"loss": 0.5919891595840454,
"step": 132
},
{
"epoch": 0.5518672199170125,
"grad_norm": 0.49948850274086,
"learning_rate": 2e-05,
"loss": 0.7168218493461609,
"step": 133
},
{
"epoch": 0.5560165975103735,
"grad_norm": 0.46940577030181885,
"learning_rate": 2e-05,
"loss": 0.559630274772644,
"step": 134
},
{
"epoch": 0.5601659751037344,
"grad_norm": 0.38155895471572876,
"learning_rate": 2e-05,
"loss": 0.35719043016433716,
"step": 135
},
{
"epoch": 0.5643153526970954,
"grad_norm": 0.446111798286438,
"learning_rate": 2e-05,
"loss": 0.5944488644599915,
"step": 136
},
{
"epoch": 0.5684647302904564,
"grad_norm": 0.44898721575737,
"learning_rate": 2e-05,
"loss": 0.6778333187103271,
"step": 137
},
{
"epoch": 0.5726141078838174,
"grad_norm": 0.4727020263671875,
"learning_rate": 2e-05,
"loss": 0.6683153510093689,
"step": 138
},
{
"epoch": 0.5767634854771784,
"grad_norm": 0.4775353968143463,
"learning_rate": 2e-05,
"loss": 0.7357037663459778,
"step": 139
},
{
"epoch": 0.5809128630705395,
"grad_norm": 0.5201453566551208,
"learning_rate": 2e-05,
"loss": 0.5672426819801331,
"step": 140
},
{
"epoch": 0.5850622406639004,
"grad_norm": 0.4446447491645813,
"learning_rate": 2e-05,
"loss": 0.6665009260177612,
"step": 141
},
{
"epoch": 0.5892116182572614,
"grad_norm": 0.44674625992774963,
"learning_rate": 2e-05,
"loss": 0.6256436705589294,
"step": 142
},
{
"epoch": 0.5933609958506224,
"grad_norm": 0.48278629779815674,
"learning_rate": 2e-05,
"loss": 0.652278482913971,
"step": 143
},
{
"epoch": 0.5975103734439834,
"grad_norm": 0.4608626067638397,
"learning_rate": 2e-05,
"loss": 0.687121570110321,
"step": 144
},
{
"epoch": 0.6016597510373444,
"grad_norm": 0.5146644711494446,
"learning_rate": 2e-05,
"loss": 0.7759085297584534,
"step": 145
},
{
"epoch": 0.6058091286307054,
"grad_norm": 0.4703519344329834,
"learning_rate": 2e-05,
"loss": 0.6268375515937805,
"step": 146
},
{
"epoch": 0.6099585062240664,
"grad_norm": 0.4373490512371063,
"learning_rate": 2e-05,
"loss": 0.7350006699562073,
"step": 147
},
{
"epoch": 0.6141078838174274,
"grad_norm": 0.48525917530059814,
"learning_rate": 2e-05,
"loss": 0.6609182357788086,
"step": 148
},
{
"epoch": 0.6182572614107884,
"grad_norm": 0.509609043598175,
"learning_rate": 2e-05,
"loss": 0.7720542550086975,
"step": 149
},
{
"epoch": 0.6224066390041494,
"grad_norm": 0.46813687682151794,
"learning_rate": 2e-05,
"loss": 0.658400297164917,
"step": 150
},
{
"epoch": 0.6265560165975104,
"grad_norm": 0.48811477422714233,
"learning_rate": 2e-05,
"loss": 0.6340473890304565,
"step": 151
},
{
"epoch": 0.6307053941908713,
"grad_norm": 0.48529860377311707,
"learning_rate": 2e-05,
"loss": 0.7543718218803406,
"step": 152
},
{
"epoch": 0.6348547717842323,
"grad_norm": 0.4565221965312958,
"learning_rate": 2e-05,
"loss": 0.5810791254043579,
"step": 153
},
{
"epoch": 0.6390041493775933,
"grad_norm": 0.4667608141899109,
"learning_rate": 2e-05,
"loss": 0.5940293669700623,
"step": 154
},
{
"epoch": 0.6431535269709544,
"grad_norm": 0.476724773645401,
"learning_rate": 2e-05,
"loss": 0.5076797604560852,
"step": 155
},
{
"epoch": 0.6473029045643154,
"grad_norm": 0.48997762799263,
"learning_rate": 2e-05,
"loss": 0.5588229894638062,
"step": 156
},
{
"epoch": 0.6514522821576764,
"grad_norm": 0.4687066674232483,
"learning_rate": 2e-05,
"loss": 0.7414963245391846,
"step": 157
},
{
"epoch": 0.6556016597510373,
"grad_norm": 0.5096819400787354,
"learning_rate": 2e-05,
"loss": 0.6766090393066406,
"step": 158
},
{
"epoch": 0.6597510373443983,
"grad_norm": 0.40396353602409363,
"learning_rate": 2e-05,
"loss": 0.5890622735023499,
"step": 159
},
{
"epoch": 0.6639004149377593,
"grad_norm": 0.46985870599746704,
"learning_rate": 2e-05,
"loss": 0.5969380140304565,
"step": 160
},
{
"epoch": 0.6680497925311203,
"grad_norm": 0.49084073305130005,
"learning_rate": 2e-05,
"loss": 0.6371229887008667,
"step": 161
},
{
"epoch": 0.6721991701244814,
"grad_norm": 0.4466313123703003,
"learning_rate": 2e-05,
"loss": 0.6732550263404846,
"step": 162
},
{
"epoch": 0.6763485477178424,
"grad_norm": 0.4656016528606415,
"learning_rate": 2e-05,
"loss": 0.7082672119140625,
"step": 163
},
{
"epoch": 0.6804979253112033,
"grad_norm": 0.43604540824890137,
"learning_rate": 2e-05,
"loss": 0.5961745977401733,
"step": 164
},
{
"epoch": 0.6846473029045643,
"grad_norm": 0.45962008833885193,
"learning_rate": 2e-05,
"loss": 0.5974591374397278,
"step": 165
},
{
"epoch": 0.6887966804979253,
"grad_norm": 0.4566839635372162,
"learning_rate": 2e-05,
"loss": 0.5828849673271179,
"step": 166
},
{
"epoch": 0.6929460580912863,
"grad_norm": 0.38006696105003357,
"learning_rate": 2e-05,
"loss": 0.6747267246246338,
"step": 167
},
{
"epoch": 0.6970954356846473,
"grad_norm": 0.439981609582901,
"learning_rate": 2e-05,
"loss": 0.7797038555145264,
"step": 168
},
{
"epoch": 0.7012448132780082,
"grad_norm": 0.47687003016471863,
"learning_rate": 2e-05,
"loss": 0.570720911026001,
"step": 169
},
{
"epoch": 0.7053941908713693,
"grad_norm": 0.4829600155353546,
"learning_rate": 2e-05,
"loss": 0.5899892449378967,
"step": 170
},
{
"epoch": 0.7095435684647303,
"grad_norm": 0.4642188847064972,
"learning_rate": 2e-05,
"loss": 0.6866733431816101,
"step": 171
},
{
"epoch": 0.7136929460580913,
"grad_norm": 0.4619278013706207,
"learning_rate": 2e-05,
"loss": 0.5310846567153931,
"step": 172
},
{
"epoch": 0.7178423236514523,
"grad_norm": 0.40906423330307007,
"learning_rate": 2e-05,
"loss": 0.6505522131919861,
"step": 173
},
{
"epoch": 0.7219917012448133,
"grad_norm": 0.47687482833862305,
"learning_rate": 2e-05,
"loss": 0.6477482318878174,
"step": 174
},
{
"epoch": 0.7261410788381742,
"grad_norm": 0.4249359369277954,
"learning_rate": 2e-05,
"loss": 0.542078971862793,
"step": 175
},
{
"epoch": 0.7302904564315352,
"grad_norm": 0.4437820315361023,
"learning_rate": 2e-05,
"loss": 0.7326051592826843,
"step": 176
},
{
"epoch": 0.7344398340248963,
"grad_norm": 0.47250184416770935,
"learning_rate": 2e-05,
"loss": 0.7204862236976624,
"step": 177
},
{
"epoch": 0.7385892116182573,
"grad_norm": 0.45673149824142456,
"learning_rate": 2e-05,
"loss": 0.6894567608833313,
"step": 178
},
{
"epoch": 0.7427385892116183,
"grad_norm": 0.4065015912055969,
"learning_rate": 2e-05,
"loss": 0.5020947456359863,
"step": 179
},
{
"epoch": 0.7468879668049793,
"grad_norm": 0.480761855840683,
"learning_rate": 2e-05,
"loss": 0.652772843837738,
"step": 180
},
{
"epoch": 0.7510373443983402,
"grad_norm": 0.4796382784843445,
"learning_rate": 2e-05,
"loss": 0.5466834306716919,
"step": 181
},
{
"epoch": 0.7551867219917012,
"grad_norm": 0.427696168422699,
"learning_rate": 2e-05,
"loss": 0.46073320508003235,
"step": 182
},
{
"epoch": 0.7593360995850622,
"grad_norm": 0.4324597716331482,
"learning_rate": 2e-05,
"loss": 0.6211638450622559,
"step": 183
},
{
"epoch": 0.7634854771784232,
"grad_norm": 0.47733691334724426,
"learning_rate": 2e-05,
"loss": 0.6684774160385132,
"step": 184
},
{
"epoch": 0.7676348547717843,
"grad_norm": 0.431084007024765,
"learning_rate": 2e-05,
"loss": 0.6145834922790527,
"step": 185
},
{
"epoch": 0.7717842323651453,
"grad_norm": 0.5007755160331726,
"learning_rate": 2e-05,
"loss": 0.6526326537132263,
"step": 186
},
{
"epoch": 0.7759336099585062,
"grad_norm": 0.4393167793750763,
"learning_rate": 2e-05,
"loss": 0.6100775599479675,
"step": 187
},
{
"epoch": 0.7800829875518672,
"grad_norm": 0.4865422248840332,
"learning_rate": 2e-05,
"loss": 0.7980203032493591,
"step": 188
},
{
"epoch": 0.7842323651452282,
"grad_norm": 0.4837598502635956,
"learning_rate": 2e-05,
"loss": 0.5299490690231323,
"step": 189
},
{
"epoch": 0.7883817427385892,
"grad_norm": 0.5101847052574158,
"learning_rate": 2e-05,
"loss": 0.636174201965332,
"step": 190
},
{
"epoch": 0.7925311203319502,
"grad_norm": 0.481587678194046,
"learning_rate": 2e-05,
"loss": 0.584964394569397,
"step": 191
},
{
"epoch": 0.7966804979253111,
"grad_norm": 0.4833771288394928,
"learning_rate": 2e-05,
"loss": 0.660033643245697,
"step": 192
},
{
"epoch": 0.8008298755186722,
"grad_norm": 0.47723522782325745,
"learning_rate": 2e-05,
"loss": 0.5514160394668579,
"step": 193
},
{
"epoch": 0.8049792531120332,
"grad_norm": 0.46386954188346863,
"learning_rate": 2e-05,
"loss": 0.5447302460670471,
"step": 194
},
{
"epoch": 0.8091286307053942,
"grad_norm": 0.47975945472717285,
"learning_rate": 2e-05,
"loss": 0.6700522303581238,
"step": 195
},
{
"epoch": 0.8132780082987552,
"grad_norm": 0.45628130435943604,
"learning_rate": 2e-05,
"loss": 0.725788950920105,
"step": 196
},
{
"epoch": 0.8174273858921162,
"grad_norm": 0.5276447534561157,
"learning_rate": 2e-05,
"loss": 0.4795994460582733,
"step": 197
},
{
"epoch": 0.8215767634854771,
"grad_norm": 0.4197767376899719,
"learning_rate": 2e-05,
"loss": 0.5689822435379028,
"step": 198
},
{
"epoch": 0.8257261410788381,
"grad_norm": 0.4988608956336975,
"learning_rate": 2e-05,
"loss": 0.5570112466812134,
"step": 199
},
{
"epoch": 0.8298755186721992,
"grad_norm": 0.43889400362968445,
"learning_rate": 2e-05,
"loss": 0.5546621680259705,
"step": 200
},
{
"epoch": 0.8340248962655602,
"grad_norm": 0.4966701865196228,
"learning_rate": 2e-05,
"loss": 0.7806369066238403,
"step": 201
},
{
"epoch": 0.8381742738589212,
"grad_norm": 0.444965124130249,
"learning_rate": 2e-05,
"loss": 0.6175658702850342,
"step": 202
},
{
"epoch": 0.8423236514522822,
"grad_norm": 0.47721561789512634,
"learning_rate": 2e-05,
"loss": 0.608608603477478,
"step": 203
},
{
"epoch": 0.8464730290456431,
"grad_norm": 0.41363325715065,
"learning_rate": 2e-05,
"loss": 0.5362960696220398,
"step": 204
},
{
"epoch": 0.8506224066390041,
"grad_norm": 0.4979526102542877,
"learning_rate": 2e-05,
"loss": 0.6923606395721436,
"step": 205
},
{
"epoch": 0.8547717842323651,
"grad_norm": 0.4715823829174042,
"learning_rate": 2e-05,
"loss": 0.5849528312683105,
"step": 206
},
{
"epoch": 0.8589211618257261,
"grad_norm": 0.43941834568977356,
"learning_rate": 2e-05,
"loss": 0.5507952570915222,
"step": 207
},
{
"epoch": 0.8630705394190872,
"grad_norm": 0.6943396925926208,
"learning_rate": 2e-05,
"loss": 0.6139302253723145,
"step": 208
},
{
"epoch": 0.8672199170124482,
"grad_norm": 0.4135432541370392,
"learning_rate": 2e-05,
"loss": 0.6495124697685242,
"step": 209
},
{
"epoch": 0.8713692946058091,
"grad_norm": 0.4735243320465088,
"learning_rate": 2e-05,
"loss": 0.6073355674743652,
"step": 210
},
{
"epoch": 0.8755186721991701,
"grad_norm": 0.5081479549407959,
"learning_rate": 2e-05,
"loss": 0.5338884592056274,
"step": 211
},
{
"epoch": 0.8796680497925311,
"grad_norm": 0.44402876496315,
"learning_rate": 2e-05,
"loss": 0.5649405717849731,
"step": 212
},
{
"epoch": 0.8838174273858921,
"grad_norm": 0.4597266614437103,
"learning_rate": 2e-05,
"loss": 0.851700484752655,
"step": 213
},
{
"epoch": 0.8879668049792531,
"grad_norm": 0.49691715836524963,
"learning_rate": 2e-05,
"loss": 0.6800894141197205,
"step": 214
},
{
"epoch": 0.8921161825726142,
"grad_norm": 0.4347255825996399,
"learning_rate": 2e-05,
"loss": 0.6838465332984924,
"step": 215
},
{
"epoch": 0.8962655601659751,
"grad_norm": 0.4532018303871155,
"learning_rate": 2e-05,
"loss": 0.6527755856513977,
"step": 216
},
{
"epoch": 0.9004149377593361,
"grad_norm": 0.5003204941749573,
"learning_rate": 2e-05,
"loss": 0.6630940437316895,
"step": 217
},
{
"epoch": 0.9045643153526971,
"grad_norm": 0.4661204218864441,
"learning_rate": 2e-05,
"loss": 0.693079948425293,
"step": 218
},
{
"epoch": 0.9087136929460581,
"grad_norm": 0.4552728235721588,
"learning_rate": 2e-05,
"loss": 0.6484197974205017,
"step": 219
},
{
"epoch": 0.9128630705394191,
"grad_norm": 0.4681585133075714,
"learning_rate": 2e-05,
"loss": 0.6020994186401367,
"step": 220
},
{
"epoch": 0.91701244813278,
"grad_norm": 0.41022825241088867,
"learning_rate": 2e-05,
"loss": 0.530207097530365,
"step": 221
},
{
"epoch": 0.921161825726141,
"grad_norm": 0.39006152749061584,
"learning_rate": 2e-05,
"loss": 0.445180743932724,
"step": 222
},
{
"epoch": 0.9253112033195021,
"grad_norm": 0.4057929217815399,
"learning_rate": 2e-05,
"loss": 0.5387605428695679,
"step": 223
},
{
"epoch": 0.9294605809128631,
"grad_norm": 0.42876264452934265,
"learning_rate": 2e-05,
"loss": 0.5825240015983582,
"step": 224
},
{
"epoch": 0.9336099585062241,
"grad_norm": 0.48948875069618225,
"learning_rate": 2e-05,
"loss": 0.6396217942237854,
"step": 225
},
{
"epoch": 0.9377593360995851,
"grad_norm": 0.4649500548839569,
"learning_rate": 2e-05,
"loss": 0.4400583505630493,
"step": 226
},
{
"epoch": 0.941908713692946,
"grad_norm": 0.43061113357543945,
"learning_rate": 2e-05,
"loss": 0.5668185353279114,
"step": 227
},
{
"epoch": 0.946058091286307,
"grad_norm": 0.37659695744514465,
"learning_rate": 2e-05,
"loss": 0.3734014630317688,
"step": 228
},
{
"epoch": 0.950207468879668,
"grad_norm": 0.5160449743270874,
"learning_rate": 2e-05,
"loss": 0.7836225032806396,
"step": 229
},
{
"epoch": 0.9543568464730291,
"grad_norm": 0.5332698822021484,
"learning_rate": 2e-05,
"loss": 0.6564600467681885,
"step": 230
},
{
"epoch": 0.9585062240663901,
"grad_norm": 0.48597726225852966,
"learning_rate": 2e-05,
"loss": 0.7620537281036377,
"step": 231
},
{
"epoch": 0.9626556016597511,
"grad_norm": 0.437928169965744,
"learning_rate": 2e-05,
"loss": 0.5499407052993774,
"step": 232
},
{
"epoch": 0.966804979253112,
"grad_norm": 0.4861524701118469,
"learning_rate": 2e-05,
"loss": 0.6248472332954407,
"step": 233
},
{
"epoch": 0.970954356846473,
"grad_norm": 0.4638573229312897,
"learning_rate": 2e-05,
"loss": 0.5971051454544067,
"step": 234
},
{
"epoch": 0.975103734439834,
"grad_norm": 0.4368666410446167,
"learning_rate": 2e-05,
"loss": 0.5971348285675049,
"step": 235
},
{
"epoch": 0.979253112033195,
"grad_norm": 0.4261365830898285,
"learning_rate": 2e-05,
"loss": 0.5625735521316528,
"step": 236
},
{
"epoch": 0.983402489626556,
"grad_norm": 0.47601279616355896,
"learning_rate": 2e-05,
"loss": 0.518233597278595,
"step": 237
},
{
"epoch": 0.9875518672199171,
"grad_norm": 0.4935397803783417,
"learning_rate": 2e-05,
"loss": 0.7158107161521912,
"step": 238
},
{
"epoch": 0.991701244813278,
"grad_norm": 0.456167072057724,
"learning_rate": 2e-05,
"loss": 0.6627569198608398,
"step": 239
},
{
"epoch": 0.995850622406639,
"grad_norm": 0.4805908799171448,
"learning_rate": 2e-05,
"loss": 0.6887528896331787,
"step": 240
},
{
"epoch": 1.0,
"grad_norm": 0.6356716156005859,
"learning_rate": 2e-05,
"loss": 0.65900057554245,
"step": 241
}
],
"logging_steps": 1,
"max_steps": 241,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1048678841008456e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}