affine-test / trainer_state.json
top-50000's picture
Upload folder using huggingface_hub
51ce865 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9655172413793105,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.019704433497536946,
"grad_norm": 0.4991101920604706,
"learning_rate": 0.0,
"loss": 1.3884,
"step": 1
},
{
"epoch": 0.03940886699507389,
"grad_norm": 0.49041780829429626,
"learning_rate": 2e-05,
"loss": 1.3621,
"step": 2
},
{
"epoch": 0.059113300492610835,
"grad_norm": 0.5328213572502136,
"learning_rate": 4e-05,
"loss": 1.458,
"step": 3
},
{
"epoch": 0.07881773399014778,
"grad_norm": 0.46904322504997253,
"learning_rate": 6e-05,
"loss": 1.2977,
"step": 4
},
{
"epoch": 0.09852216748768473,
"grad_norm": 0.5323065519332886,
"learning_rate": 8e-05,
"loss": 1.389,
"step": 5
},
{
"epoch": 0.11822660098522167,
"grad_norm": 0.5205482840538025,
"learning_rate": 0.0001,
"loss": 1.2581,
"step": 6
},
{
"epoch": 0.13793103448275862,
"grad_norm": 0.4225301146507263,
"learning_rate": 9.998873580873848e-05,
"loss": 1.072,
"step": 7
},
{
"epoch": 0.15763546798029557,
"grad_norm": 0.37465083599090576,
"learning_rate": 9.995494831023409e-05,
"loss": 0.9885,
"step": 8
},
{
"epoch": 0.17733990147783252,
"grad_norm": 0.25269198417663574,
"learning_rate": 9.989865272804063e-05,
"loss": 0.6878,
"step": 9
},
{
"epoch": 0.19704433497536947,
"grad_norm": 0.1494321972131729,
"learning_rate": 9.981987442712633e-05,
"loss": 0.8262,
"step": 10
},
{
"epoch": 0.21674876847290642,
"grad_norm": 0.11854276806116104,
"learning_rate": 9.971864890244513e-05,
"loss": 0.6638,
"step": 11
},
{
"epoch": 0.23645320197044334,
"grad_norm": 0.10811202228069305,
"learning_rate": 9.959502176294383e-05,
"loss": 0.6303,
"step": 12
},
{
"epoch": 0.2561576354679803,
"grad_norm": 0.09654246270656586,
"learning_rate": 9.944904871101228e-05,
"loss": 0.6465,
"step": 13
},
{
"epoch": 0.27586206896551724,
"grad_norm": 0.08600393682718277,
"learning_rate": 9.928079551738543e-05,
"loss": 0.6375,
"step": 14
},
{
"epoch": 0.2955665024630542,
"grad_norm": 0.08666166663169861,
"learning_rate": 9.909033799150946e-05,
"loss": 0.6492,
"step": 15
},
{
"epoch": 0.31527093596059114,
"grad_norm": 0.07176803052425385,
"learning_rate": 9.887776194738432e-05,
"loss": 0.5989,
"step": 16
},
{
"epoch": 0.33497536945812806,
"grad_norm": 0.07677264511585236,
"learning_rate": 9.864316316489873e-05,
"loss": 0.6296,
"step": 17
},
{
"epoch": 0.35467980295566504,
"grad_norm": 0.06780184060335159,
"learning_rate": 9.838664734667495e-05,
"loss": 0.6148,
"step": 18
},
{
"epoch": 0.37438423645320196,
"grad_norm": 0.08061887323856354,
"learning_rate": 9.810833007044247e-05,
"loss": 0.6397,
"step": 19
},
{
"epoch": 0.39408866995073893,
"grad_norm": 0.06692875176668167,
"learning_rate": 9.780833673696254e-05,
"loss": 0.5205,
"step": 20
},
{
"epoch": 0.41379310344827586,
"grad_norm": 0.07027728855609894,
"learning_rate": 9.74868025135266e-05,
"loss": 0.6691,
"step": 21
},
{
"epoch": 0.43349753694581283,
"grad_norm": 0.062375761568546295,
"learning_rate": 9.714387227305422e-05,
"loss": 0.6602,
"step": 22
},
{
"epoch": 0.45320197044334976,
"grad_norm": 0.06572797149419785,
"learning_rate": 9.67797005288181e-05,
"loss": 0.6488,
"step": 23
},
{
"epoch": 0.4729064039408867,
"grad_norm": 0.05838072672486305,
"learning_rate": 9.639445136482548e-05,
"loss": 0.6315,
"step": 24
},
{
"epoch": 0.49261083743842365,
"grad_norm": 0.05625944957137108,
"learning_rate": 9.598829836188694e-05,
"loss": 0.5802,
"step": 25
},
{
"epoch": 0.5123152709359606,
"grad_norm": 0.05752566084265709,
"learning_rate": 9.55614245194068e-05,
"loss": 0.544,
"step": 26
},
{
"epoch": 0.5320197044334976,
"grad_norm": 0.053692612797021866,
"learning_rate": 9.511402217292926e-05,
"loss": 0.5522,
"step": 27
},
{
"epoch": 0.5517241379310345,
"grad_norm": 0.047561485320329666,
"learning_rate": 9.464629290747842e-05,
"loss": 0.5489,
"step": 28
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.048364218324422836,
"learning_rate": 9.415844746673047e-05,
"loss": 0.6571,
"step": 29
},
{
"epoch": 0.5911330049261084,
"grad_norm": 0.044278282672166824,
"learning_rate": 9.365070565805941e-05,
"loss": 0.5867,
"step": 30
},
{
"epoch": 0.6108374384236454,
"grad_norm": 0.04688710719347,
"learning_rate": 9.312329625349902e-05,
"loss": 0.5329,
"step": 31
},
{
"epoch": 0.6305418719211823,
"grad_norm": 0.047030262649059296,
"learning_rate": 9.257645688666556e-05,
"loss": 0.5487,
"step": 32
},
{
"epoch": 0.6502463054187192,
"grad_norm": 0.040486257523298264,
"learning_rate": 9.201043394568773e-05,
"loss": 0.4762,
"step": 33
},
{
"epoch": 0.6699507389162561,
"grad_norm": 0.04460093006491661,
"learning_rate": 9.142548246219212e-05,
"loss": 0.5499,
"step": 34
},
{
"epoch": 0.6896551724137931,
"grad_norm": 0.042993444949388504,
"learning_rate": 9.082186599639428e-05,
"loss": 0.5094,
"step": 35
},
{
"epoch": 0.7093596059113301,
"grad_norm": 0.0397091843187809,
"learning_rate": 9.019985651834703e-05,
"loss": 0.4948,
"step": 36
},
{
"epoch": 0.729064039408867,
"grad_norm": 0.039500679820775986,
"learning_rate": 8.955973428539944e-05,
"loss": 0.6011,
"step": 37
},
{
"epoch": 0.7487684729064039,
"grad_norm": 0.039048932492733,
"learning_rate": 8.890178771592199e-05,
"loss": 0.5304,
"step": 38
},
{
"epoch": 0.7684729064039408,
"grad_norm": 0.03994458168745041,
"learning_rate": 8.822631325935463e-05,
"loss": 0.4774,
"step": 39
},
{
"epoch": 0.7881773399014779,
"grad_norm": 0.0388583242893219,
"learning_rate": 8.753361526263621e-05,
"loss": 0.4908,
"step": 40
},
{
"epoch": 0.8078817733990148,
"grad_norm": 0.044083382934331894,
"learning_rate": 8.682400583307562e-05,
"loss": 0.5904,
"step": 41
},
{
"epoch": 0.8275862068965517,
"grad_norm": 0.04609934613108635,
"learning_rate": 8.609780469772623e-05,
"loss": 0.5491,
"step": 42
},
{
"epoch": 0.8472906403940886,
"grad_norm": 0.042274635285139084,
"learning_rate": 8.535533905932738e-05,
"loss": 0.5669,
"step": 43
},
{
"epoch": 0.8669950738916257,
"grad_norm": 0.03742406144738197,
"learning_rate": 8.459694344887732e-05,
"loss": 0.5192,
"step": 44
},
{
"epoch": 0.8866995073891626,
"grad_norm": 0.03503105416893959,
"learning_rate": 8.382295957490436e-05,
"loss": 0.4615,
"step": 45
},
{
"epoch": 0.9064039408866995,
"grad_norm": 0.038138486444950104,
"learning_rate": 8.303373616950408e-05,
"loss": 0.4823,
"step": 46
},
{
"epoch": 0.9261083743842364,
"grad_norm": 0.04008679464459419,
"learning_rate": 8.222962883121196e-05,
"loss": 0.5489,
"step": 47
},
{
"epoch": 0.9458128078817734,
"grad_norm": 0.03764171153306961,
"learning_rate": 8.141099986478212e-05,
"loss": 0.4722,
"step": 48
},
{
"epoch": 0.9655172413793104,
"grad_norm": 0.03970693424344063,
"learning_rate": 8.057821811794458e-05,
"loss": 0.5228,
"step": 49
},
{
"epoch": 0.9852216748768473,
"grad_norm": 0.03552575781941414,
"learning_rate": 7.973165881521434e-05,
"loss": 0.6102,
"step": 50
},
{
"epoch": 1.0,
"grad_norm": 0.041328880935907364,
"learning_rate": 7.88717033888274e-05,
"loss": 0.4746,
"step": 51
},
{
"epoch": 1.019704433497537,
"grad_norm": 0.034155767410993576,
"learning_rate": 7.799873930687978e-05,
"loss": 0.477,
"step": 52
},
{
"epoch": 1.0394088669950738,
"grad_norm": 0.03617981821298599,
"learning_rate": 7.711315989874677e-05,
"loss": 0.5067,
"step": 53
},
{
"epoch": 1.0591133004926108,
"grad_norm": 0.03469577059149742,
"learning_rate": 7.621536417786159e-05,
"loss": 0.4538,
"step": 54
},
{
"epoch": 1.0788177339901477,
"grad_norm": 0.037578266113996506,
"learning_rate": 7.530575666193283e-05,
"loss": 0.5154,
"step": 55
},
{
"epoch": 1.0985221674876848,
"grad_norm": 0.03396238014101982,
"learning_rate": 7.438474719068173e-05,
"loss": 0.5055,
"step": 56
},
{
"epoch": 1.1182266009852218,
"grad_norm": 0.03608660399913788,
"learning_rate": 7.345275074118185e-05,
"loss": 0.504,
"step": 57
},
{
"epoch": 1.1379310344827587,
"grad_norm": 0.03690114617347717,
"learning_rate": 7.251018724088367e-05,
"loss": 0.4966,
"step": 58
},
{
"epoch": 1.1576354679802956,
"grad_norm": 0.03447360172867775,
"learning_rate": 7.155748137840892e-05,
"loss": 0.423,
"step": 59
},
{
"epoch": 1.1773399014778325,
"grad_norm": 0.03337632864713669,
"learning_rate": 7.059506241219965e-05,
"loss": 0.4384,
"step": 60
},
{
"epoch": 1.1970443349753694,
"grad_norm": 0.0376424603164196,
"learning_rate": 6.962336397710819e-05,
"loss": 0.4634,
"step": 61
},
{
"epoch": 1.2167487684729064,
"grad_norm": 0.03872967138886452,
"learning_rate": 6.864282388901544e-05,
"loss": 0.479,
"step": 62
},
{
"epoch": 1.2364532019704433,
"grad_norm": 0.03349655494093895,
"learning_rate": 6.765388394756504e-05,
"loss": 0.4806,
"step": 63
},
{
"epoch": 1.2561576354679804,
"grad_norm": 0.03854072839021683,
"learning_rate": 6.665698973710288e-05,
"loss": 0.4855,
"step": 64
},
{
"epoch": 1.2758620689655173,
"grad_norm": 0.038142092525959015,
"learning_rate": 6.565259042591113e-05,
"loss": 0.4959,
"step": 65
},
{
"epoch": 1.2955665024630543,
"grad_norm": 0.033218443393707275,
"learning_rate": 6.464113856382752e-05,
"loss": 0.4372,
"step": 66
},
{
"epoch": 1.3152709359605912,
"grad_norm": 0.0347665473818779,
"learning_rate": 6.362308987834115e-05,
"loss": 0.4622,
"step": 67
},
{
"epoch": 1.3349753694581281,
"grad_norm": 0.03334619104862213,
"learning_rate": 6.259890306925627e-05,
"loss": 0.4679,
"step": 68
},
{
"epoch": 1.354679802955665,
"grad_norm": 0.036229487508535385,
"learning_rate": 6.156903960201709e-05,
"loss": 0.5257,
"step": 69
},
{
"epoch": 1.374384236453202,
"grad_norm": 0.03317816182971001,
"learning_rate": 6.0533963499786314e-05,
"loss": 0.3979,
"step": 70
},
{
"epoch": 1.3940886699507389,
"grad_norm": 0.036007072776556015,
"learning_rate": 5.949414113437142e-05,
"loss": 0.4719,
"step": 71
},
{
"epoch": 1.4137931034482758,
"grad_norm": 0.03284362331032753,
"learning_rate": 5.8450041016092464e-05,
"loss": 0.6041,
"step": 72
},
{
"epoch": 1.4334975369458127,
"grad_norm": 0.036986593157052994,
"learning_rate": 5.7402133582686576e-05,
"loss": 0.4465,
"step": 73
},
{
"epoch": 1.4532019704433496,
"grad_norm": 0.033378396183252335,
"learning_rate": 5.6350890987343944e-05,
"loss": 0.3744,
"step": 74
},
{
"epoch": 1.4729064039408866,
"grad_norm": 0.035679418593645096,
"learning_rate": 5.5296786885970805e-05,
"loss": 0.4936,
"step": 75
},
{
"epoch": 1.4926108374384237,
"grad_norm": 0.04116353765130043,
"learning_rate": 5.4240296223775465e-05,
"loss": 0.4617,
"step": 76
},
{
"epoch": 1.5123152709359606,
"grad_norm": 0.037939801812171936,
"learning_rate": 5.318189502127332e-05,
"loss": 0.4325,
"step": 77
},
{
"epoch": 1.5320197044334976,
"grad_norm": 0.03347817435860634,
"learning_rate": 5.212206015980742e-05,
"loss": 0.4753,
"step": 78
},
{
"epoch": 1.5517241379310345,
"grad_norm": 0.03652183711528778,
"learning_rate": 5.1061269166681183e-05,
"loss": 0.4635,
"step": 79
},
{
"epoch": 1.5714285714285714,
"grad_norm": 0.03619343787431717,
"learning_rate": 5e-05,
"loss": 0.4309,
"step": 80
},
{
"epoch": 1.5911330049261085,
"grad_norm": 0.03413229063153267,
"learning_rate": 4.893873083331882e-05,
"loss": 0.5164,
"step": 81
},
{
"epoch": 1.6108374384236455,
"grad_norm": 0.03500941023230553,
"learning_rate": 4.78779398401926e-05,
"loss": 0.5154,
"step": 82
},
{
"epoch": 1.6305418719211824,
"grad_norm": 0.03602500632405281,
"learning_rate": 4.6818104978726685e-05,
"loss": 0.4763,
"step": 83
},
{
"epoch": 1.6502463054187193,
"grad_norm": 0.03640425205230713,
"learning_rate": 4.575970377622456e-05,
"loss": 0.4542,
"step": 84
},
{
"epoch": 1.6699507389162562,
"grad_norm": 0.040320053696632385,
"learning_rate": 4.47032131140292e-05,
"loss": 0.4816,
"step": 85
},
{
"epoch": 1.6896551724137931,
"grad_norm": 0.03946779668331146,
"learning_rate": 4.364910901265606e-05,
"loss": 0.4859,
"step": 86
},
{
"epoch": 1.70935960591133,
"grad_norm": 0.03397982567548752,
"learning_rate": 4.2597866417313436e-05,
"loss": 0.4254,
"step": 87
},
{
"epoch": 1.729064039408867,
"grad_norm": 0.036678120493888855,
"learning_rate": 4.1549958983907555e-05,
"loss": 0.5527,
"step": 88
},
{
"epoch": 1.748768472906404,
"grad_norm": 0.03638288751244545,
"learning_rate": 4.050585886562858e-05,
"loss": 0.4685,
"step": 89
},
{
"epoch": 1.7684729064039408,
"grad_norm": 0.03590947762131691,
"learning_rate": 3.94660365002137e-05,
"loss": 0.4806,
"step": 90
},
{
"epoch": 1.7881773399014778,
"grad_norm": 0.033141493797302246,
"learning_rate": 3.843096039798293e-05,
"loss": 0.4107,
"step": 91
},
{
"epoch": 1.8078817733990147,
"grad_norm": 0.03716498613357544,
"learning_rate": 3.740109693074375e-05,
"loss": 0.4921,
"step": 92
},
{
"epoch": 1.8275862068965516,
"grad_norm": 0.035985782742500305,
"learning_rate": 3.637691012165886e-05,
"loss": 0.4797,
"step": 93
},
{
"epoch": 1.8472906403940885,
"grad_norm": 0.03812502697110176,
"learning_rate": 3.5358861436172485e-05,
"loss": 0.472,
"step": 94
},
{
"epoch": 1.8669950738916257,
"grad_norm": 0.03597032278776169,
"learning_rate": 3.434740957408889e-05,
"loss": 0.4826,
"step": 95
},
{
"epoch": 1.8866995073891626,
"grad_norm": 0.03627209737896919,
"learning_rate": 3.334301026289712e-05,
"loss": 0.4248,
"step": 96
},
{
"epoch": 1.9064039408866995,
"grad_norm": 0.039599835872650146,
"learning_rate": 3.234611605243496e-05,
"loss": 0.4588,
"step": 97
},
{
"epoch": 1.9261083743842364,
"grad_norm": 0.03590153157711029,
"learning_rate": 3.135717611098458e-05,
"loss": 0.5118,
"step": 98
},
{
"epoch": 1.9458128078817734,
"grad_norm": 0.035810086876153946,
"learning_rate": 3.0376636022891812e-05,
"loss": 0.4591,
"step": 99
},
{
"epoch": 1.9655172413793105,
"grad_norm": 0.035441234707832336,
"learning_rate": 2.9404937587800375e-05,
"loss": 0.4391,
"step": 100
}
],
"logging_steps": 1,
"max_steps": 153,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.743655227968717e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}