{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 590,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0851063829787234,
"grad_norm": 1.7628610134124756,
"learning_rate": 8e-05,
"loss": 1.8622,
"step": 5
},
{
"epoch": 0.1702127659574468,
"grad_norm": 1.8386660814285278,
"learning_rate": 0.00018,
"loss": 0.9707,
"step": 10
},
{
"epoch": 0.2553191489361702,
"grad_norm": 1.295538067817688,
"learning_rate": 0.00019862068965517243,
"loss": 0.5712,
"step": 15
},
{
"epoch": 0.3404255319148936,
"grad_norm": 0.5233324766159058,
"learning_rate": 0.00019689655172413795,
"loss": 0.4687,
"step": 20
},
{
"epoch": 0.425531914893617,
"grad_norm": 0.7628127336502075,
"learning_rate": 0.00019517241379310345,
"loss": 0.5601,
"step": 25
},
{
"epoch": 0.5106382978723404,
"grad_norm": 0.5182906985282898,
"learning_rate": 0.00019344827586206898,
"loss": 0.4466,
"step": 30
},
{
"epoch": 0.5957446808510638,
"grad_norm": 0.3616418242454529,
"learning_rate": 0.0001917241379310345,
"loss": 0.5812,
"step": 35
},
{
"epoch": 0.6808510638297872,
"grad_norm": 0.48968932032585144,
"learning_rate": 0.00019,
"loss": 0.3477,
"step": 40
},
{
"epoch": 0.7659574468085106,
"grad_norm": 0.8994768261909485,
"learning_rate": 0.00018827586206896554,
"loss": 0.3383,
"step": 45
},
{
"epoch": 0.851063829787234,
"grad_norm": 0.387116402387619,
"learning_rate": 0.00018655172413793104,
"loss": 0.4003,
"step": 50
},
{
"epoch": 0.9361702127659575,
"grad_norm": 0.8058916926383972,
"learning_rate": 0.00018482758620689654,
"loss": 0.3992,
"step": 55
},
{
"epoch": 1.0170212765957447,
"grad_norm": 0.4688344895839691,
"learning_rate": 0.00018310344827586207,
"loss": 0.3998,
"step": 60
},
{
"epoch": 1.102127659574468,
"grad_norm": 0.5960966944694519,
"learning_rate": 0.0001813793103448276,
"loss": 0.2818,
"step": 65
},
{
"epoch": 1.1872340425531915,
"grad_norm": 0.49853241443634033,
"learning_rate": 0.0001796551724137931,
"loss": 0.2186,
"step": 70
},
{
"epoch": 1.2723404255319148,
"grad_norm": 0.521017849445343,
"learning_rate": 0.00017793103448275862,
"loss": 0.322,
"step": 75
},
{
"epoch": 1.3574468085106384,
"grad_norm": 0.35342177748680115,
"learning_rate": 0.00017620689655172415,
"loss": 0.1905,
"step": 80
},
{
"epoch": 1.4425531914893617,
"grad_norm": 0.37390947341918945,
"learning_rate": 0.00017448275862068965,
"loss": 0.2582,
"step": 85
},
{
"epoch": 1.527659574468085,
"grad_norm": 0.42308491468429565,
"learning_rate": 0.00017275862068965518,
"loss": 0.2337,
"step": 90
},
{
"epoch": 1.6127659574468085,
"grad_norm": 0.8888659477233887,
"learning_rate": 0.0001710344827586207,
"loss": 0.2465,
"step": 95
},
{
"epoch": 1.697872340425532,
"grad_norm": 0.40344128012657166,
"learning_rate": 0.0001693103448275862,
"loss": 0.2591,
"step": 100
},
{
"epoch": 1.7829787234042553,
"grad_norm": 0.45671504735946655,
"learning_rate": 0.00016758620689655173,
"loss": 0.2269,
"step": 105
},
{
"epoch": 1.8680851063829786,
"grad_norm": 0.4616524577140808,
"learning_rate": 0.00016586206896551726,
"loss": 0.2033,
"step": 110
},
{
"epoch": 1.9531914893617022,
"grad_norm": 0.4471171200275421,
"learning_rate": 0.00016413793103448276,
"loss": 0.266,
"step": 115
},
{
"epoch": 2.0340425531914894,
"grad_norm": 0.7043401002883911,
"learning_rate": 0.0001624137931034483,
"loss": 0.1607,
"step": 120
},
{
"epoch": 2.119148936170213,
"grad_norm": 0.4361288845539093,
"learning_rate": 0.00016068965517241382,
"loss": 0.1493,
"step": 125
},
{
"epoch": 2.204255319148936,
"grad_norm": 0.45557570457458496,
"learning_rate": 0.00015896551724137932,
"loss": 0.1011,
"step": 130
},
{
"epoch": 2.2893617021276595,
"grad_norm": 0.3149248957633972,
"learning_rate": 0.00015724137931034485,
"loss": 0.1112,
"step": 135
},
{
"epoch": 2.374468085106383,
"grad_norm": 0.2783677279949188,
"learning_rate": 0.00015551724137931037,
"loss": 0.1278,
"step": 140
},
{
"epoch": 2.4595744680851066,
"grad_norm": 0.46953991055488586,
"learning_rate": 0.00015379310344827587,
"loss": 0.1157,
"step": 145
},
{
"epoch": 2.5446808510638297,
"grad_norm": 0.5183996558189392,
"learning_rate": 0.0001520689655172414,
"loss": 0.107,
"step": 150
},
{
"epoch": 2.629787234042553,
"grad_norm": 0.46928611397743225,
"learning_rate": 0.0001503448275862069,
"loss": 0.12,
"step": 155
},
{
"epoch": 2.7148936170212767,
"grad_norm": 0.4748971462249756,
"learning_rate": 0.00014862068965517243,
"loss": 0.1406,
"step": 160
},
{
"epoch": 2.8,
"grad_norm": 0.7183696627616882,
"learning_rate": 0.00014689655172413793,
"loss": 0.1533,
"step": 165
},
{
"epoch": 2.8851063829787233,
"grad_norm": 0.575391948223114,
"learning_rate": 0.00014517241379310346,
"loss": 0.1365,
"step": 170
},
{
"epoch": 2.970212765957447,
"grad_norm": 0.33236995339393616,
"learning_rate": 0.00014344827586206896,
"loss": 0.1369,
"step": 175
},
{
"epoch": 3.051063829787234,
"grad_norm": 0.17017032206058502,
"learning_rate": 0.0001417241379310345,
"loss": 0.093,
"step": 180
},
{
"epoch": 3.1361702127659576,
"grad_norm": 0.8167087435722351,
"learning_rate": 0.00014,
"loss": 0.1297,
"step": 185
},
{
"epoch": 3.2212765957446807,
"grad_norm": 0.4695407748222351,
"learning_rate": 0.00013827586206896552,
"loss": 0.0794,
"step": 190
},
{
"epoch": 3.3063829787234043,
"grad_norm": 0.46323472261428833,
"learning_rate": 0.00013655172413793104,
"loss": 0.0909,
"step": 195
},
{
"epoch": 3.391489361702128,
"grad_norm": 0.4290667772293091,
"learning_rate": 0.00013482758620689654,
"loss": 0.0728,
"step": 200
},
{
"epoch": 3.476595744680851,
"grad_norm": 0.3602962791919708,
"learning_rate": 0.00013310344827586207,
"loss": 0.0713,
"step": 205
},
{
"epoch": 3.5617021276595744,
"grad_norm": 0.3129134476184845,
"learning_rate": 0.0001313793103448276,
"loss": 0.0878,
"step": 210
},
{
"epoch": 3.646808510638298,
"grad_norm": 0.4205905795097351,
"learning_rate": 0.0001296551724137931,
"loss": 0.0815,
"step": 215
},
{
"epoch": 3.731914893617021,
"grad_norm": 0.47979313135147095,
"learning_rate": 0.00012793103448275863,
"loss": 0.0696,
"step": 220
},
{
"epoch": 3.8170212765957445,
"grad_norm": 0.4063044786453247,
"learning_rate": 0.00012620689655172415,
"loss": 0.0661,
"step": 225
},
{
"epoch": 3.902127659574468,
"grad_norm": 0.363741934299469,
"learning_rate": 0.00012448275862068966,
"loss": 0.0972,
"step": 230
},
{
"epoch": 3.9872340425531916,
"grad_norm": 0.2973483204841614,
"learning_rate": 0.00012275862068965518,
"loss": 0.0807,
"step": 235
},
{
"epoch": 4.068085106382979,
"grad_norm": 0.481488436460495,
"learning_rate": 0.00012103448275862071,
"loss": 0.0678,
"step": 240
},
{
"epoch": 4.153191489361702,
"grad_norm": 0.18169716000556946,
"learning_rate": 0.00011931034482758621,
"loss": 0.0555,
"step": 245
},
{
"epoch": 4.238297872340426,
"grad_norm": 0.3210119307041168,
"learning_rate": 0.00011758620689655173,
"loss": 0.0506,
"step": 250
},
{
"epoch": 4.323404255319149,
"grad_norm": 0.3670145273208618,
"learning_rate": 0.00011586206896551725,
"loss": 0.0685,
"step": 255
},
{
"epoch": 4.408510638297872,
"grad_norm": 0.2257293462753296,
"learning_rate": 0.00011413793103448275,
"loss": 0.0592,
"step": 260
},
{
"epoch": 4.493617021276596,
"grad_norm": 0.16985096037387848,
"learning_rate": 0.00011241379310344828,
"loss": 0.0745,
"step": 265
},
{
"epoch": 4.578723404255319,
"grad_norm": 0.31408607959747314,
"learning_rate": 0.00011068965517241381,
"loss": 0.0866,
"step": 270
},
{
"epoch": 4.663829787234042,
"grad_norm": 0.2598167657852173,
"learning_rate": 0.00010896551724137931,
"loss": 0.0494,
"step": 275
},
{
"epoch": 4.748936170212766,
"grad_norm": 0.4023381471633911,
"learning_rate": 0.00010724137931034484,
"loss": 0.0632,
"step": 280
},
{
"epoch": 4.834042553191489,
"grad_norm": 0.24716579914093018,
"learning_rate": 0.00010551724137931037,
"loss": 0.0499,
"step": 285
},
{
"epoch": 4.919148936170213,
"grad_norm": 0.39397132396698,
"learning_rate": 0.00010379310344827587,
"loss": 0.0575,
"step": 290
},
{
"epoch": 5.0,
"grad_norm": 0.09666766971349716,
"learning_rate": 0.0001020689655172414,
"loss": 0.0609,
"step": 295
},
{
"epoch": 5.085106382978723,
"grad_norm": 0.1626937985420227,
"learning_rate": 0.0001003448275862069,
"loss": 0.0455,
"step": 300
},
{
"epoch": 5.170212765957447,
"grad_norm": 0.2916521728038788,
"learning_rate": 9.862068965517242e-05,
"loss": 0.0437,
"step": 305
},
{
"epoch": 5.25531914893617,
"grad_norm": 0.3213741183280945,
"learning_rate": 9.689655172413794e-05,
"loss": 0.0509,
"step": 310
},
{
"epoch": 5.340425531914893,
"grad_norm": 0.25438761711120605,
"learning_rate": 9.517241379310345e-05,
"loss": 0.0483,
"step": 315
},
{
"epoch": 5.425531914893617,
"grad_norm": 0.35173889994621277,
"learning_rate": 9.344827586206896e-05,
"loss": 0.0394,
"step": 320
},
{
"epoch": 5.51063829787234,
"grad_norm": 0.2214491218328476,
"learning_rate": 9.172413793103448e-05,
"loss": 0.0615,
"step": 325
},
{
"epoch": 5.595744680851064,
"grad_norm": 0.1461336314678192,
"learning_rate": 9e-05,
"loss": 0.0552,
"step": 330
},
{
"epoch": 5.680851063829787,
"grad_norm": 0.2651642858982086,
"learning_rate": 8.827586206896552e-05,
"loss": 0.0417,
"step": 335
},
{
"epoch": 5.76595744680851,
"grad_norm": 0.3055964708328247,
"learning_rate": 8.655172413793103e-05,
"loss": 0.0502,
"step": 340
},
{
"epoch": 5.851063829787234,
"grad_norm": 0.19299010932445526,
"learning_rate": 8.482758620689656e-05,
"loss": 0.044,
"step": 345
},
{
"epoch": 5.9361702127659575,
"grad_norm": 0.120607890188694,
"learning_rate": 8.310344827586208e-05,
"loss": 0.0407,
"step": 350
},
{
"epoch": 6.017021276595744,
"grad_norm": 0.13520629703998566,
"learning_rate": 8.137931034482759e-05,
"loss": 0.0512,
"step": 355
},
{
"epoch": 6.102127659574468,
"grad_norm": 0.20564086735248566,
"learning_rate": 7.965517241379312e-05,
"loss": 0.0416,
"step": 360
},
{
"epoch": 6.187234042553191,
"grad_norm": 0.115450419485569,
"learning_rate": 7.793103448275862e-05,
"loss": 0.0424,
"step": 365
},
{
"epoch": 6.272340425531915,
"grad_norm": 0.1926778256893158,
"learning_rate": 7.620689655172413e-05,
"loss": 0.0378,
"step": 370
},
{
"epoch": 6.357446808510638,
"grad_norm": 0.25700807571411133,
"learning_rate": 7.448275862068966e-05,
"loss": 0.0453,
"step": 375
},
{
"epoch": 6.4425531914893615,
"grad_norm": 0.10704706609249115,
"learning_rate": 7.275862068965517e-05,
"loss": 0.0323,
"step": 380
},
{
"epoch": 6.527659574468085,
"grad_norm": 0.18126443028450012,
"learning_rate": 7.103448275862069e-05,
"loss": 0.0393,
"step": 385
},
{
"epoch": 6.6127659574468085,
"grad_norm": 0.13170169293880463,
"learning_rate": 6.931034482758622e-05,
"loss": 0.0458,
"step": 390
},
{
"epoch": 6.697872340425532,
"grad_norm": 0.10165558010339737,
"learning_rate": 6.758620689655173e-05,
"loss": 0.0359,
"step": 395
},
{
"epoch": 6.782978723404256,
"grad_norm": 0.20285525918006897,
"learning_rate": 6.586206896551724e-05,
"loss": 0.0411,
"step": 400
},
{
"epoch": 6.868085106382979,
"grad_norm": 0.17869718372821808,
"learning_rate": 6.413793103448276e-05,
"loss": 0.0454,
"step": 405
},
{
"epoch": 6.953191489361702,
"grad_norm": 0.1431434452533722,
"learning_rate": 6.241379310344829e-05,
"loss": 0.041,
"step": 410
},
{
"epoch": 7.034042553191489,
"grad_norm": 0.1481233537197113,
"learning_rate": 6.068965517241379e-05,
"loss": 0.0505,
"step": 415
},
{
"epoch": 7.1191489361702125,
"grad_norm": 0.10545721650123596,
"learning_rate": 5.896551724137931e-05,
"loss": 0.0361,
"step": 420
},
{
"epoch": 7.2042553191489365,
"grad_norm": 0.13733087480068207,
"learning_rate": 5.7241379310344835e-05,
"loss": 0.0361,
"step": 425
},
{
"epoch": 7.2893617021276595,
"grad_norm": 0.09701387584209442,
"learning_rate": 5.551724137931035e-05,
"loss": 0.0363,
"step": 430
},
{
"epoch": 7.374468085106383,
"grad_norm": 0.12478837370872498,
"learning_rate": 5.379310344827586e-05,
"loss": 0.0385,
"step": 435
},
{
"epoch": 7.459574468085107,
"grad_norm": 0.18108202517032623,
"learning_rate": 5.2068965517241384e-05,
"loss": 0.0307,
"step": 440
},
{
"epoch": 7.54468085106383,
"grad_norm": 0.1741178184747696,
"learning_rate": 5.03448275862069e-05,
"loss": 0.0349,
"step": 445
},
{
"epoch": 7.629787234042553,
"grad_norm": 0.18830710649490356,
"learning_rate": 4.862068965517241e-05,
"loss": 0.035,
"step": 450
},
{
"epoch": 7.714893617021277,
"grad_norm": 0.29267263412475586,
"learning_rate": 4.689655172413793e-05,
"loss": 0.0396,
"step": 455
},
{
"epoch": 7.8,
"grad_norm": 0.1248115673661232,
"learning_rate": 4.5172413793103454e-05,
"loss": 0.0417,
"step": 460
},
{
"epoch": 7.885106382978723,
"grad_norm": 0.12826257944107056,
"learning_rate": 4.344827586206897e-05,
"loss": 0.0352,
"step": 465
},
{
"epoch": 7.970212765957447,
"grad_norm": 0.10185350477695465,
"learning_rate": 4.172413793103448e-05,
"loss": 0.0383,
"step": 470
},
{
"epoch": 8.051063829787234,
"grad_norm": 0.11523312330245972,
"learning_rate": 4e-05,
"loss": 0.0372,
"step": 475
},
{
"epoch": 8.136170212765958,
"grad_norm": 0.07264875620603561,
"learning_rate": 3.827586206896552e-05,
"loss": 0.0291,
"step": 480
},
{
"epoch": 8.221276595744682,
"grad_norm": 0.1880428045988083,
"learning_rate": 3.655172413793104e-05,
"loss": 0.029,
"step": 485
},
{
"epoch": 8.306382978723404,
"grad_norm": 0.19860832393169403,
"learning_rate": 3.482758620689655e-05,
"loss": 0.0298,
"step": 490
},
{
"epoch": 8.391489361702128,
"grad_norm": 0.1690095216035843,
"learning_rate": 3.310344827586207e-05,
"loss": 0.0371,
"step": 495
},
{
"epoch": 8.476595744680852,
"grad_norm": 0.171220064163208,
"learning_rate": 3.137931034482759e-05,
"loss": 0.0294,
"step": 500
},
{
"epoch": 8.561702127659574,
"grad_norm": 0.12849068641662598,
"learning_rate": 2.96551724137931e-05,
"loss": 0.0339,
"step": 505
},
{
"epoch": 8.646808510638298,
"grad_norm": 0.1075233593583107,
"learning_rate": 2.7931034482758622e-05,
"loss": 0.0346,
"step": 510
},
{
"epoch": 8.731914893617022,
"grad_norm": 0.14006929099559784,
"learning_rate": 2.620689655172414e-05,
"loss": 0.03,
"step": 515
},
{
"epoch": 8.817021276595744,
"grad_norm": 0.1683836579322815,
"learning_rate": 2.4482758620689654e-05,
"loss": 0.0321,
"step": 520
},
{
"epoch": 8.902127659574468,
"grad_norm": 0.12975075840950012,
"learning_rate": 2.2758620689655175e-05,
"loss": 0.0384,
"step": 525
},
{
"epoch": 8.987234042553192,
"grad_norm": 0.1698654145002365,
"learning_rate": 2.1034482758620692e-05,
"loss": 0.037,
"step": 530
},
{
"epoch": 9.068085106382979,
"grad_norm": 0.1573844850063324,
"learning_rate": 1.9310344827586207e-05,
"loss": 0.0328,
"step": 535
},
{
"epoch": 9.153191489361703,
"grad_norm": 0.14986495673656464,
"learning_rate": 1.7586206896551724e-05,
"loss": 0.0313,
"step": 540
},
{
"epoch": 9.238297872340425,
"grad_norm": 0.13991227746009827,
"learning_rate": 1.586206896551724e-05,
"loss": 0.0283,
"step": 545
},
{
"epoch": 9.323404255319149,
"grad_norm": 0.12908954918384552,
"learning_rate": 1.4137931034482759e-05,
"loss": 0.0283,
"step": 550
},
{
"epoch": 9.408510638297873,
"grad_norm": 0.1915467232465744,
"learning_rate": 1.2413793103448277e-05,
"loss": 0.0233,
"step": 555
},
{
"epoch": 9.493617021276595,
"grad_norm": 0.16341650485992432,
"learning_rate": 1.0689655172413794e-05,
"loss": 0.0265,
"step": 560
},
{
"epoch": 9.578723404255319,
"grad_norm": 0.1922847330570221,
"learning_rate": 8.96551724137931e-06,
"loss": 0.0314,
"step": 565
},
{
"epoch": 9.663829787234043,
"grad_norm": 0.2411348968744278,
"learning_rate": 7.241379310344828e-06,
"loss": 0.0334,
"step": 570
},
{
"epoch": 9.748936170212765,
"grad_norm": 0.20369961857795715,
"learning_rate": 5.517241379310345e-06,
"loss": 0.0336,
"step": 575
},
{
"epoch": 9.83404255319149,
"grad_norm": 0.19569233059883118,
"learning_rate": 3.793103448275862e-06,
"loss": 0.0286,
"step": 580
},
{
"epoch": 9.919148936170213,
"grad_norm": 0.19629313051700592,
"learning_rate": 2.0689655172413796e-06,
"loss": 0.029,
"step": 585
},
{
"epoch": 10.0,
"grad_norm": 0.18082977831363678,
"learning_rate": 3.4482758620689656e-07,
"loss": 0.0283,
"step": 590
},
{
"epoch": 10.0,
"step": 590,
"total_flos": 3.2063167604911104e+16,
"train_loss": 0.1322159972483829,
"train_runtime": 1074.5919,
"train_samples_per_second": 4.374,
"train_steps_per_second": 0.549
}
],
"logging_steps": 5,
"max_steps": 590,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.2063167604911104e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}