{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 14.0,
"eval_steps": 500,
"global_step": 6902,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2028397565922921,
"grad_norm": 5.140639781951904,
"learning_rate": 4.8744929006085194e-05,
"loss": 4.9269,
"step": 100
},
{
"epoch": 0.4056795131845842,
"grad_norm": 4.1806511878967285,
"learning_rate": 4.747718052738337e-05,
"loss": 2.5104,
"step": 200
},
{
"epoch": 0.6085192697768763,
"grad_norm": 5.7575225830078125,
"learning_rate": 4.6209432048681544e-05,
"loss": 2.3722,
"step": 300
},
{
"epoch": 0.8113590263691683,
"grad_norm": 4.265388011932373,
"learning_rate": 4.494168356997972e-05,
"loss": 2.2421,
"step": 400
},
{
"epoch": 1.0141987829614605,
"grad_norm": 4.239109039306641,
"learning_rate": 4.367393509127789e-05,
"loss": 2.1943,
"step": 500
},
{
"epoch": 1.2170385395537526,
"grad_norm": 3.487968921661377,
"learning_rate": 4.2406186612576066e-05,
"loss": 1.9779,
"step": 600
},
{
"epoch": 1.4198782961460445,
"grad_norm": 3.78326678276062,
"learning_rate": 4.113843813387424e-05,
"loss": 1.9127,
"step": 700
},
{
"epoch": 1.6227180527383367,
"grad_norm": 3.727482318878174,
"learning_rate": 3.9870689655172416e-05,
"loss": 1.8781,
"step": 800
},
{
"epoch": 1.8255578093306288,
"grad_norm": 3.8917086124420166,
"learning_rate": 3.8602941176470595e-05,
"loss": 1.8345,
"step": 900
},
{
"epoch": 2.028397565922921,
"grad_norm": 3.717226028442383,
"learning_rate": 3.733519269776877e-05,
"loss": 1.8036,
"step": 1000
},
{
"epoch": 2.231237322515213,
"grad_norm": 3.685636043548584,
"learning_rate": 3.606744421906694e-05,
"loss": 1.6346,
"step": 1100
},
{
"epoch": 2.4340770791075053,
"grad_norm": 3.44212007522583,
"learning_rate": 3.479969574036511e-05,
"loss": 1.6445,
"step": 1200
},
{
"epoch": 2.636916835699797,
"grad_norm": 5.615281105041504,
"learning_rate": 3.353194726166329e-05,
"loss": 1.6432,
"step": 1300
},
{
"epoch": 2.839756592292089,
"grad_norm": 3.544981002807617,
"learning_rate": 3.226419878296146e-05,
"loss": 1.6753,
"step": 1400
},
{
"epoch": 3.0425963488843815,
"grad_norm": 7.216954708099365,
"learning_rate": 3.099645030425964e-05,
"loss": 1.5979,
"step": 1500
},
{
"epoch": 3.2454361054766734,
"grad_norm": 5.732909202575684,
"learning_rate": 2.9728701825557807e-05,
"loss": 1.4675,
"step": 1600
},
{
"epoch": 3.4482758620689653,
"grad_norm": 4.3851847648620605,
"learning_rate": 2.8460953346855983e-05,
"loss": 1.4753,
"step": 1700
},
{
"epoch": 3.6511156186612577,
"grad_norm": 4.533299446105957,
"learning_rate": 2.719320486815416e-05,
"loss": 1.4698,
"step": 1800
},
{
"epoch": 3.8539553752535496,
"grad_norm": 4.276126384735107,
"learning_rate": 2.5925456389452336e-05,
"loss": 1.4802,
"step": 1900
},
{
"epoch": 4.056795131845842,
"grad_norm": 6.0317912101745605,
"learning_rate": 2.4657707910750508e-05,
"loss": 1.4538,
"step": 2000
},
{
"epoch": 4.259634888438134,
"grad_norm": 5.376238822937012,
"learning_rate": 2.3389959432048683e-05,
"loss": 1.3555,
"step": 2100
},
{
"epoch": 4.462474645030426,
"grad_norm": 3.486006259918213,
"learning_rate": 2.2122210953346855e-05,
"loss": 1.3671,
"step": 2200
},
{
"epoch": 4.665314401622718,
"grad_norm": 5.000717639923096,
"learning_rate": 2.085446247464503e-05,
"loss": 1.3378,
"step": 2300
},
{
"epoch": 4.8681541582150105,
"grad_norm": 4.060827732086182,
"learning_rate": 1.9586713995943205e-05,
"loss": 1.3609,
"step": 2400
},
{
"epoch": 5.070993914807302,
"grad_norm": 6.573349475860596,
"learning_rate": 1.831896551724138e-05,
"loss": 1.3327,
"step": 2500
},
{
"epoch": 5.273833671399594,
"grad_norm": 3.522782802581787,
"learning_rate": 1.7051217038539555e-05,
"loss": 1.2689,
"step": 2600
},
{
"epoch": 5.476673427991886,
"grad_norm": 4.277068138122559,
"learning_rate": 1.578346855983773e-05,
"loss": 1.2501,
"step": 2700
},
{
"epoch": 5.679513184584178,
"grad_norm": 3.7447307109832764,
"learning_rate": 1.4515720081135902e-05,
"loss": 1.2406,
"step": 2800
},
{
"epoch": 5.882352941176471,
"grad_norm": 3.9506561756134033,
"learning_rate": 1.3247971602434079e-05,
"loss": 1.2618,
"step": 2900
},
{
"epoch": 6.085192697768763,
"grad_norm": 7.182559490203857,
"learning_rate": 1.1980223123732253e-05,
"loss": 1.2339,
"step": 3000
},
{
"epoch": 6.288032454361055,
"grad_norm": 5.909596920013428,
"learning_rate": 1.0712474645030426e-05,
"loss": 1.1905,
"step": 3100
},
{
"epoch": 6.490872210953347,
"grad_norm": 5.5547194480896,
"learning_rate": 9.444726166328601e-06,
"loss": 1.1982,
"step": 3200
},
{
"epoch": 6.693711967545639,
"grad_norm": 4.923269271850586,
"learning_rate": 8.176977687626776e-06,
"loss": 1.1994,
"step": 3300
},
{
"epoch": 6.896551724137931,
"grad_norm": 4.390909194946289,
"learning_rate": 6.90922920892495e-06,
"loss": 1.2089,
"step": 3400
},
{
"epoch": 7.099391480730223,
"grad_norm": 4.504344940185547,
"learning_rate": 5.641480730223124e-06,
"loss": 1.1866,
"step": 3500
},
{
"epoch": 7.302231237322515,
"grad_norm": 4.870874404907227,
"learning_rate": 4.373732251521298e-06,
"loss": 1.1573,
"step": 3600
},
{
"epoch": 7.505070993914807,
"grad_norm": 4.071926593780518,
"learning_rate": 3.1059837728194726e-06,
"loss": 1.1305,
"step": 3700
},
{
"epoch": 7.707910750507099,
"grad_norm": 7.0253071784973145,
"learning_rate": 1.8382352941176471e-06,
"loss": 1.1313,
"step": 3800
},
{
"epoch": 7.910750507099391,
"grad_norm": 6.504556179046631,
"learning_rate": 5.704868154158215e-07,
"loss": 1.1481,
"step": 3900
},
{
"epoch": 8.113590263691684,
"grad_norm": 3.3532488346099854,
"learning_rate": 2.1030136192408e-05,
"loss": 1.1296,
"step": 4000
},
{
"epoch": 8.316430020283976,
"grad_norm": 5.533351898193359,
"learning_rate": 2.0305708490292668e-05,
"loss": 1.1625,
"step": 4100
},
{
"epoch": 8.519269776876268,
"grad_norm": 4.058775901794434,
"learning_rate": 1.958128078817734e-05,
"loss": 1.1552,
"step": 4200
},
{
"epoch": 8.72210953346856,
"grad_norm": 4.454073429107666,
"learning_rate": 1.8856853086062014e-05,
"loss": 1.184,
"step": 4300
},
{
"epoch": 8.924949290060852,
"grad_norm": 4.909153461456299,
"learning_rate": 1.8132425383946684e-05,
"loss": 1.202,
"step": 4400
},
{
"epoch": 9.127789046653144,
"grad_norm": 5.935628414154053,
"learning_rate": 1.7407997681831353e-05,
"loss": 1.1225,
"step": 4500
},
{
"epoch": 9.330628803245435,
"grad_norm": 3.5767104625701904,
"learning_rate": 1.6683569979716023e-05,
"loss": 1.0954,
"step": 4600
},
{
"epoch": 9.53346855983773,
"grad_norm": 4.276003360748291,
"learning_rate": 1.5959142277600696e-05,
"loss": 1.1231,
"step": 4700
},
{
"epoch": 9.736308316430021,
"grad_norm": 4.796773433685303,
"learning_rate": 1.5234714575485367e-05,
"loss": 1.0816,
"step": 4800
},
{
"epoch": 9.939148073022313,
"grad_norm": 5.43841028213501,
"learning_rate": 1.4510286873370038e-05,
"loss": 1.1061,
"step": 4900
},
{
"epoch": 10.141987829614605,
"grad_norm": 6.818022727966309,
"learning_rate": 1.378585917125471e-05,
"loss": 1.0616,
"step": 5000
},
{
"epoch": 10.344827586206897,
"grad_norm": 6.812931060791016,
"learning_rate": 1.3061431469139382e-05,
"loss": 1.0512,
"step": 5100
},
{
"epoch": 10.547667342799189,
"grad_norm": 7.301694393157959,
"learning_rate": 1.233700376702405e-05,
"loss": 1.0662,
"step": 5200
},
{
"epoch": 10.75050709939148,
"grad_norm": 4.213596820831299,
"learning_rate": 1.1612576064908723e-05,
"loss": 1.0638,
"step": 5300
},
{
"epoch": 10.953346855983773,
"grad_norm": 4.717465400695801,
"learning_rate": 1.0888148362793393e-05,
"loss": 1.0559,
"step": 5400
},
{
"epoch": 11.156186612576064,
"grad_norm": 5.471846103668213,
"learning_rate": 1.0163720660678066e-05,
"loss": 1.0143,
"step": 5500
},
{
"epoch": 11.359026369168356,
"grad_norm": 5.00101375579834,
"learning_rate": 9.439292958562735e-06,
"loss": 1.0048,
"step": 5600
},
{
"epoch": 11.561866125760648,
"grad_norm": 4.686493396759033,
"learning_rate": 8.714865256447408e-06,
"loss": 1.0124,
"step": 5700
},
{
"epoch": 11.764705882352942,
"grad_norm": 8.13379955291748,
"learning_rate": 7.990437554332078e-06,
"loss": 1.0094,
"step": 5800
},
{
"epoch": 11.967545638945234,
"grad_norm": 5.050856590270996,
"learning_rate": 7.266009852216748e-06,
"loss": 1.0281,
"step": 5900
},
{
"epoch": 12.170385395537526,
"grad_norm": 6.1310319900512695,
"learning_rate": 6.54158215010142e-06,
"loss": 0.9902,
"step": 6000
},
{
"epoch": 12.373225152129818,
"grad_norm": 4.342954635620117,
"learning_rate": 5.817154447986091e-06,
"loss": 0.9752,
"step": 6100
},
{
"epoch": 12.57606490872211,
"grad_norm": 4.584819316864014,
"learning_rate": 5.092726745870762e-06,
"loss": 1.0054,
"step": 6200
},
{
"epoch": 12.778904665314402,
"grad_norm": 4.735635757446289,
"learning_rate": 4.368299043755433e-06,
"loss": 0.9743,
"step": 6300
},
{
"epoch": 12.981744421906694,
"grad_norm": 6.229862213134766,
"learning_rate": 3.6438713416401046e-06,
"loss": 0.9504,
"step": 6400
},
{
"epoch": 13.184584178498985,
"grad_norm": 5.926666736602783,
"learning_rate": 2.919443639524776e-06,
"loss": 0.9525,
"step": 6500
},
{
"epoch": 13.387423935091277,
"grad_norm": 5.178487777709961,
"learning_rate": 2.1950159374094467e-06,
"loss": 0.9682,
"step": 6600
},
{
"epoch": 13.59026369168357,
"grad_norm": 4.677447319030762,
"learning_rate": 1.4705882352941177e-06,
"loss": 0.9458,
"step": 6700
},
{
"epoch": 13.793103448275861,
"grad_norm": 6.6742963790893555,
"learning_rate": 7.461605331787888e-07,
"loss": 0.9402,
"step": 6800
},
{
"epoch": 13.995943204868155,
"grad_norm": 4.439788341522217,
"learning_rate": 2.1732831063459866e-08,
"loss": 0.9464,
"step": 6900
}
],
"logging_steps": 100,
"max_steps": 6902,
"num_input_tokens_seen": 0,
"num_train_epochs": 14,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1060236819072000.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}