{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 8790,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11376564277588168,
"grad_norm": 6.126019477844238,
"learning_rate": 4.9731121281464535e-05,
"loss": 2.4955,
"step": 100
},
{
"epoch": 0.22753128555176336,
"grad_norm": 3.398350954055786,
"learning_rate": 4.916475972540046e-05,
"loss": 2.1647,
"step": 200
},
{
"epoch": 0.3412969283276451,
"grad_norm": 4.663970947265625,
"learning_rate": 4.859267734553776e-05,
"loss": 2.115,
"step": 300
},
{
"epoch": 0.4550625711035267,
"grad_norm": 7.414068222045898,
"learning_rate": 4.802059496567506e-05,
"loss": 2.0582,
"step": 400
},
{
"epoch": 0.5688282138794084,
"grad_norm": 5.257181644439697,
"learning_rate": 4.744851258581236e-05,
"loss": 2.0307,
"step": 500
},
{
"epoch": 0.6825938566552902,
"grad_norm": 2.71687388420105,
"learning_rate": 4.687643020594966e-05,
"loss": 1.9912,
"step": 600
},
{
"epoch": 0.7963594994311718,
"grad_norm": 6.777807712554932,
"learning_rate": 4.630434782608696e-05,
"loss": 1.9838,
"step": 700
},
{
"epoch": 0.9101251422070534,
"grad_norm": 7.886149883270264,
"learning_rate": 4.573226544622426e-05,
"loss": 1.9829,
"step": 800
},
{
"epoch": 1.0,
"eval_gen_len": 210.01,
"eval_loss": 2.0051724910736084,
"eval_rouge1": 44.2025,
"eval_rouge2": 21.4118,
"eval_rougeL": 29.8753,
"eval_rougeLsum": 39.0475,
"eval_runtime": 123.0269,
"eval_samples_per_second": 0.813,
"eval_steps_per_second": 0.106,
"step": 879
},
{
"epoch": 1.023890784982935,
"grad_norm": 8.102137565612793,
"learning_rate": 4.516018306636156e-05,
"loss": 1.9554,
"step": 900
},
{
"epoch": 1.1376564277588168,
"grad_norm": 11.607733726501465,
"learning_rate": 4.458810068649886e-05,
"loss": 1.955,
"step": 1000
},
{
"epoch": 1.2514220705346986,
"grad_norm": 6.75238037109375,
"learning_rate": 4.401601830663616e-05,
"loss": 1.8595,
"step": 1100
},
{
"epoch": 1.36518771331058,
"grad_norm": 3.8955740928649902,
"learning_rate": 4.344393592677346e-05,
"loss": 1.8723,
"step": 1200
},
{
"epoch": 1.4789533560864618,
"grad_norm": 3.7848293781280518,
"learning_rate": 4.287757437070938e-05,
"loss": 1.8681,
"step": 1300
},
{
"epoch": 1.5927189988623436,
"grad_norm": 5.047945976257324,
"learning_rate": 4.2305491990846686e-05,
"loss": 1.8742,
"step": 1400
},
{
"epoch": 1.7064846416382253,
"grad_norm": 4.739316463470459,
"learning_rate": 4.173340961098398e-05,
"loss": 1.8819,
"step": 1500
},
{
"epoch": 1.820250284414107,
"grad_norm": 5.646695613861084,
"learning_rate": 4.1161327231121284e-05,
"loss": 1.8737,
"step": 1600
},
{
"epoch": 1.9340159271899886,
"grad_norm": 3.755201578140259,
"learning_rate": 4.0589244851258586e-05,
"loss": 1.8785,
"step": 1700
},
{
"epoch": 2.0,
"eval_gen_len": 146.16,
"eval_loss": 1.9213957786560059,
"eval_rouge1": 46.698,
"eval_rouge2": 22.1329,
"eval_rougeL": 31.017,
"eval_rougeLsum": 41.3027,
"eval_runtime": 88.692,
"eval_samples_per_second": 1.127,
"eval_steps_per_second": 0.147,
"step": 1758
},
{
"epoch": 2.04778156996587,
"grad_norm": 2.962921619415283,
"learning_rate": 4.001716247139588e-05,
"loss": 1.8208,
"step": 1800
},
{
"epoch": 2.161547212741752,
"grad_norm": 8.434884071350098,
"learning_rate": 3.9445080091533184e-05,
"loss": 1.803,
"step": 1900
},
{
"epoch": 2.2753128555176336,
"grad_norm": 9.340300559997559,
"learning_rate": 3.8872997711670486e-05,
"loss": 1.8278,
"step": 2000
},
{
"epoch": 2.3890784982935154,
"grad_norm": 3.50976300239563,
"learning_rate": 3.830091533180778e-05,
"loss": 1.8069,
"step": 2100
},
{
"epoch": 2.502844141069397,
"grad_norm": 4.721962928771973,
"learning_rate": 3.7728832951945084e-05,
"loss": 1.8098,
"step": 2200
},
{
"epoch": 2.616609783845279,
"grad_norm": 3.864901542663574,
"learning_rate": 3.715675057208238e-05,
"loss": 1.7589,
"step": 2300
},
{
"epoch": 2.73037542662116,
"grad_norm": 24.329442977905273,
"learning_rate": 3.658466819221968e-05,
"loss": 1.7621,
"step": 2400
},
{
"epoch": 2.8441410693970424,
"grad_norm": 8.203035354614258,
"learning_rate": 3.601258581235698e-05,
"loss": 1.7758,
"step": 2500
},
{
"epoch": 2.9579067121729237,
"grad_norm": 5.4326066970825195,
"learning_rate": 3.544050343249428e-05,
"loss": 1.7493,
"step": 2600
},
{
"epoch": 3.0,
"eval_gen_len": 135.9,
"eval_loss": 1.86639404296875,
"eval_rouge1": 47.237,
"eval_rouge2": 23.0343,
"eval_rougeL": 31.7155,
"eval_rougeLsum": 42.1807,
"eval_runtime": 76.7658,
"eval_samples_per_second": 1.303,
"eval_steps_per_second": 0.169,
"step": 2637
},
{
"epoch": 3.0716723549488054,
"grad_norm": 3.5585150718688965,
"learning_rate": 3.4868421052631575e-05,
"loss": 1.7432,
"step": 2700
},
{
"epoch": 3.185437997724687,
"grad_norm": 3.9954323768615723,
"learning_rate": 3.429633867276888e-05,
"loss": 1.7035,
"step": 2800
},
{
"epoch": 3.299203640500569,
"grad_norm": 4.328066825866699,
"learning_rate": 3.372425629290618e-05,
"loss": 1.7343,
"step": 2900
},
{
"epoch": 3.4129692832764507,
"grad_norm": 4.497200965881348,
"learning_rate": 3.3152173913043475e-05,
"loss": 1.7319,
"step": 3000
},
{
"epoch": 3.526734926052332,
"grad_norm": 4.723243236541748,
"learning_rate": 3.258009153318078e-05,
"loss": 1.7294,
"step": 3100
},
{
"epoch": 3.640500568828214,
"grad_norm": 6.760339260101318,
"learning_rate": 3.200800915331808e-05,
"loss": 1.7024,
"step": 3200
},
{
"epoch": 3.7542662116040955,
"grad_norm": 3.54321026802063,
"learning_rate": 3.1435926773455376e-05,
"loss": 1.7029,
"step": 3300
},
{
"epoch": 3.868031854379977,
"grad_norm": 5.660515785217285,
"learning_rate": 3.086384439359268e-05,
"loss": 1.7313,
"step": 3400
},
{
"epoch": 3.981797497155859,
"grad_norm": 6.904107570648193,
"learning_rate": 3.029176201372998e-05,
"loss": 1.6599,
"step": 3500
},
{
"epoch": 4.0,
"eval_gen_len": 133.89,
"eval_loss": 1.8406709432601929,
"eval_rouge1": 46.8883,
"eval_rouge2": 22.317,
"eval_rougeL": 30.9894,
"eval_rougeLsum": 41.5511,
"eval_runtime": 75.0208,
"eval_samples_per_second": 1.333,
"eval_steps_per_second": 0.173,
"step": 3516
},
{
"epoch": 4.09556313993174,
"grad_norm": 3.598436117172241,
"learning_rate": 2.9719679633867276e-05,
"loss": 1.6754,
"step": 3600
},
{
"epoch": 4.2093287827076225,
"grad_norm": 3.4669225215911865,
"learning_rate": 2.9147597254004578e-05,
"loss": 1.685,
"step": 3700
},
{
"epoch": 4.323094425483504,
"grad_norm": 4.923774242401123,
"learning_rate": 2.857551487414188e-05,
"loss": 1.6468,
"step": 3800
},
{
"epoch": 4.436860068259386,
"grad_norm": 5.548232078552246,
"learning_rate": 2.8009153318077803e-05,
"loss": 1.6389,
"step": 3900
},
{
"epoch": 4.550625711035267,
"grad_norm": 6.222611904144287,
"learning_rate": 2.7437070938215102e-05,
"loss": 1.6251,
"step": 4000
},
{
"epoch": 4.664391353811149,
"grad_norm": 4.012085437774658,
"learning_rate": 2.6864988558352404e-05,
"loss": 1.6708,
"step": 4100
},
{
"epoch": 4.778156996587031,
"grad_norm": 4.607513904571533,
"learning_rate": 2.62929061784897e-05,
"loss": 1.6539,
"step": 4200
},
{
"epoch": 4.891922639362912,
"grad_norm": 7.459988594055176,
"learning_rate": 2.5720823798627002e-05,
"loss": 1.6442,
"step": 4300
},
{
"epoch": 5.0,
"eval_gen_len": 130.6,
"eval_loss": 1.8186066150665283,
"eval_rouge1": 46.7324,
"eval_rouge2": 22.5522,
"eval_rougeL": 30.8932,
"eval_rougeLsum": 41.6596,
"eval_runtime": 71.9117,
"eval_samples_per_second": 1.391,
"eval_steps_per_second": 0.181,
"step": 4395
},
{
"epoch": 5.005688282138794,
"grad_norm": 3.9666900634765625,
"learning_rate": 2.5148741418764304e-05,
"loss": 1.6693,
"step": 4400
},
{
"epoch": 5.1194539249146755,
"grad_norm": 3.8383851051330566,
"learning_rate": 2.4576659038901603e-05,
"loss": 1.5846,
"step": 4500
},
{
"epoch": 5.233219567690558,
"grad_norm": 3.5106499195098877,
"learning_rate": 2.4004576659038902e-05,
"loss": 1.6043,
"step": 4600
},
{
"epoch": 5.346985210466439,
"grad_norm": 4.501423358917236,
"learning_rate": 2.3432494279176205e-05,
"loss": 1.6203,
"step": 4700
},
{
"epoch": 5.460750853242321,
"grad_norm": 3.3335440158843994,
"learning_rate": 2.2860411899313504e-05,
"loss": 1.6109,
"step": 4800
},
{
"epoch": 5.5745164960182025,
"grad_norm": 10.799994468688965,
"learning_rate": 2.2288329519450803e-05,
"loss": 1.6036,
"step": 4900
},
{
"epoch": 5.688282138794084,
"grad_norm": 3.463279962539673,
"learning_rate": 2.17162471395881e-05,
"loss": 1.6034,
"step": 5000
},
{
"epoch": 5.802047781569966,
"grad_norm": 5.357439994812012,
"learning_rate": 2.1149885583524028e-05,
"loss": 1.6073,
"step": 5100
},
{
"epoch": 5.915813424345847,
"grad_norm": 6.183532238006592,
"learning_rate": 2.0577803203661326e-05,
"loss": 1.65,
"step": 5200
},
{
"epoch": 6.0,
"eval_gen_len": 129.34,
"eval_loss": 1.804569959640503,
"eval_rouge1": 46.7244,
"eval_rouge2": 22.3848,
"eval_rougeL": 31.2658,
"eval_rougeLsum": 41.6427,
"eval_runtime": 65.0318,
"eval_samples_per_second": 1.538,
"eval_steps_per_second": 0.2,
"step": 5274
},
{
"epoch": 6.0295790671217295,
"grad_norm": 4.417309761047363,
"learning_rate": 2.000572082379863e-05,
"loss": 1.6041,
"step": 5300
},
{
"epoch": 6.143344709897611,
"grad_norm": 5.918379306793213,
"learning_rate": 1.9433638443935928e-05,
"loss": 1.5789,
"step": 5400
},
{
"epoch": 6.257110352673493,
"grad_norm": 3.504812240600586,
"learning_rate": 1.8861556064073227e-05,
"loss": 1.6221,
"step": 5500
},
{
"epoch": 6.370875995449374,
"grad_norm": 4.689468860626221,
"learning_rate": 1.828947368421053e-05,
"loss": 1.5967,
"step": 5600
},
{
"epoch": 6.484641638225256,
"grad_norm": 3.1115574836730957,
"learning_rate": 1.7717391304347828e-05,
"loss": 1.5745,
"step": 5700
},
{
"epoch": 6.598407281001138,
"grad_norm": 5.300652503967285,
"learning_rate": 1.7145308924485127e-05,
"loss": 1.5666,
"step": 5800
},
{
"epoch": 6.712172923777019,
"grad_norm": 3.5895206928253174,
"learning_rate": 1.657322654462243e-05,
"loss": 1.5531,
"step": 5900
},
{
"epoch": 6.825938566552901,
"grad_norm": 3.9184463024139404,
"learning_rate": 1.6001144164759728e-05,
"loss": 1.583,
"step": 6000
},
{
"epoch": 6.939704209328783,
"grad_norm": 4.801300048828125,
"learning_rate": 1.5429061784897027e-05,
"loss": 1.5859,
"step": 6100
},
{
"epoch": 7.0,
"eval_gen_len": 128.86,
"eval_loss": 1.7970900535583496,
"eval_rouge1": 47.0912,
"eval_rouge2": 22.2605,
"eval_rougeL": 31.1363,
"eval_rougeLsum": 41.6028,
"eval_runtime": 65.7365,
"eval_samples_per_second": 1.521,
"eval_steps_per_second": 0.198,
"step": 6153
},
{
"epoch": 7.053469852104665,
"grad_norm": 36.4532470703125,
"learning_rate": 1.4856979405034328e-05,
"loss": 1.539,
"step": 6200
},
{
"epoch": 7.167235494880546,
"grad_norm": 5.765852451324463,
"learning_rate": 1.4284897025171627e-05,
"loss": 1.5805,
"step": 6300
},
{
"epoch": 7.281001137656427,
"grad_norm": 5.349630832672119,
"learning_rate": 1.3712814645308924e-05,
"loss": 1.5332,
"step": 6400
},
{
"epoch": 7.39476678043231,
"grad_norm": 100.44608306884766,
"learning_rate": 1.3140732265446226e-05,
"loss": 1.5277,
"step": 6500
},
{
"epoch": 7.508532423208191,
"grad_norm": 3.3179357051849365,
"learning_rate": 1.2568649885583525e-05,
"loss": 1.5304,
"step": 6600
},
{
"epoch": 7.622298065984073,
"grad_norm": 5.867196083068848,
"learning_rate": 1.1996567505720824e-05,
"loss": 1.5314,
"step": 6700
},
{
"epoch": 7.736063708759954,
"grad_norm": 3.5859735012054443,
"learning_rate": 1.1424485125858125e-05,
"loss": 1.5665,
"step": 6800
},
{
"epoch": 7.849829351535837,
"grad_norm": 3.854527711868286,
"learning_rate": 1.0852402745995424e-05,
"loss": 1.5633,
"step": 6900
},
{
"epoch": 7.963594994311718,
"grad_norm": 3.261179208755493,
"learning_rate": 1.0280320366132722e-05,
"loss": 1.5773,
"step": 7000
},
{
"epoch": 8.0,
"eval_gen_len": 128.57,
"eval_loss": 1.7826117277145386,
"eval_rouge1": 47.1155,
"eval_rouge2": 22.756,
"eval_rougeL": 31.6846,
"eval_rougeLsum": 41.8634,
"eval_runtime": 68.6593,
"eval_samples_per_second": 1.456,
"eval_steps_per_second": 0.189,
"step": 7032
},
{
"epoch": 8.0773606370876,
"grad_norm": 3.9632034301757812,
"learning_rate": 9.708237986270023e-06,
"loss": 1.5591,
"step": 7100
},
{
"epoch": 8.19112627986348,
"grad_norm": 6.473509311676025,
"learning_rate": 9.136155606407324e-06,
"loss": 1.5121,
"step": 7200
},
{
"epoch": 8.304891922639364,
"grad_norm": 4.013639450073242,
"learning_rate": 8.564073226544623e-06,
"loss": 1.5342,
"step": 7300
},
{
"epoch": 8.418657565415245,
"grad_norm": 6.20673942565918,
"learning_rate": 7.991990846681922e-06,
"loss": 1.536,
"step": 7400
},
{
"epoch": 8.532423208191126,
"grad_norm": 3.9642581939697266,
"learning_rate": 7.419908466819222e-06,
"loss": 1.5527,
"step": 7500
},
{
"epoch": 8.646188850967008,
"grad_norm": 4.914712429046631,
"learning_rate": 6.847826086956521e-06,
"loss": 1.5377,
"step": 7600
},
{
"epoch": 8.759954493742889,
"grad_norm": 3.1526217460632324,
"learning_rate": 6.275743707093822e-06,
"loss": 1.5192,
"step": 7700
},
{
"epoch": 8.873720136518772,
"grad_norm": 4.830049991607666,
"learning_rate": 5.7036613272311215e-06,
"loss": 1.5166,
"step": 7800
},
{
"epoch": 8.987485779294653,
"grad_norm": 5.3716020584106445,
"learning_rate": 5.131578947368421e-06,
"loss": 1.5268,
"step": 7900
},
{
"epoch": 9.0,
"eval_gen_len": 128.39,
"eval_loss": 1.7820578813552856,
"eval_rouge1": 47.0113,
"eval_rouge2": 23.0256,
"eval_rougeL": 31.9372,
"eval_rougeLsum": 42.0294,
"eval_runtime": 68.0652,
"eval_samples_per_second": 1.469,
"eval_steps_per_second": 0.191,
"step": 7911
},
{
"epoch": 9.101251422070535,
"grad_norm": 5.304577350616455,
"learning_rate": 4.559496567505721e-06,
"loss": 1.5266,
"step": 8000
},
{
"epoch": 9.215017064846416,
"grad_norm": 41.307308197021484,
"learning_rate": 3.987414187643021e-06,
"loss": 1.5269,
"step": 8100
},
{
"epoch": 9.328782707622299,
"grad_norm": 3.451307773590088,
"learning_rate": 3.4153318077803206e-06,
"loss": 1.5135,
"step": 8200
},
{
"epoch": 9.44254835039818,
"grad_norm": 3.4578561782836914,
"learning_rate": 2.8432494279176204e-06,
"loss": 1.5204,
"step": 8300
},
{
"epoch": 9.556313993174061,
"grad_norm": 4.3368096351623535,
"learning_rate": 2.2711670480549198e-06,
"loss": 1.5315,
"step": 8400
},
{
"epoch": 9.670079635949943,
"grad_norm": 4.57019567489624,
"learning_rate": 1.6990846681922198e-06,
"loss": 1.5206,
"step": 8500
},
{
"epoch": 9.783845278725824,
"grad_norm": 4.861795425415039,
"learning_rate": 1.1270022883295195e-06,
"loss": 1.5193,
"step": 8600
},
{
"epoch": 9.897610921501707,
"grad_norm": 3.933220863342285,
"learning_rate": 5.549199084668192e-07,
"loss": 1.5362,
"step": 8700
},
{
"epoch": 10.0,
"eval_gen_len": 128.35,
"eval_loss": 1.7812010049819946,
"eval_rouge1": 46.8688,
"eval_rouge2": 23.0889,
"eval_rougeL": 31.9785,
"eval_rougeLsum": 41.911,
"eval_runtime": 68.0802,
"eval_samples_per_second": 1.469,
"eval_steps_per_second": 0.191,
"step": 8790
}
],
"logging_steps": 100,
"max_steps": 8790,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.031293483188224e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}