short-3 / trainer_state.json
allstax's picture
Upload folder using huggingface_hub
05f1864 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.6413997492305938,
"eval_steps": 720,
"global_step": 28800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"grad_norm": 0.535019040107727,
"learning_rate": 9.864353971540931e-05,
"loss": 0.3127,
"step": 720
},
{
"epoch": 0.04,
"eval_bertscore": 0.7034942507743835,
"eval_loss": 0.12081495672464371,
"eval_rouge1": 0.5240594026277678,
"eval_rouge2": 0.3067757525245376,
"eval_rougeL": 0.39633186458379827,
"eval_rougeLsum": 0.39613387865495875,
"eval_runtime": 80.0774,
"eval_samples_per_second": 0.674,
"eval_steps_per_second": 0.337,
"step": 720
},
{
"epoch": 0.08,
"grad_norm": 0.42372021079063416,
"learning_rate": 9.72756806048977e-05,
"loss": 0.1212,
"step": 1440
},
{
"epoch": 0.08,
"eval_bertscore": 0.7000990509986877,
"eval_loss": 0.11524277925491333,
"eval_rouge1": 0.5216793756151761,
"eval_rouge2": 0.2934274790368596,
"eval_rougeL": 0.3886043968581182,
"eval_rougeLsum": 0.38807827948983176,
"eval_runtime": 76.9989,
"eval_samples_per_second": 0.701,
"eval_steps_per_second": 0.351,
"step": 1440
},
{
"epoch": 0.12,
"grad_norm": 0.42828959226608276,
"learning_rate": 9.590782149438608e-05,
"loss": 0.1222,
"step": 2160
},
{
"epoch": 0.12,
"eval_bertscore": 0.6457424163818359,
"eval_loss": 0.1191863939166069,
"eval_rouge1": 0.4637245182501467,
"eval_rouge2": 0.25286895940302717,
"eval_rougeL": 0.34111914002345234,
"eval_rougeLsum": 0.3407154814401842,
"eval_runtime": 103.8897,
"eval_samples_per_second": 0.52,
"eval_steps_per_second": 0.26,
"step": 2160
},
{
"epoch": 0.16,
"grad_norm": 0.44599342346191406,
"learning_rate": 9.453996238387447e-05,
"loss": 0.1172,
"step": 2880
},
{
"epoch": 0.16,
"eval_bertscore": 0.6841260194778442,
"eval_loss": 0.11394956707954407,
"eval_rouge1": 0.48661400042430136,
"eval_rouge2": 0.27298564925044316,
"eval_rougeL": 0.371859286615548,
"eval_rougeLsum": 0.37045985133751724,
"eval_runtime": 67.7701,
"eval_samples_per_second": 0.797,
"eval_steps_per_second": 0.398,
"step": 2880
},
{
"epoch": 0.21,
"grad_norm": 0.4306412935256958,
"learning_rate": 9.317210327336284e-05,
"loss": 0.1265,
"step": 3600
},
{
"epoch": 0.21,
"eval_bertscore": 0.5902894139289856,
"eval_loss": 0.1217198297381401,
"eval_rouge1": 0.3684193429088964,
"eval_rouge2": 0.19503224358227592,
"eval_rougeL": 0.29399973276202795,
"eval_rougeLsum": 0.29348115386759427,
"eval_runtime": 58.6683,
"eval_samples_per_second": 0.92,
"eval_steps_per_second": 0.46,
"step": 3600
},
{
"epoch": 0.25,
"grad_norm": 0.4569800794124603,
"learning_rate": 9.180424416285122e-05,
"loss": 0.1201,
"step": 4320
},
{
"epoch": 0.25,
"eval_bertscore": 0.6664375066757202,
"eval_loss": 0.11453992873430252,
"eval_rouge1": 0.4849966834372761,
"eval_rouge2": 0.27797369581531306,
"eval_rougeL": 0.3629296708263681,
"eval_rougeLsum": 0.3623800251579623,
"eval_runtime": 81.2927,
"eval_samples_per_second": 0.664,
"eval_steps_per_second": 0.332,
"step": 4320
},
{
"epoch": 0.29,
"grad_norm": 0.4236726462841034,
"learning_rate": 9.043638505233961e-05,
"loss": 0.1171,
"step": 5040
},
{
"epoch": 0.29,
"eval_bertscore": 0.6511832475662231,
"eval_loss": 0.1140449047088623,
"eval_rouge1": 0.4622762558490229,
"eval_rouge2": 0.2720739144786822,
"eval_rougeL": 0.35563312683133363,
"eval_rougeLsum": 0.3553109181928958,
"eval_runtime": 96.9669,
"eval_samples_per_second": 0.557,
"eval_steps_per_second": 0.278,
"step": 5040
},
{
"epoch": 0.33,
"grad_norm": 0.4153118431568146,
"learning_rate": 8.9068525941828e-05,
"loss": 0.1182,
"step": 5760
},
{
"epoch": 0.33,
"eval_bertscore": 0.7056278586387634,
"eval_loss": 0.11447593569755554,
"eval_rouge1": 0.535980541670685,
"eval_rouge2": 0.3085487182685619,
"eval_rougeL": 0.3952747177668595,
"eval_rougeLsum": 0.39487374036594847,
"eval_runtime": 78.3273,
"eval_samples_per_second": 0.689,
"eval_steps_per_second": 0.345,
"step": 5760
},
{
"epoch": 0.37,
"grad_norm": 0.30859124660491943,
"learning_rate": 8.770066683131638e-05,
"loss": 0.1161,
"step": 6480
},
{
"epoch": 0.37,
"eval_bertscore": 0.717333197593689,
"eval_loss": 0.11316747963428497,
"eval_rouge1": 0.5395779598124536,
"eval_rouge2": 0.3235379995103774,
"eval_rougeL": 0.40115447322823283,
"eval_rougeLsum": 0.40261399344054405,
"eval_runtime": 77.4572,
"eval_samples_per_second": 0.697,
"eval_steps_per_second": 0.349,
"step": 6480
},
{
"epoch": 0.41,
"grad_norm": 0.3857922852039337,
"learning_rate": 8.633280772080476e-05,
"loss": 0.1151,
"step": 7200
},
{
"epoch": 0.41,
"eval_bertscore": 0.7019616365432739,
"eval_loss": 0.11000501364469528,
"eval_rouge1": 0.5236493302484355,
"eval_rouge2": 0.3068191529551719,
"eval_rougeL": 0.4036664284755191,
"eval_rougeLsum": 0.4040689187486951,
"eval_runtime": 76.1117,
"eval_samples_per_second": 0.709,
"eval_steps_per_second": 0.355,
"step": 7200
},
{
"epoch": 0.45,
"grad_norm": 0.3432803452014923,
"learning_rate": 8.496494861029315e-05,
"loss": 0.1144,
"step": 7920
},
{
"epoch": 0.45,
"eval_bertscore": 0.7042035460472107,
"eval_loss": 0.11131834983825684,
"eval_rouge1": 0.5294383728708514,
"eval_rouge2": 0.3003098501119716,
"eval_rougeL": 0.3967273111533241,
"eval_rougeLsum": 0.39619485281011757,
"eval_runtime": 77.3811,
"eval_samples_per_second": 0.698,
"eval_steps_per_second": 0.349,
"step": 7920
},
{
"epoch": 0.49,
"grad_norm": 0.3402859568595886,
"learning_rate": 8.359708949978152e-05,
"loss": 0.1126,
"step": 8640
},
{
"epoch": 0.49,
"eval_bertscore": 0.71971595287323,
"eval_loss": 0.11067274957895279,
"eval_rouge1": 0.551885774991056,
"eval_rouge2": 0.33499475588298316,
"eval_rougeL": 0.4160407628361842,
"eval_rougeLsum": 0.4164543392695917,
"eval_runtime": 77.4902,
"eval_samples_per_second": 0.697,
"eval_steps_per_second": 0.348,
"step": 8640
},
{
"epoch": 0.53,
"grad_norm": 0.4391550123691559,
"learning_rate": 8.223113019359007e-05,
"loss": 0.1116,
"step": 9360
},
{
"epoch": 0.53,
"eval_bertscore": 0.7158631086349487,
"eval_loss": 0.1099533885717392,
"eval_rouge1": 0.5557272797557176,
"eval_rouge2": 0.332779980249166,
"eval_rougeL": 0.4155444723963883,
"eval_rougeLsum": 0.41657130732656783,
"eval_runtime": 80.0828,
"eval_samples_per_second": 0.674,
"eval_steps_per_second": 0.337,
"step": 9360
},
{
"epoch": 0.57,
"grad_norm": 0.3907322287559509,
"learning_rate": 8.086327108307846e-05,
"loss": 0.1141,
"step": 10080
},
{
"epoch": 0.57,
"eval_bertscore": 0.7130799293518066,
"eval_loss": 0.11258435994386673,
"eval_rouge1": 0.5457292777447704,
"eval_rouge2": 0.3214033358835623,
"eval_rougeL": 0.40814606110656115,
"eval_rougeLsum": 0.4086806368595041,
"eval_runtime": 73.8407,
"eval_samples_per_second": 0.731,
"eval_steps_per_second": 0.366,
"step": 10080
},
{
"epoch": 0.62,
"grad_norm": 0.38848328590393066,
"learning_rate": 7.949541197256683e-05,
"loss": 0.1132,
"step": 10800
},
{
"epoch": 0.62,
"eval_bertscore": 0.7245057225227356,
"eval_loss": 0.11034353822469711,
"eval_rouge1": 0.5603983140826179,
"eval_rouge2": 0.3445987526625777,
"eval_rougeL": 0.43423536113182604,
"eval_rougeLsum": 0.43398163455334016,
"eval_runtime": 77.732,
"eval_samples_per_second": 0.695,
"eval_steps_per_second": 0.347,
"step": 10800
},
{
"epoch": 0.66,
"grad_norm": 0.37145286798477173,
"learning_rate": 7.812945266637537e-05,
"loss": 0.112,
"step": 11520
},
{
"epoch": 0.66,
"eval_bertscore": 0.7207842469215393,
"eval_loss": 0.11157318204641342,
"eval_rouge1": 0.5554178283606811,
"eval_rouge2": 0.3317069905744905,
"eval_rougeL": 0.4209451268922738,
"eval_rougeLsum": 0.42120272115590573,
"eval_runtime": 81.6277,
"eval_samples_per_second": 0.662,
"eval_steps_per_second": 0.331,
"step": 11520
},
{
"epoch": 0.7,
"grad_norm": 0.32327908277511597,
"learning_rate": 7.676349336018391e-05,
"loss": 0.1118,
"step": 12240
},
{
"epoch": 0.7,
"eval_bertscore": 0.7193225622177124,
"eval_loss": 0.11037024855613708,
"eval_rouge1": 0.5534709029702873,
"eval_rouge2": 0.33508595975393674,
"eval_rougeL": 0.4220660586810759,
"eval_rougeLsum": 0.42394444829473793,
"eval_runtime": 79.4778,
"eval_samples_per_second": 0.679,
"eval_steps_per_second": 0.34,
"step": 12240
},
{
"epoch": 0.74,
"grad_norm": 0.296165406703949,
"learning_rate": 7.539563424967229e-05,
"loss": 0.1096,
"step": 12960
},
{
"epoch": 0.74,
"eval_bertscore": 0.7183234691619873,
"eval_loss": 0.10668845474720001,
"eval_rouge1": 0.5527080110711662,
"eval_rouge2": 0.3304597058226536,
"eval_rougeL": 0.4176676998826935,
"eval_rougeLsum": 0.41906982236369805,
"eval_runtime": 74.609,
"eval_samples_per_second": 0.724,
"eval_steps_per_second": 0.362,
"step": 12960
},
{
"epoch": 0.78,
"grad_norm": 0.3004627525806427,
"learning_rate": 7.402777513916068e-05,
"loss": 0.1105,
"step": 13680
},
{
"epoch": 0.78,
"eval_bertscore": 0.7093863487243652,
"eval_loss": 0.1068594753742218,
"eval_rouge1": 0.5386027107080774,
"eval_rouge2": 0.3174670612173311,
"eval_rougeL": 0.4089464604886982,
"eval_rougeLsum": 0.40954043741634194,
"eval_runtime": 76.1174,
"eval_samples_per_second": 0.709,
"eval_steps_per_second": 0.355,
"step": 13680
},
{
"epoch": 0.82,
"grad_norm": 0.4122227132320404,
"learning_rate": 7.265991602864905e-05,
"loss": 0.1094,
"step": 14400
},
{
"epoch": 0.82,
"eval_bertscore": 0.7156451344490051,
"eval_loss": 0.10706545412540436,
"eval_rouge1": 0.5522097394348282,
"eval_rouge2": 0.3376815877629147,
"eval_rougeL": 0.41094798705443536,
"eval_rougeLsum": 0.41185755780524297,
"eval_runtime": 79.5068,
"eval_samples_per_second": 0.679,
"eval_steps_per_second": 0.34,
"step": 14400
},
{
"epoch": 0.86,
"grad_norm": 0.3019055128097534,
"learning_rate": 7.129205691813743e-05,
"loss": 0.1047,
"step": 15120
},
{
"epoch": 0.86,
"eval_bertscore": 0.723181962966919,
"eval_loss": 0.10513070970773697,
"eval_rouge1": 0.5597833953895566,
"eval_rouge2": 0.3368159976094224,
"eval_rougeL": 0.4251112326345452,
"eval_rougeLsum": 0.4271018761152323,
"eval_runtime": 72.9275,
"eval_samples_per_second": 0.74,
"eval_steps_per_second": 0.37,
"step": 15120
},
{
"epoch": 0.9,
"grad_norm": 0.39543616771698,
"learning_rate": 6.992609761194597e-05,
"loss": 0.106,
"step": 15840
},
{
"epoch": 0.9,
"eval_bertscore": 0.7264233231544495,
"eval_loss": 0.10471142083406448,
"eval_rouge1": 0.5607444186855683,
"eval_rouge2": 0.32933852525922336,
"eval_rougeL": 0.4164104876659622,
"eval_rougeLsum": 0.4178921783444509,
"eval_runtime": 79.4632,
"eval_samples_per_second": 0.68,
"eval_steps_per_second": 0.34,
"step": 15840
},
{
"epoch": 0.94,
"grad_norm": 0.17296220362186432,
"learning_rate": 6.855823850143436e-05,
"loss": 0.1085,
"step": 16560
},
{
"epoch": 0.94,
"eval_bertscore": 0.7186797261238098,
"eval_loss": 0.10288402438163757,
"eval_rouge1": 0.5499937534452628,
"eval_rouge2": 0.33202955320606253,
"eval_rougeL": 0.41109499153735635,
"eval_rougeLsum": 0.4129325173744952,
"eval_runtime": 74.2816,
"eval_samples_per_second": 0.727,
"eval_steps_per_second": 0.363,
"step": 16560
},
{
"epoch": 0.98,
"grad_norm": 0.34968239068984985,
"learning_rate": 6.719037939092274e-05,
"loss": 0.1064,
"step": 17280
},
{
"epoch": 0.98,
"eval_bertscore": 0.715437650680542,
"eval_loss": 0.10678575932979584,
"eval_rouge1": 0.5487884139639068,
"eval_rouge2": 0.3287484312214649,
"eval_rougeL": 0.4115546192599129,
"eval_rougeLsum": 0.4129129108454481,
"eval_runtime": 77.9977,
"eval_samples_per_second": 0.692,
"eval_steps_per_second": 0.346,
"step": 17280
},
{
"epoch": 1.03,
"grad_norm": 0.22770258784294128,
"learning_rate": 6.582252028041113e-05,
"loss": 0.094,
"step": 18000
},
{
"epoch": 1.03,
"eval_bertscore": 0.7268933653831482,
"eval_loss": 0.10910864919424057,
"eval_rouge1": 0.5644640432420631,
"eval_rouge2": 0.34856910757450765,
"eval_rougeL": 0.4334348850734425,
"eval_rougeLsum": 0.4322774316283801,
"eval_runtime": 70.7723,
"eval_samples_per_second": 0.763,
"eval_steps_per_second": 0.382,
"step": 18000
},
{
"epoch": 1.07,
"grad_norm": 0.2036217600107193,
"learning_rate": 6.44546611698995e-05,
"loss": 0.0864,
"step": 18720
},
{
"epoch": 1.07,
"eval_bertscore": 0.7298507690429688,
"eval_loss": 0.1051657572388649,
"eval_rouge1": 0.5693416283658175,
"eval_rouge2": 0.3547090481291705,
"eval_rougeL": 0.4367412765285528,
"eval_rougeLsum": 0.4370252833034207,
"eval_runtime": 74.7048,
"eval_samples_per_second": 0.723,
"eval_steps_per_second": 0.361,
"step": 18720
},
{
"epoch": 1.11,
"grad_norm": 0.3400803804397583,
"learning_rate": 6.308680205938788e-05,
"loss": 0.0846,
"step": 19440
},
{
"epoch": 1.11,
"eval_bertscore": 0.7288545966148376,
"eval_loss": 0.1069113239645958,
"eval_rouge1": 0.5633722381222108,
"eval_rouge2": 0.337377454492796,
"eval_rougeL": 0.4349115421710151,
"eval_rougeLsum": 0.43561356852158567,
"eval_runtime": 73.5552,
"eval_samples_per_second": 0.734,
"eval_steps_per_second": 0.367,
"step": 19440
},
{
"epoch": 1.15,
"grad_norm": 0.3360745310783386,
"learning_rate": 6.172084275319642e-05,
"loss": 0.0875,
"step": 20160
},
{
"epoch": 1.15,
"eval_bertscore": 0.715953528881073,
"eval_loss": 0.10425002127885818,
"eval_rouge1": 0.5548398737384996,
"eval_rouge2": 0.33589277481599067,
"eval_rougeL": 0.42137114864331937,
"eval_rougeLsum": 0.4231469615029759,
"eval_runtime": 78.0304,
"eval_samples_per_second": 0.692,
"eval_steps_per_second": 0.346,
"step": 20160
},
{
"epoch": 1.19,
"grad_norm": 0.4246189594268799,
"learning_rate": 6.03529836426848e-05,
"loss": 0.0868,
"step": 20880
},
{
"epoch": 1.19,
"eval_bertscore": 0.7299396395683289,
"eval_loss": 0.10365325212478638,
"eval_rouge1": 0.5715394315498017,
"eval_rouge2": 0.34427400662165897,
"eval_rougeL": 0.433027526044127,
"eval_rougeLsum": 0.4347450430032858,
"eval_runtime": 74.0473,
"eval_samples_per_second": 0.729,
"eval_steps_per_second": 0.365,
"step": 20880
},
{
"epoch": 1.23,
"grad_norm": 0.2776849865913391,
"learning_rate": 5.898512453217319e-05,
"loss": 0.0854,
"step": 21600
},
{
"epoch": 1.23,
"eval_bertscore": 0.7214290499687195,
"eval_loss": 0.10122980922460556,
"eval_rouge1": 0.5565823263453793,
"eval_rouge2": 0.3393375143994867,
"eval_rougeL": 0.4156140884756716,
"eval_rougeLsum": 0.41819540905867203,
"eval_runtime": 73.2235,
"eval_samples_per_second": 0.737,
"eval_steps_per_second": 0.369,
"step": 21600
},
{
"epoch": 1.27,
"grad_norm": 0.3710538446903229,
"learning_rate": 5.761726542166157e-05,
"loss": 0.0845,
"step": 22320
},
{
"epoch": 1.27,
"eval_bertscore": 0.7201518416404724,
"eval_loss": 0.10378885269165039,
"eval_rouge1": 0.5441295123980776,
"eval_rouge2": 0.33155064058257405,
"eval_rougeL": 0.42094247090226844,
"eval_rougeLsum": 0.42274633038817555,
"eval_runtime": 76.6313,
"eval_samples_per_second": 0.705,
"eval_steps_per_second": 0.352,
"step": 22320
},
{
"epoch": 1.31,
"grad_norm": 0.5819060206413269,
"learning_rate": 5.624940631114996e-05,
"loss": 0.0861,
"step": 23040
},
{
"epoch": 1.31,
"eval_bertscore": 0.7142701148986816,
"eval_loss": 0.10384026169776917,
"eval_rouge1": 0.5458184588249984,
"eval_rouge2": 0.3290922169442115,
"eval_rougeL": 0.4214855047650181,
"eval_rougeLsum": 0.4239018723206239,
"eval_runtime": 79.4541,
"eval_samples_per_second": 0.68,
"eval_steps_per_second": 0.34,
"step": 23040
},
{
"epoch": 1.35,
"grad_norm": 0.2952657639980316,
"learning_rate": 5.488154720063834e-05,
"loss": 0.0862,
"step": 23760
},
{
"epoch": 1.35,
"eval_bertscore": 0.7302463054656982,
"eval_loss": 0.10171066224575043,
"eval_rouge1": 0.564237466077122,
"eval_rouge2": 0.346632021192653,
"eval_rougeL": 0.44007571581541377,
"eval_rougeLsum": 0.4408434182223313,
"eval_runtime": 70.4368,
"eval_samples_per_second": 0.767,
"eval_steps_per_second": 0.383,
"step": 23760
},
{
"epoch": 1.4,
"grad_norm": 0.3152740001678467,
"learning_rate": 5.351368809012672e-05,
"loss": 0.0858,
"step": 24480
},
{
"epoch": 1.4,
"eval_bertscore": 0.7205690741539001,
"eval_loss": 0.10222817957401276,
"eval_rouge1": 0.561963492920594,
"eval_rouge2": 0.3366175149143015,
"eval_rougeL": 0.4370056834486044,
"eval_rougeLsum": 0.4383325343921459,
"eval_runtime": 70.7631,
"eval_samples_per_second": 0.763,
"eval_steps_per_second": 0.382,
"step": 24480
},
{
"epoch": 1.44,
"grad_norm": 0.3384862542152405,
"learning_rate": 5.214772878393526e-05,
"loss": 0.0868,
"step": 25200
},
{
"epoch": 1.44,
"eval_bertscore": 0.7201054096221924,
"eval_loss": 0.10058918595314026,
"eval_rouge1": 0.5506843793300468,
"eval_rouge2": 0.3305447880283259,
"eval_rougeL": 0.4221671281003694,
"eval_rougeLsum": 0.42405735392085775,
"eval_runtime": 73.3661,
"eval_samples_per_second": 0.736,
"eval_steps_per_second": 0.368,
"step": 25200
},
{
"epoch": 1.48,
"grad_norm": 0.2858143150806427,
"learning_rate": 5.078176947774379e-05,
"loss": 0.0851,
"step": 25920
},
{
"epoch": 1.48,
"eval_bertscore": 0.7324591875076294,
"eval_loss": 0.10030569136142731,
"eval_rouge1": 0.5711881175991272,
"eval_rouge2": 0.35036140380824915,
"eval_rougeL": 0.44736244718696055,
"eval_rougeLsum": 0.44882200145887735,
"eval_runtime": 73.2375,
"eval_samples_per_second": 0.737,
"eval_steps_per_second": 0.369,
"step": 25920
},
{
"epoch": 1.52,
"grad_norm": 0.34586507081985474,
"learning_rate": 4.941391036723218e-05,
"loss": 0.0839,
"step": 26640
},
{
"epoch": 1.52,
"eval_bertscore": 0.7323827147483826,
"eval_loss": 0.10078810900449753,
"eval_rouge1": 0.5643411922408847,
"eval_rouge2": 0.35335509416724475,
"eval_rougeL": 0.4412030311945061,
"eval_rougeLsum": 0.4423071630624772,
"eval_runtime": 78.0237,
"eval_samples_per_second": 0.692,
"eval_steps_per_second": 0.346,
"step": 26640
},
{
"epoch": 1.56,
"grad_norm": 0.35009488463401794,
"learning_rate": 4.804605125672056e-05,
"loss": 0.0843,
"step": 27360
},
{
"epoch": 1.56,
"eval_bertscore": 0.7391833662986755,
"eval_loss": 0.09879420697689056,
"eval_rouge1": 0.5744063451356638,
"eval_rouge2": 0.3631199161982914,
"eval_rougeL": 0.44665302719291095,
"eval_rougeLsum": 0.44897406269269213,
"eval_runtime": 80.5403,
"eval_samples_per_second": 0.67,
"eval_steps_per_second": 0.335,
"step": 27360
},
{
"epoch": 1.6,
"grad_norm": 0.3002821207046509,
"learning_rate": 4.667819214620894e-05,
"loss": 0.085,
"step": 28080
},
{
"epoch": 1.6,
"eval_bertscore": 0.7407130002975464,
"eval_loss": 0.09699860215187073,
"eval_rouge1": 0.5762741233248756,
"eval_rouge2": 0.3544722421313946,
"eval_rougeL": 0.4384246085216507,
"eval_rougeLsum": 0.4390526517186611,
"eval_runtime": 78.2681,
"eval_samples_per_second": 0.69,
"eval_steps_per_second": 0.345,
"step": 28080
},
{
"epoch": 1.64,
"grad_norm": 0.241718590259552,
"learning_rate": 4.5310333035697325e-05,
"loss": 0.0845,
"step": 28800
},
{
"epoch": 1.64,
"eval_bertscore": 0.7392789125442505,
"eval_loss": 0.09867523610591888,
"eval_rouge1": 0.580719658129176,
"eval_rouge2": 0.3694474172593357,
"eval_rougeL": 0.456964934995113,
"eval_rougeLsum": 0.45917370226539334,
"eval_runtime": 80.3113,
"eval_samples_per_second": 0.672,
"eval_steps_per_second": 0.336,
"step": 28800
}
],
"logging_steps": 720,
"max_steps": 52638,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 2880,
"total_flos": 2.496482830587003e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}