| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.6413997492305938, | |
| "eval_steps": 720, | |
| "global_step": 28800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.535019040107727, | |
| "learning_rate": 9.864353971540931e-05, | |
| "loss": 0.3127, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_bertscore": 0.7034942507743835, | |
| "eval_loss": 0.12081495672464371, | |
| "eval_rouge1": 0.5240594026277678, | |
| "eval_rouge2": 0.3067757525245376, | |
| "eval_rougeL": 0.39633186458379827, | |
| "eval_rougeLsum": 0.39613387865495875, | |
| "eval_runtime": 80.0774, | |
| "eval_samples_per_second": 0.674, | |
| "eval_steps_per_second": 0.337, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.42372021079063416, | |
| "learning_rate": 9.72756806048977e-05, | |
| "loss": 0.1212, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_bertscore": 0.7000990509986877, | |
| "eval_loss": 0.11524277925491333, | |
| "eval_rouge1": 0.5216793756151761, | |
| "eval_rouge2": 0.2934274790368596, | |
| "eval_rougeL": 0.3886043968581182, | |
| "eval_rougeLsum": 0.38807827948983176, | |
| "eval_runtime": 76.9989, | |
| "eval_samples_per_second": 0.701, | |
| "eval_steps_per_second": 0.351, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.42828959226608276, | |
| "learning_rate": 9.590782149438608e-05, | |
| "loss": 0.1222, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_bertscore": 0.6457424163818359, | |
| "eval_loss": 0.1191863939166069, | |
| "eval_rouge1": 0.4637245182501467, | |
| "eval_rouge2": 0.25286895940302717, | |
| "eval_rougeL": 0.34111914002345234, | |
| "eval_rougeLsum": 0.3407154814401842, | |
| "eval_runtime": 103.8897, | |
| "eval_samples_per_second": 0.52, | |
| "eval_steps_per_second": 0.26, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.44599342346191406, | |
| "learning_rate": 9.453996238387447e-05, | |
| "loss": 0.1172, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_bertscore": 0.6841260194778442, | |
| "eval_loss": 0.11394956707954407, | |
| "eval_rouge1": 0.48661400042430136, | |
| "eval_rouge2": 0.27298564925044316, | |
| "eval_rougeL": 0.371859286615548, | |
| "eval_rougeLsum": 0.37045985133751724, | |
| "eval_runtime": 67.7701, | |
| "eval_samples_per_second": 0.797, | |
| "eval_steps_per_second": 0.398, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.4306412935256958, | |
| "learning_rate": 9.317210327336284e-05, | |
| "loss": 0.1265, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_bertscore": 0.5902894139289856, | |
| "eval_loss": 0.1217198297381401, | |
| "eval_rouge1": 0.3684193429088964, | |
| "eval_rouge2": 0.19503224358227592, | |
| "eval_rougeL": 0.29399973276202795, | |
| "eval_rougeLsum": 0.29348115386759427, | |
| "eval_runtime": 58.6683, | |
| "eval_samples_per_second": 0.92, | |
| "eval_steps_per_second": 0.46, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.4569800794124603, | |
| "learning_rate": 9.180424416285122e-05, | |
| "loss": 0.1201, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_bertscore": 0.6664375066757202, | |
| "eval_loss": 0.11453992873430252, | |
| "eval_rouge1": 0.4849966834372761, | |
| "eval_rouge2": 0.27797369581531306, | |
| "eval_rougeL": 0.3629296708263681, | |
| "eval_rougeLsum": 0.3623800251579623, | |
| "eval_runtime": 81.2927, | |
| "eval_samples_per_second": 0.664, | |
| "eval_steps_per_second": 0.332, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 0.4236726462841034, | |
| "learning_rate": 9.043638505233961e-05, | |
| "loss": 0.1171, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_bertscore": 0.6511832475662231, | |
| "eval_loss": 0.1140449047088623, | |
| "eval_rouge1": 0.4622762558490229, | |
| "eval_rouge2": 0.2720739144786822, | |
| "eval_rougeL": 0.35563312683133363, | |
| "eval_rougeLsum": 0.3553109181928958, | |
| "eval_runtime": 96.9669, | |
| "eval_samples_per_second": 0.557, | |
| "eval_steps_per_second": 0.278, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 0.4153118431568146, | |
| "learning_rate": 8.9068525941828e-05, | |
| "loss": 0.1182, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_bertscore": 0.7056278586387634, | |
| "eval_loss": 0.11447593569755554, | |
| "eval_rouge1": 0.535980541670685, | |
| "eval_rouge2": 0.3085487182685619, | |
| "eval_rougeL": 0.3952747177668595, | |
| "eval_rougeLsum": 0.39487374036594847, | |
| "eval_runtime": 78.3273, | |
| "eval_samples_per_second": 0.689, | |
| "eval_steps_per_second": 0.345, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 0.30859124660491943, | |
| "learning_rate": 8.770066683131638e-05, | |
| "loss": 0.1161, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_bertscore": 0.717333197593689, | |
| "eval_loss": 0.11316747963428497, | |
| "eval_rouge1": 0.5395779598124536, | |
| "eval_rouge2": 0.3235379995103774, | |
| "eval_rougeL": 0.40115447322823283, | |
| "eval_rougeLsum": 0.40261399344054405, | |
| "eval_runtime": 77.4572, | |
| "eval_samples_per_second": 0.697, | |
| "eval_steps_per_second": 0.349, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 0.3857922852039337, | |
| "learning_rate": 8.633280772080476e-05, | |
| "loss": 0.1151, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_bertscore": 0.7019616365432739, | |
| "eval_loss": 0.11000501364469528, | |
| "eval_rouge1": 0.5236493302484355, | |
| "eval_rouge2": 0.3068191529551719, | |
| "eval_rougeL": 0.4036664284755191, | |
| "eval_rougeLsum": 0.4040689187486951, | |
| "eval_runtime": 76.1117, | |
| "eval_samples_per_second": 0.709, | |
| "eval_steps_per_second": 0.355, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 0.3432803452014923, | |
| "learning_rate": 8.496494861029315e-05, | |
| "loss": 0.1144, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_bertscore": 0.7042035460472107, | |
| "eval_loss": 0.11131834983825684, | |
| "eval_rouge1": 0.5294383728708514, | |
| "eval_rouge2": 0.3003098501119716, | |
| "eval_rougeL": 0.3967273111533241, | |
| "eval_rougeLsum": 0.39619485281011757, | |
| "eval_runtime": 77.3811, | |
| "eval_samples_per_second": 0.698, | |
| "eval_steps_per_second": 0.349, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 0.3402859568595886, | |
| "learning_rate": 8.359708949978152e-05, | |
| "loss": 0.1126, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_bertscore": 0.71971595287323, | |
| "eval_loss": 0.11067274957895279, | |
| "eval_rouge1": 0.551885774991056, | |
| "eval_rouge2": 0.33499475588298316, | |
| "eval_rougeL": 0.4160407628361842, | |
| "eval_rougeLsum": 0.4164543392695917, | |
| "eval_runtime": 77.4902, | |
| "eval_samples_per_second": 0.697, | |
| "eval_steps_per_second": 0.348, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 0.4391550123691559, | |
| "learning_rate": 8.223113019359007e-05, | |
| "loss": 0.1116, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_bertscore": 0.7158631086349487, | |
| "eval_loss": 0.1099533885717392, | |
| "eval_rouge1": 0.5557272797557176, | |
| "eval_rouge2": 0.332779980249166, | |
| "eval_rougeL": 0.4155444723963883, | |
| "eval_rougeLsum": 0.41657130732656783, | |
| "eval_runtime": 80.0828, | |
| "eval_samples_per_second": 0.674, | |
| "eval_steps_per_second": 0.337, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 0.3907322287559509, | |
| "learning_rate": 8.086327108307846e-05, | |
| "loss": 0.1141, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_bertscore": 0.7130799293518066, | |
| "eval_loss": 0.11258435994386673, | |
| "eval_rouge1": 0.5457292777447704, | |
| "eval_rouge2": 0.3214033358835623, | |
| "eval_rougeL": 0.40814606110656115, | |
| "eval_rougeLsum": 0.4086806368595041, | |
| "eval_runtime": 73.8407, | |
| "eval_samples_per_second": 0.731, | |
| "eval_steps_per_second": 0.366, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 0.38848328590393066, | |
| "learning_rate": 7.949541197256683e-05, | |
| "loss": 0.1132, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_bertscore": 0.7245057225227356, | |
| "eval_loss": 0.11034353822469711, | |
| "eval_rouge1": 0.5603983140826179, | |
| "eval_rouge2": 0.3445987526625777, | |
| "eval_rougeL": 0.43423536113182604, | |
| "eval_rougeLsum": 0.43398163455334016, | |
| "eval_runtime": 77.732, | |
| "eval_samples_per_second": 0.695, | |
| "eval_steps_per_second": 0.347, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 0.37145286798477173, | |
| "learning_rate": 7.812945266637537e-05, | |
| "loss": 0.112, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_bertscore": 0.7207842469215393, | |
| "eval_loss": 0.11157318204641342, | |
| "eval_rouge1": 0.5554178283606811, | |
| "eval_rouge2": 0.3317069905744905, | |
| "eval_rougeL": 0.4209451268922738, | |
| "eval_rougeLsum": 0.42120272115590573, | |
| "eval_runtime": 81.6277, | |
| "eval_samples_per_second": 0.662, | |
| "eval_steps_per_second": 0.331, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 0.32327908277511597, | |
| "learning_rate": 7.676349336018391e-05, | |
| "loss": 0.1118, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_bertscore": 0.7193225622177124, | |
| "eval_loss": 0.11037024855613708, | |
| "eval_rouge1": 0.5534709029702873, | |
| "eval_rouge2": 0.33508595975393674, | |
| "eval_rougeL": 0.4220660586810759, | |
| "eval_rougeLsum": 0.42394444829473793, | |
| "eval_runtime": 79.4778, | |
| "eval_samples_per_second": 0.679, | |
| "eval_steps_per_second": 0.34, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 0.296165406703949, | |
| "learning_rate": 7.539563424967229e-05, | |
| "loss": 0.1096, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_bertscore": 0.7183234691619873, | |
| "eval_loss": 0.10668845474720001, | |
| "eval_rouge1": 0.5527080110711662, | |
| "eval_rouge2": 0.3304597058226536, | |
| "eval_rougeL": 0.4176676998826935, | |
| "eval_rougeLsum": 0.41906982236369805, | |
| "eval_runtime": 74.609, | |
| "eval_samples_per_second": 0.724, | |
| "eval_steps_per_second": 0.362, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 0.3004627525806427, | |
| "learning_rate": 7.402777513916068e-05, | |
| "loss": 0.1105, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_bertscore": 0.7093863487243652, | |
| "eval_loss": 0.1068594753742218, | |
| "eval_rouge1": 0.5386027107080774, | |
| "eval_rouge2": 0.3174670612173311, | |
| "eval_rougeL": 0.4089464604886982, | |
| "eval_rougeLsum": 0.40954043741634194, | |
| "eval_runtime": 76.1174, | |
| "eval_samples_per_second": 0.709, | |
| "eval_steps_per_second": 0.355, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 0.4122227132320404, | |
| "learning_rate": 7.265991602864905e-05, | |
| "loss": 0.1094, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_bertscore": 0.7156451344490051, | |
| "eval_loss": 0.10706545412540436, | |
| "eval_rouge1": 0.5522097394348282, | |
| "eval_rouge2": 0.3376815877629147, | |
| "eval_rougeL": 0.41094798705443536, | |
| "eval_rougeLsum": 0.41185755780524297, | |
| "eval_runtime": 79.5068, | |
| "eval_samples_per_second": 0.679, | |
| "eval_steps_per_second": 0.34, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 0.3019055128097534, | |
| "learning_rate": 7.129205691813743e-05, | |
| "loss": 0.1047, | |
| "step": 15120 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_bertscore": 0.723181962966919, | |
| "eval_loss": 0.10513070970773697, | |
| "eval_rouge1": 0.5597833953895566, | |
| "eval_rouge2": 0.3368159976094224, | |
| "eval_rougeL": 0.4251112326345452, | |
| "eval_rougeLsum": 0.4271018761152323, | |
| "eval_runtime": 72.9275, | |
| "eval_samples_per_second": 0.74, | |
| "eval_steps_per_second": 0.37, | |
| "step": 15120 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.39543616771698, | |
| "learning_rate": 6.992609761194597e-05, | |
| "loss": 0.106, | |
| "step": 15840 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_bertscore": 0.7264233231544495, | |
| "eval_loss": 0.10471142083406448, | |
| "eval_rouge1": 0.5607444186855683, | |
| "eval_rouge2": 0.32933852525922336, | |
| "eval_rougeL": 0.4164104876659622, | |
| "eval_rougeLsum": 0.4178921783444509, | |
| "eval_runtime": 79.4632, | |
| "eval_samples_per_second": 0.68, | |
| "eval_steps_per_second": 0.34, | |
| "step": 15840 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 0.17296220362186432, | |
| "learning_rate": 6.855823850143436e-05, | |
| "loss": 0.1085, | |
| "step": 16560 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_bertscore": 0.7186797261238098, | |
| "eval_loss": 0.10288402438163757, | |
| "eval_rouge1": 0.5499937534452628, | |
| "eval_rouge2": 0.33202955320606253, | |
| "eval_rougeL": 0.41109499153735635, | |
| "eval_rougeLsum": 0.4129325173744952, | |
| "eval_runtime": 74.2816, | |
| "eval_samples_per_second": 0.727, | |
| "eval_steps_per_second": 0.363, | |
| "step": 16560 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 0.34968239068984985, | |
| "learning_rate": 6.719037939092274e-05, | |
| "loss": 0.1064, | |
| "step": 17280 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_bertscore": 0.715437650680542, | |
| "eval_loss": 0.10678575932979584, | |
| "eval_rouge1": 0.5487884139639068, | |
| "eval_rouge2": 0.3287484312214649, | |
| "eval_rougeL": 0.4115546192599129, | |
| "eval_rougeLsum": 0.4129129108454481, | |
| "eval_runtime": 77.9977, | |
| "eval_samples_per_second": 0.692, | |
| "eval_steps_per_second": 0.346, | |
| "step": 17280 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 0.22770258784294128, | |
| "learning_rate": 6.582252028041113e-05, | |
| "loss": 0.094, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_bertscore": 0.7268933653831482, | |
| "eval_loss": 0.10910864919424057, | |
| "eval_rouge1": 0.5644640432420631, | |
| "eval_rouge2": 0.34856910757450765, | |
| "eval_rougeL": 0.4334348850734425, | |
| "eval_rougeLsum": 0.4322774316283801, | |
| "eval_runtime": 70.7723, | |
| "eval_samples_per_second": 0.763, | |
| "eval_steps_per_second": 0.382, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 0.2036217600107193, | |
| "learning_rate": 6.44546611698995e-05, | |
| "loss": 0.0864, | |
| "step": 18720 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_bertscore": 0.7298507690429688, | |
| "eval_loss": 0.1051657572388649, | |
| "eval_rouge1": 0.5693416283658175, | |
| "eval_rouge2": 0.3547090481291705, | |
| "eval_rougeL": 0.4367412765285528, | |
| "eval_rougeLsum": 0.4370252833034207, | |
| "eval_runtime": 74.7048, | |
| "eval_samples_per_second": 0.723, | |
| "eval_steps_per_second": 0.361, | |
| "step": 18720 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 0.3400803804397583, | |
| "learning_rate": 6.308680205938788e-05, | |
| "loss": 0.0846, | |
| "step": 19440 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_bertscore": 0.7288545966148376, | |
| "eval_loss": 0.1069113239645958, | |
| "eval_rouge1": 0.5633722381222108, | |
| "eval_rouge2": 0.337377454492796, | |
| "eval_rougeL": 0.4349115421710151, | |
| "eval_rougeLsum": 0.43561356852158567, | |
| "eval_runtime": 73.5552, | |
| "eval_samples_per_second": 0.734, | |
| "eval_steps_per_second": 0.367, | |
| "step": 19440 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 0.3360745310783386, | |
| "learning_rate": 6.172084275319642e-05, | |
| "loss": 0.0875, | |
| "step": 20160 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "eval_bertscore": 0.715953528881073, | |
| "eval_loss": 0.10425002127885818, | |
| "eval_rouge1": 0.5548398737384996, | |
| "eval_rouge2": 0.33589277481599067, | |
| "eval_rougeL": 0.42137114864331937, | |
| "eval_rougeLsum": 0.4231469615029759, | |
| "eval_runtime": 78.0304, | |
| "eval_samples_per_second": 0.692, | |
| "eval_steps_per_second": 0.346, | |
| "step": 20160 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 0.4246189594268799, | |
| "learning_rate": 6.03529836426848e-05, | |
| "loss": 0.0868, | |
| "step": 20880 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_bertscore": 0.7299396395683289, | |
| "eval_loss": 0.10365325212478638, | |
| "eval_rouge1": 0.5715394315498017, | |
| "eval_rouge2": 0.34427400662165897, | |
| "eval_rougeL": 0.433027526044127, | |
| "eval_rougeLsum": 0.4347450430032858, | |
| "eval_runtime": 74.0473, | |
| "eval_samples_per_second": 0.729, | |
| "eval_steps_per_second": 0.365, | |
| "step": 20880 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 0.2776849865913391, | |
| "learning_rate": 5.898512453217319e-05, | |
| "loss": 0.0854, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "eval_bertscore": 0.7214290499687195, | |
| "eval_loss": 0.10122980922460556, | |
| "eval_rouge1": 0.5565823263453793, | |
| "eval_rouge2": 0.3393375143994867, | |
| "eval_rougeL": 0.4156140884756716, | |
| "eval_rougeLsum": 0.41819540905867203, | |
| "eval_runtime": 73.2235, | |
| "eval_samples_per_second": 0.737, | |
| "eval_steps_per_second": 0.369, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 0.3710538446903229, | |
| "learning_rate": 5.761726542166157e-05, | |
| "loss": 0.0845, | |
| "step": 22320 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_bertscore": 0.7201518416404724, | |
| "eval_loss": 0.10378885269165039, | |
| "eval_rouge1": 0.5441295123980776, | |
| "eval_rouge2": 0.33155064058257405, | |
| "eval_rougeL": 0.42094247090226844, | |
| "eval_rougeLsum": 0.42274633038817555, | |
| "eval_runtime": 76.6313, | |
| "eval_samples_per_second": 0.705, | |
| "eval_steps_per_second": 0.352, | |
| "step": 22320 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 0.5819060206413269, | |
| "learning_rate": 5.624940631114996e-05, | |
| "loss": 0.0861, | |
| "step": 23040 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_bertscore": 0.7142701148986816, | |
| "eval_loss": 0.10384026169776917, | |
| "eval_rouge1": 0.5458184588249984, | |
| "eval_rouge2": 0.3290922169442115, | |
| "eval_rougeL": 0.4214855047650181, | |
| "eval_rougeLsum": 0.4239018723206239, | |
| "eval_runtime": 79.4541, | |
| "eval_samples_per_second": 0.68, | |
| "eval_steps_per_second": 0.34, | |
| "step": 23040 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 0.2952657639980316, | |
| "learning_rate": 5.488154720063834e-05, | |
| "loss": 0.0862, | |
| "step": 23760 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "eval_bertscore": 0.7302463054656982, | |
| "eval_loss": 0.10171066224575043, | |
| "eval_rouge1": 0.564237466077122, | |
| "eval_rouge2": 0.346632021192653, | |
| "eval_rougeL": 0.44007571581541377, | |
| "eval_rougeLsum": 0.4408434182223313, | |
| "eval_runtime": 70.4368, | |
| "eval_samples_per_second": 0.767, | |
| "eval_steps_per_second": 0.383, | |
| "step": 23760 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.3152740001678467, | |
| "learning_rate": 5.351368809012672e-05, | |
| "loss": 0.0858, | |
| "step": 24480 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_bertscore": 0.7205690741539001, | |
| "eval_loss": 0.10222817957401276, | |
| "eval_rouge1": 0.561963492920594, | |
| "eval_rouge2": 0.3366175149143015, | |
| "eval_rougeL": 0.4370056834486044, | |
| "eval_rougeLsum": 0.4383325343921459, | |
| "eval_runtime": 70.7631, | |
| "eval_samples_per_second": 0.763, | |
| "eval_steps_per_second": 0.382, | |
| "step": 24480 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 0.3384862542152405, | |
| "learning_rate": 5.214772878393526e-05, | |
| "loss": 0.0868, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "eval_bertscore": 0.7201054096221924, | |
| "eval_loss": 0.10058918595314026, | |
| "eval_rouge1": 0.5506843793300468, | |
| "eval_rouge2": 0.3305447880283259, | |
| "eval_rougeL": 0.4221671281003694, | |
| "eval_rougeLsum": 0.42405735392085775, | |
| "eval_runtime": 73.3661, | |
| "eval_samples_per_second": 0.736, | |
| "eval_steps_per_second": 0.368, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 0.2858143150806427, | |
| "learning_rate": 5.078176947774379e-05, | |
| "loss": 0.0851, | |
| "step": 25920 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_bertscore": 0.7324591875076294, | |
| "eval_loss": 0.10030569136142731, | |
| "eval_rouge1": 0.5711881175991272, | |
| "eval_rouge2": 0.35036140380824915, | |
| "eval_rougeL": 0.44736244718696055, | |
| "eval_rougeLsum": 0.44882200145887735, | |
| "eval_runtime": 73.2375, | |
| "eval_samples_per_second": 0.737, | |
| "eval_steps_per_second": 0.369, | |
| "step": 25920 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 0.34586507081985474, | |
| "learning_rate": 4.941391036723218e-05, | |
| "loss": 0.0839, | |
| "step": 26640 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_bertscore": 0.7323827147483826, | |
| "eval_loss": 0.10078810900449753, | |
| "eval_rouge1": 0.5643411922408847, | |
| "eval_rouge2": 0.35335509416724475, | |
| "eval_rougeL": 0.4412030311945061, | |
| "eval_rougeLsum": 0.4423071630624772, | |
| "eval_runtime": 78.0237, | |
| "eval_samples_per_second": 0.692, | |
| "eval_steps_per_second": 0.346, | |
| "step": 26640 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 0.35009488463401794, | |
| "learning_rate": 4.804605125672056e-05, | |
| "loss": 0.0843, | |
| "step": 27360 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_bertscore": 0.7391833662986755, | |
| "eval_loss": 0.09879420697689056, | |
| "eval_rouge1": 0.5744063451356638, | |
| "eval_rouge2": 0.3631199161982914, | |
| "eval_rougeL": 0.44665302719291095, | |
| "eval_rougeLsum": 0.44897406269269213, | |
| "eval_runtime": 80.5403, | |
| "eval_samples_per_second": 0.67, | |
| "eval_steps_per_second": 0.335, | |
| "step": 27360 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.3002821207046509, | |
| "learning_rate": 4.667819214620894e-05, | |
| "loss": 0.085, | |
| "step": 28080 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_bertscore": 0.7407130002975464, | |
| "eval_loss": 0.09699860215187073, | |
| "eval_rouge1": 0.5762741233248756, | |
| "eval_rouge2": 0.3544722421313946, | |
| "eval_rougeL": 0.4384246085216507, | |
| "eval_rougeLsum": 0.4390526517186611, | |
| "eval_runtime": 78.2681, | |
| "eval_samples_per_second": 0.69, | |
| "eval_steps_per_second": 0.345, | |
| "step": 28080 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 0.241718590259552, | |
| "learning_rate": 4.5310333035697325e-05, | |
| "loss": 0.0845, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "eval_bertscore": 0.7392789125442505, | |
| "eval_loss": 0.09867523610591888, | |
| "eval_rouge1": 0.580719658129176, | |
| "eval_rouge2": 0.3694474172593357, | |
| "eval_rougeL": 0.456964934995113, | |
| "eval_rougeLsum": 0.45917370226539334, | |
| "eval_runtime": 80.3113, | |
| "eval_samples_per_second": 0.672, | |
| "eval_steps_per_second": 0.336, | |
| "step": 28800 | |
| } | |
| ], | |
| "logging_steps": 720, | |
| "max_steps": 52638, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 2880, | |
| "total_flos": 2.496482830587003e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |