| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 8790, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.11376564277588168, | |
| "grad_norm": 6.126019477844238, | |
| "learning_rate": 4.9731121281464535e-05, | |
| "loss": 2.4955, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.22753128555176336, | |
| "grad_norm": 3.398350954055786, | |
| "learning_rate": 4.916475972540046e-05, | |
| "loss": 2.1647, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3412969283276451, | |
| "grad_norm": 4.663970947265625, | |
| "learning_rate": 4.859267734553776e-05, | |
| "loss": 2.115, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.4550625711035267, | |
| "grad_norm": 7.414068222045898, | |
| "learning_rate": 4.802059496567506e-05, | |
| "loss": 2.0582, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5688282138794084, | |
| "grad_norm": 5.257181644439697, | |
| "learning_rate": 4.744851258581236e-05, | |
| "loss": 2.0307, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6825938566552902, | |
| "grad_norm": 2.71687388420105, | |
| "learning_rate": 4.687643020594966e-05, | |
| "loss": 1.9912, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7963594994311718, | |
| "grad_norm": 6.777807712554932, | |
| "learning_rate": 4.630434782608696e-05, | |
| "loss": 1.9838, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9101251422070534, | |
| "grad_norm": 7.886149883270264, | |
| "learning_rate": 4.573226544622426e-05, | |
| "loss": 1.9829, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_gen_len": 210.01, | |
| "eval_loss": 2.0051724910736084, | |
| "eval_rouge1": 44.2025, | |
| "eval_rouge2": 21.4118, | |
| "eval_rougeL": 29.8753, | |
| "eval_rougeLsum": 39.0475, | |
| "eval_runtime": 123.0269, | |
| "eval_samples_per_second": 0.813, | |
| "eval_steps_per_second": 0.106, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.023890784982935, | |
| "grad_norm": 8.102137565612793, | |
| "learning_rate": 4.516018306636156e-05, | |
| "loss": 1.9554, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.1376564277588168, | |
| "grad_norm": 11.607733726501465, | |
| "learning_rate": 4.458810068649886e-05, | |
| "loss": 1.955, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.2514220705346986, | |
| "grad_norm": 6.75238037109375, | |
| "learning_rate": 4.401601830663616e-05, | |
| "loss": 1.8595, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.36518771331058, | |
| "grad_norm": 3.8955740928649902, | |
| "learning_rate": 4.344393592677346e-05, | |
| "loss": 1.8723, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.4789533560864618, | |
| "grad_norm": 3.7848293781280518, | |
| "learning_rate": 4.287757437070938e-05, | |
| "loss": 1.8681, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.5927189988623436, | |
| "grad_norm": 5.047945976257324, | |
| "learning_rate": 4.2305491990846686e-05, | |
| "loss": 1.8742, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.7064846416382253, | |
| "grad_norm": 4.739316463470459, | |
| "learning_rate": 4.173340961098398e-05, | |
| "loss": 1.8819, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.820250284414107, | |
| "grad_norm": 5.646695613861084, | |
| "learning_rate": 4.1161327231121284e-05, | |
| "loss": 1.8737, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.9340159271899886, | |
| "grad_norm": 3.755201578140259, | |
| "learning_rate": 4.0589244851258586e-05, | |
| "loss": 1.8785, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_gen_len": 146.16, | |
| "eval_loss": 1.9213957786560059, | |
| "eval_rouge1": 46.698, | |
| "eval_rouge2": 22.1329, | |
| "eval_rougeL": 31.017, | |
| "eval_rougeLsum": 41.3027, | |
| "eval_runtime": 88.692, | |
| "eval_samples_per_second": 1.127, | |
| "eval_steps_per_second": 0.147, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 2.04778156996587, | |
| "grad_norm": 2.962921619415283, | |
| "learning_rate": 4.001716247139588e-05, | |
| "loss": 1.8208, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.161547212741752, | |
| "grad_norm": 8.434884071350098, | |
| "learning_rate": 3.9445080091533184e-05, | |
| "loss": 1.803, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.2753128555176336, | |
| "grad_norm": 9.340300559997559, | |
| "learning_rate": 3.8872997711670486e-05, | |
| "loss": 1.8278, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.3890784982935154, | |
| "grad_norm": 3.50976300239563, | |
| "learning_rate": 3.830091533180778e-05, | |
| "loss": 1.8069, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.502844141069397, | |
| "grad_norm": 4.721962928771973, | |
| "learning_rate": 3.7728832951945084e-05, | |
| "loss": 1.8098, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.616609783845279, | |
| "grad_norm": 3.864901542663574, | |
| "learning_rate": 3.715675057208238e-05, | |
| "loss": 1.7589, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.73037542662116, | |
| "grad_norm": 24.329442977905273, | |
| "learning_rate": 3.658466819221968e-05, | |
| "loss": 1.7621, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.8441410693970424, | |
| "grad_norm": 8.203035354614258, | |
| "learning_rate": 3.601258581235698e-05, | |
| "loss": 1.7758, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.9579067121729237, | |
| "grad_norm": 5.4326066970825195, | |
| "learning_rate": 3.544050343249428e-05, | |
| "loss": 1.7493, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_gen_len": 135.9, | |
| "eval_loss": 1.86639404296875, | |
| "eval_rouge1": 47.237, | |
| "eval_rouge2": 23.0343, | |
| "eval_rougeL": 31.7155, | |
| "eval_rougeLsum": 42.1807, | |
| "eval_runtime": 76.7658, | |
| "eval_samples_per_second": 1.303, | |
| "eval_steps_per_second": 0.169, | |
| "step": 2637 | |
| }, | |
| { | |
| "epoch": 3.0716723549488054, | |
| "grad_norm": 3.5585150718688965, | |
| "learning_rate": 3.4868421052631575e-05, | |
| "loss": 1.7432, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 3.185437997724687, | |
| "grad_norm": 3.9954323768615723, | |
| "learning_rate": 3.429633867276888e-05, | |
| "loss": 1.7035, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 3.299203640500569, | |
| "grad_norm": 4.328066825866699, | |
| "learning_rate": 3.372425629290618e-05, | |
| "loss": 1.7343, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 3.4129692832764507, | |
| "grad_norm": 4.497200965881348, | |
| "learning_rate": 3.3152173913043475e-05, | |
| "loss": 1.7319, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.526734926052332, | |
| "grad_norm": 4.723243236541748, | |
| "learning_rate": 3.258009153318078e-05, | |
| "loss": 1.7294, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 3.640500568828214, | |
| "grad_norm": 6.760339260101318, | |
| "learning_rate": 3.200800915331808e-05, | |
| "loss": 1.7024, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 3.7542662116040955, | |
| "grad_norm": 3.54321026802063, | |
| "learning_rate": 3.1435926773455376e-05, | |
| "loss": 1.7029, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 3.868031854379977, | |
| "grad_norm": 5.660515785217285, | |
| "learning_rate": 3.086384439359268e-05, | |
| "loss": 1.7313, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 3.981797497155859, | |
| "grad_norm": 6.904107570648193, | |
| "learning_rate": 3.029176201372998e-05, | |
| "loss": 1.6599, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_gen_len": 133.89, | |
| "eval_loss": 1.8406709432601929, | |
| "eval_rouge1": 46.8883, | |
| "eval_rouge2": 22.317, | |
| "eval_rougeL": 30.9894, | |
| "eval_rougeLsum": 41.5511, | |
| "eval_runtime": 75.0208, | |
| "eval_samples_per_second": 1.333, | |
| "eval_steps_per_second": 0.173, | |
| "step": 3516 | |
| }, | |
| { | |
| "epoch": 4.09556313993174, | |
| "grad_norm": 3.598436117172241, | |
| "learning_rate": 2.9719679633867276e-05, | |
| "loss": 1.6754, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 4.2093287827076225, | |
| "grad_norm": 3.4669225215911865, | |
| "learning_rate": 2.9147597254004578e-05, | |
| "loss": 1.685, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 4.323094425483504, | |
| "grad_norm": 4.923774242401123, | |
| "learning_rate": 2.857551487414188e-05, | |
| "loss": 1.6468, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 4.436860068259386, | |
| "grad_norm": 5.548232078552246, | |
| "learning_rate": 2.8009153318077803e-05, | |
| "loss": 1.6389, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 4.550625711035267, | |
| "grad_norm": 6.222611904144287, | |
| "learning_rate": 2.7437070938215102e-05, | |
| "loss": 1.6251, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.664391353811149, | |
| "grad_norm": 4.012085437774658, | |
| "learning_rate": 2.6864988558352404e-05, | |
| "loss": 1.6708, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 4.778156996587031, | |
| "grad_norm": 4.607513904571533, | |
| "learning_rate": 2.62929061784897e-05, | |
| "loss": 1.6539, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 4.891922639362912, | |
| "grad_norm": 7.459988594055176, | |
| "learning_rate": 2.5720823798627002e-05, | |
| "loss": 1.6442, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_gen_len": 130.6, | |
| "eval_loss": 1.8186066150665283, | |
| "eval_rouge1": 46.7324, | |
| "eval_rouge2": 22.5522, | |
| "eval_rougeL": 30.8932, | |
| "eval_rougeLsum": 41.6596, | |
| "eval_runtime": 71.9117, | |
| "eval_samples_per_second": 1.391, | |
| "eval_steps_per_second": 0.181, | |
| "step": 4395 | |
| }, | |
| { | |
| "epoch": 5.005688282138794, | |
| "grad_norm": 3.9666900634765625, | |
| "learning_rate": 2.5148741418764304e-05, | |
| "loss": 1.6693, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 5.1194539249146755, | |
| "grad_norm": 3.8383851051330566, | |
| "learning_rate": 2.4576659038901603e-05, | |
| "loss": 1.5846, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 5.233219567690558, | |
| "grad_norm": 3.5106499195098877, | |
| "learning_rate": 2.4004576659038902e-05, | |
| "loss": 1.6043, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 5.346985210466439, | |
| "grad_norm": 4.501423358917236, | |
| "learning_rate": 2.3432494279176205e-05, | |
| "loss": 1.6203, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 5.460750853242321, | |
| "grad_norm": 3.3335440158843994, | |
| "learning_rate": 2.2860411899313504e-05, | |
| "loss": 1.6109, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 5.5745164960182025, | |
| "grad_norm": 10.799994468688965, | |
| "learning_rate": 2.2288329519450803e-05, | |
| "loss": 1.6036, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 5.688282138794084, | |
| "grad_norm": 3.463279962539673, | |
| "learning_rate": 2.17162471395881e-05, | |
| "loss": 1.6034, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 5.802047781569966, | |
| "grad_norm": 5.357439994812012, | |
| "learning_rate": 2.1149885583524028e-05, | |
| "loss": 1.6073, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 5.915813424345847, | |
| "grad_norm": 6.183532238006592, | |
| "learning_rate": 2.0577803203661326e-05, | |
| "loss": 1.65, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_gen_len": 129.34, | |
| "eval_loss": 1.804569959640503, | |
| "eval_rouge1": 46.7244, | |
| "eval_rouge2": 22.3848, | |
| "eval_rougeL": 31.2658, | |
| "eval_rougeLsum": 41.6427, | |
| "eval_runtime": 65.0318, | |
| "eval_samples_per_second": 1.538, | |
| "eval_steps_per_second": 0.2, | |
| "step": 5274 | |
| }, | |
| { | |
| "epoch": 6.0295790671217295, | |
| "grad_norm": 4.417309761047363, | |
| "learning_rate": 2.000572082379863e-05, | |
| "loss": 1.6041, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 6.143344709897611, | |
| "grad_norm": 5.918379306793213, | |
| "learning_rate": 1.9433638443935928e-05, | |
| "loss": 1.5789, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 6.257110352673493, | |
| "grad_norm": 3.504812240600586, | |
| "learning_rate": 1.8861556064073227e-05, | |
| "loss": 1.6221, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 6.370875995449374, | |
| "grad_norm": 4.689468860626221, | |
| "learning_rate": 1.828947368421053e-05, | |
| "loss": 1.5967, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 6.484641638225256, | |
| "grad_norm": 3.1115574836730957, | |
| "learning_rate": 1.7717391304347828e-05, | |
| "loss": 1.5745, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 6.598407281001138, | |
| "grad_norm": 5.300652503967285, | |
| "learning_rate": 1.7145308924485127e-05, | |
| "loss": 1.5666, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 6.712172923777019, | |
| "grad_norm": 3.5895206928253174, | |
| "learning_rate": 1.657322654462243e-05, | |
| "loss": 1.5531, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 6.825938566552901, | |
| "grad_norm": 3.9184463024139404, | |
| "learning_rate": 1.6001144164759728e-05, | |
| "loss": 1.583, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 6.939704209328783, | |
| "grad_norm": 4.801300048828125, | |
| "learning_rate": 1.5429061784897027e-05, | |
| "loss": 1.5859, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_gen_len": 128.86, | |
| "eval_loss": 1.7970900535583496, | |
| "eval_rouge1": 47.0912, | |
| "eval_rouge2": 22.2605, | |
| "eval_rougeL": 31.1363, | |
| "eval_rougeLsum": 41.6028, | |
| "eval_runtime": 65.7365, | |
| "eval_samples_per_second": 1.521, | |
| "eval_steps_per_second": 0.198, | |
| "step": 6153 | |
| }, | |
| { | |
| "epoch": 7.053469852104665, | |
| "grad_norm": 36.4532470703125, | |
| "learning_rate": 1.4856979405034328e-05, | |
| "loss": 1.539, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 7.167235494880546, | |
| "grad_norm": 5.765852451324463, | |
| "learning_rate": 1.4284897025171627e-05, | |
| "loss": 1.5805, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 7.281001137656427, | |
| "grad_norm": 5.349630832672119, | |
| "learning_rate": 1.3712814645308924e-05, | |
| "loss": 1.5332, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 7.39476678043231, | |
| "grad_norm": 100.44608306884766, | |
| "learning_rate": 1.3140732265446226e-05, | |
| "loss": 1.5277, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 7.508532423208191, | |
| "grad_norm": 3.3179357051849365, | |
| "learning_rate": 1.2568649885583525e-05, | |
| "loss": 1.5304, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 7.622298065984073, | |
| "grad_norm": 5.867196083068848, | |
| "learning_rate": 1.1996567505720824e-05, | |
| "loss": 1.5314, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 7.736063708759954, | |
| "grad_norm": 3.5859735012054443, | |
| "learning_rate": 1.1424485125858125e-05, | |
| "loss": 1.5665, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 7.849829351535837, | |
| "grad_norm": 3.854527711868286, | |
| "learning_rate": 1.0852402745995424e-05, | |
| "loss": 1.5633, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 7.963594994311718, | |
| "grad_norm": 3.261179208755493, | |
| "learning_rate": 1.0280320366132722e-05, | |
| "loss": 1.5773, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_gen_len": 128.57, | |
| "eval_loss": 1.7826117277145386, | |
| "eval_rouge1": 47.1155, | |
| "eval_rouge2": 22.756, | |
| "eval_rougeL": 31.6846, | |
| "eval_rougeLsum": 41.8634, | |
| "eval_runtime": 68.6593, | |
| "eval_samples_per_second": 1.456, | |
| "eval_steps_per_second": 0.189, | |
| "step": 7032 | |
| }, | |
| { | |
| "epoch": 8.0773606370876, | |
| "grad_norm": 3.9632034301757812, | |
| "learning_rate": 9.708237986270023e-06, | |
| "loss": 1.5591, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 8.19112627986348, | |
| "grad_norm": 6.473509311676025, | |
| "learning_rate": 9.136155606407324e-06, | |
| "loss": 1.5121, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 8.304891922639364, | |
| "grad_norm": 4.013639450073242, | |
| "learning_rate": 8.564073226544623e-06, | |
| "loss": 1.5342, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 8.418657565415245, | |
| "grad_norm": 6.20673942565918, | |
| "learning_rate": 7.991990846681922e-06, | |
| "loss": 1.536, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 8.532423208191126, | |
| "grad_norm": 3.9642581939697266, | |
| "learning_rate": 7.419908466819222e-06, | |
| "loss": 1.5527, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 8.646188850967008, | |
| "grad_norm": 4.914712429046631, | |
| "learning_rate": 6.847826086956521e-06, | |
| "loss": 1.5377, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 8.759954493742889, | |
| "grad_norm": 3.1526217460632324, | |
| "learning_rate": 6.275743707093822e-06, | |
| "loss": 1.5192, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 8.873720136518772, | |
| "grad_norm": 4.830049991607666, | |
| "learning_rate": 5.7036613272311215e-06, | |
| "loss": 1.5166, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 8.987485779294653, | |
| "grad_norm": 5.3716020584106445, | |
| "learning_rate": 5.131578947368421e-06, | |
| "loss": 1.5268, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_gen_len": 128.39, | |
| "eval_loss": 1.7820578813552856, | |
| "eval_rouge1": 47.0113, | |
| "eval_rouge2": 23.0256, | |
| "eval_rougeL": 31.9372, | |
| "eval_rougeLsum": 42.0294, | |
| "eval_runtime": 68.0652, | |
| "eval_samples_per_second": 1.469, | |
| "eval_steps_per_second": 0.191, | |
| "step": 7911 | |
| }, | |
| { | |
| "epoch": 9.101251422070535, | |
| "grad_norm": 5.304577350616455, | |
| "learning_rate": 4.559496567505721e-06, | |
| "loss": 1.5266, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 9.215017064846416, | |
| "grad_norm": 41.307308197021484, | |
| "learning_rate": 3.987414187643021e-06, | |
| "loss": 1.5269, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 9.328782707622299, | |
| "grad_norm": 3.451307773590088, | |
| "learning_rate": 3.4153318077803206e-06, | |
| "loss": 1.5135, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 9.44254835039818, | |
| "grad_norm": 3.4578561782836914, | |
| "learning_rate": 2.8432494279176204e-06, | |
| "loss": 1.5204, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 9.556313993174061, | |
| "grad_norm": 4.3368096351623535, | |
| "learning_rate": 2.2711670480549198e-06, | |
| "loss": 1.5315, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 9.670079635949943, | |
| "grad_norm": 4.57019567489624, | |
| "learning_rate": 1.6990846681922198e-06, | |
| "loss": 1.5206, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 9.783845278725824, | |
| "grad_norm": 4.861795425415039, | |
| "learning_rate": 1.1270022883295195e-06, | |
| "loss": 1.5193, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 9.897610921501707, | |
| "grad_norm": 3.933220863342285, | |
| "learning_rate": 5.549199084668192e-07, | |
| "loss": 1.5362, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_gen_len": 128.35, | |
| "eval_loss": 1.7812010049819946, | |
| "eval_rouge1": 46.8688, | |
| "eval_rouge2": 23.0889, | |
| "eval_rougeL": 31.9785, | |
| "eval_rougeLsum": 41.911, | |
| "eval_runtime": 68.0802, | |
| "eval_samples_per_second": 1.469, | |
| "eval_steps_per_second": 0.191, | |
| "step": 8790 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 8790, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.031293483188224e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |