| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 9.789693392831438, |
| "global_step": 34000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.994471638353009e-05, |
| "loss": 1.1578, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.98871292830406e-05, |
| "loss": 1.1739, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.982954218255111e-05, |
| "loss": 1.1411, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.977195508206162e-05, |
| "loss": 1.1673, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.971436798157213e-05, |
| "loss": 1.1968, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.965678088108264e-05, |
| "loss": 1.195, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.9599193780593148e-05, |
| "loss": 1.1281, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.954218255110855e-05, |
| "loss": 1.182, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.9484595450619064e-05, |
| "loss": 1.1541, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.9427008350129574e-05, |
| "loss": 1.1361, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.936942124964008e-05, |
| "loss": 1.2445, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.9311834149150593e-05, |
| "loss": 1.2288, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.9254247048661102e-05, |
| "loss": 1.2621, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.9196659948171612e-05, |
| "loss": 1.1978, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.913907284768212e-05, |
| "loss": 1.176, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.908148574719263e-05, |
| "loss": 1.165, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.902389864670314e-05, |
| "loss": 1.1823, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 1.896631154621365e-05, |
| "loss": 1.212, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 1.890872444572416e-05, |
| "loss": 1.2491, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 1.885113734523467e-05, |
| "loss": 1.1902, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 1.8794126115750072e-05, |
| "loss": 1.22, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 1.8736539015260585e-05, |
| "loss": 1.1474, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 1.867895191477109e-05, |
| "loss": 1.1868, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.86213648142816e-05, |
| "loss": 1.2253, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 1.8563777713792114e-05, |
| "loss": 1.2124, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 1.850619061330262e-05, |
| "loss": 1.1693, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 1.8448603512813133e-05, |
| "loss": 1.16, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.8391016412323642e-05, |
| "loss": 1.1936, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.833342931183415e-05, |
| "loss": 1.215, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.827584221134466e-05, |
| "loss": 1.1846, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 1.821825511085517e-05, |
| "loss": 1.1746, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 1.8160668010365677e-05, |
| "loss": 1.1871, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.810308090987619e-05, |
| "loss": 1.1507, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.80454938093867e-05, |
| "loss": 1.2005, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_gen_len": 60.5187, |
| "eval_loss": 1.355536699295044, |
| "eval_rouge1": 39.5211, |
| "eval_rouge2": 20.7719, |
| "eval_rougeL": 30.7229, |
| "eval_rougeLsum": 36.5645, |
| "eval_runtime": 1874.0573, |
| "eval_samples_per_second": 1.857, |
| "eval_steps_per_second": 0.928, |
| "step": 3473 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 1.798790670889721e-05, |
| "loss": 1.0801, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 1.793031960840772e-05, |
| "loss": 0.8483, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 1.7872732507918228e-05, |
| "loss": 0.875, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 1.7815145407428738e-05, |
| "loss": 0.8482, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 1.7757558306939247e-05, |
| "loss": 0.8639, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 1.7699971206449757e-05, |
| "loss": 0.8351, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.7642384105960266e-05, |
| "loss": 0.8661, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1.7584797005470776e-05, |
| "loss": 0.9151, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 1.7527209904981285e-05, |
| "loss": 0.8422, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 1.7469622804491795e-05, |
| "loss": 0.9115, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1.7412035704002304e-05, |
| "loss": 0.8703, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 1.7354448603512814e-05, |
| "loss": 0.8565, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.7296861503023323e-05, |
| "loss": 0.8967, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.7239274402533833e-05, |
| "loss": 0.8496, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 1.7181687302044342e-05, |
| "loss": 0.9083, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 1.7124100201554855e-05, |
| "loss": 0.8918, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1.706651310106536e-05, |
| "loss": 0.8788, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 1.700892600057587e-05, |
| "loss": 0.8718, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 1.6951338900086384e-05, |
| "loss": 0.8925, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 1.689375179959689e-05, |
| "loss": 0.8785, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 1.6836164699107403e-05, |
| "loss": 0.889, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 1.6778577598617912e-05, |
| "loss": 0.9017, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 1.672099049812842e-05, |
| "loss": 0.9152, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 1.666340339763893e-05, |
| "loss": 0.9084, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 1.660581629714944e-05, |
| "loss": 0.9019, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 1.6548229196659947e-05, |
| "loss": 0.9162, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 1.649064209617046e-05, |
| "loss": 0.8894, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.643305499568097e-05, |
| "loss": 0.9131, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.637546789519148e-05, |
| "loss": 0.9156, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.6318456665706882e-05, |
| "loss": 0.8944, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1.6260869565217392e-05, |
| "loss": 0.8816, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 1.62032824647279e-05, |
| "loss": 0.8735, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.614569536423841e-05, |
| "loss": 0.935, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.608810826374892e-05, |
| "loss": 0.8962, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.603052116325943e-05, |
| "loss": 0.9181, |
| "step": 6900 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_gen_len": 59.9184, |
| "eval_loss": 1.3943339586257935, |
| "eval_rouge1": 39.5279, |
| "eval_rouge2": 20.2797, |
| "eval_rougeL": 30.5951, |
| "eval_rougeLsum": 36.6789, |
| "eval_runtime": 1843.6748, |
| "eval_samples_per_second": 1.888, |
| "eval_steps_per_second": 0.944, |
| "step": 6946 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.597293406276994e-05, |
| "loss": 0.7443, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.5915346962280452e-05, |
| "loss": 0.5957, |
| "step": 7100 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 1.585775986179096e-05, |
| "loss": 0.6329, |
| "step": 7200 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 1.5800172761301468e-05, |
| "loss": 0.6256, |
| "step": 7300 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 1.574258566081198e-05, |
| "loss": 0.6239, |
| "step": 7400 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.5684998560322487e-05, |
| "loss": 0.6247, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 1.5627411459833e-05, |
| "loss": 0.6203, |
| "step": 7600 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 1.556982435934351e-05, |
| "loss": 0.6359, |
| "step": 7700 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1.5512237258854016e-05, |
| "loss": 0.6419, |
| "step": 7800 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 1.545465015836453e-05, |
| "loss": 0.6646, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.5397063057875038e-05, |
| "loss": 0.662, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 1.5339475957385548e-05, |
| "loss": 0.6209, |
| "step": 8100 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1.5281888856896057e-05, |
| "loss": 0.6836, |
| "step": 8200 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 1.5224301756406565e-05, |
| "loss": 0.6497, |
| "step": 8300 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 1.5166714655917076e-05, |
| "loss": 0.6458, |
| "step": 8400 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 1.5109127555427586e-05, |
| "loss": 0.6631, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 1.5051540454938097e-05, |
| "loss": 0.6804, |
| "step": 8600 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 1.49945292254535e-05, |
| "loss": 0.6577, |
| "step": 8700 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 1.4936942124964008e-05, |
| "loss": 0.6427, |
| "step": 8800 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.487935502447452e-05, |
| "loss": 0.6792, |
| "step": 8900 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 1.4821767923985029e-05, |
| "loss": 0.6669, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 1.4764180823495537e-05, |
| "loss": 0.6682, |
| "step": 9100 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 1.4706593723006048e-05, |
| "loss": 0.6558, |
| "step": 9200 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 1.4649006622516557e-05, |
| "loss": 0.636, |
| "step": 9300 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 1.4591419522027069e-05, |
| "loss": 0.6373, |
| "step": 9400 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 1.4534408292542472e-05, |
| "loss": 0.633, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 1.4476821192052982e-05, |
| "loss": 0.7137, |
| "step": 9600 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 1.441923409156349e-05, |
| "loss": 0.6387, |
| "step": 9700 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 1.4361646991074e-05, |
| "loss": 0.6408, |
| "step": 9800 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 1.430405989058451e-05, |
| "loss": 0.6785, |
| "step": 9900 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 1.424647279009502e-05, |
| "loss": 0.6866, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 1.418888568960553e-05, |
| "loss": 0.6642, |
| "step": 10100 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 1.4131298589116039e-05, |
| "loss": 0.6602, |
| "step": 10200 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 1.4073711488626548e-05, |
| "loss": 0.6597, |
| "step": 10300 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 1.4016124388137058e-05, |
| "loss": 0.6889, |
| "step": 10400 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_gen_len": 60.1934, |
| "eval_loss": 1.4120277166366577, |
| "eval_rouge1": 39.6648, |
| "eval_rouge2": 20.1994, |
| "eval_rougeL": 30.7884, |
| "eval_rougeLsum": 36.5151, |
| "eval_runtime": 1868.7911, |
| "eval_samples_per_second": 1.862, |
| "eval_steps_per_second": 0.931, |
| "step": 10419 |
| }, |
| { |
| "epoch": 3.02, |
| "learning_rate": 1.3958537287647569e-05, |
| "loss": 0.4742, |
| "step": 10500 |
| }, |
| { |
| "epoch": 3.05, |
| "learning_rate": 1.3900950187158077e-05, |
| "loss": 0.4733, |
| "step": 10600 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 1.3843363086668586e-05, |
| "loss": 0.4536, |
| "step": 10700 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 1.3785775986179098e-05, |
| "loss": 0.4418, |
| "step": 10800 |
| }, |
| { |
| "epoch": 3.14, |
| "learning_rate": 1.3728188885689605e-05, |
| "loss": 0.4492, |
| "step": 10900 |
| }, |
| { |
| "epoch": 3.17, |
| "learning_rate": 1.3670601785200117e-05, |
| "loss": 0.4429, |
| "step": 11000 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 1.3613014684710626e-05, |
| "loss": 0.4516, |
| "step": 11100 |
| }, |
| { |
| "epoch": 3.22, |
| "learning_rate": 1.3555427584221134e-05, |
| "loss": 0.4559, |
| "step": 11200 |
| }, |
| { |
| "epoch": 3.25, |
| "learning_rate": 1.3497840483731645e-05, |
| "loss": 0.4701, |
| "step": 11300 |
| }, |
| { |
| "epoch": 3.28, |
| "learning_rate": 1.3440253383242155e-05, |
| "loss": 0.4586, |
| "step": 11400 |
| }, |
| { |
| "epoch": 3.31, |
| "learning_rate": 1.3382666282752666e-05, |
| "loss": 0.487, |
| "step": 11500 |
| }, |
| { |
| "epoch": 3.34, |
| "learning_rate": 1.3325079182263174e-05, |
| "loss": 0.4554, |
| "step": 11600 |
| }, |
| { |
| "epoch": 3.37, |
| "learning_rate": 1.3267492081773683e-05, |
| "loss": 0.4505, |
| "step": 11700 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 1.3209904981284194e-05, |
| "loss": 0.4586, |
| "step": 11800 |
| }, |
| { |
| "epoch": 3.43, |
| "learning_rate": 1.3152317880794702e-05, |
| "loss": 0.4697, |
| "step": 11900 |
| }, |
| { |
| "epoch": 3.46, |
| "learning_rate": 1.3094730780305214e-05, |
| "loss": 0.4477, |
| "step": 12000 |
| }, |
| { |
| "epoch": 3.48, |
| "learning_rate": 1.3037143679815723e-05, |
| "loss": 0.4631, |
| "step": 12100 |
| }, |
| { |
| "epoch": 3.51, |
| "learning_rate": 1.2979556579326231e-05, |
| "loss": 0.4827, |
| "step": 12200 |
| }, |
| { |
| "epoch": 3.54, |
| "learning_rate": 1.2921969478836742e-05, |
| "loss": 0.4524, |
| "step": 12300 |
| }, |
| { |
| "epoch": 3.57, |
| "learning_rate": 1.2864382378347252e-05, |
| "loss": 0.484, |
| "step": 12400 |
| }, |
| { |
| "epoch": 3.6, |
| "learning_rate": 1.2806795277857763e-05, |
| "loss": 0.4786, |
| "step": 12500 |
| }, |
| { |
| "epoch": 3.63, |
| "learning_rate": 1.274920817736827e-05, |
| "loss": 0.4699, |
| "step": 12600 |
| }, |
| { |
| "epoch": 3.66, |
| "learning_rate": 1.269162107687878e-05, |
| "loss": 0.4754, |
| "step": 12700 |
| }, |
| { |
| "epoch": 3.69, |
| "learning_rate": 1.2634033976389291e-05, |
| "loss": 0.4851, |
| "step": 12800 |
| }, |
| { |
| "epoch": 3.71, |
| "learning_rate": 1.25764468758998e-05, |
| "loss": 0.4556, |
| "step": 12900 |
| }, |
| { |
| "epoch": 3.74, |
| "learning_rate": 1.2519435646415203e-05, |
| "loss": 0.4796, |
| "step": 13000 |
| }, |
| { |
| "epoch": 3.77, |
| "learning_rate": 1.2461848545925714e-05, |
| "loss": 0.4667, |
| "step": 13100 |
| }, |
| { |
| "epoch": 3.8, |
| "learning_rate": 1.2404261445436223e-05, |
| "loss": 0.4604, |
| "step": 13200 |
| }, |
| { |
| "epoch": 3.83, |
| "learning_rate": 1.2346674344946731e-05, |
| "loss": 0.4585, |
| "step": 13300 |
| }, |
| { |
| "epoch": 3.86, |
| "learning_rate": 1.2289087244457242e-05, |
| "loss": 0.4569, |
| "step": 13400 |
| }, |
| { |
| "epoch": 3.89, |
| "learning_rate": 1.2231500143967752e-05, |
| "loss": 0.481, |
| "step": 13500 |
| }, |
| { |
| "epoch": 3.92, |
| "learning_rate": 1.2173913043478263e-05, |
| "loss": 0.4908, |
| "step": 13600 |
| }, |
| { |
| "epoch": 3.94, |
| "learning_rate": 1.2116325942988771e-05, |
| "loss": 0.503, |
| "step": 13700 |
| }, |
| { |
| "epoch": 3.97, |
| "learning_rate": 1.205873884249928e-05, |
| "loss": 0.5112, |
| "step": 13800 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_gen_len": 60.4152, |
| "eval_loss": 1.665522575378418, |
| "eval_rouge1": 39.8086, |
| "eval_rouge2": 19.9927, |
| "eval_rougeL": 30.635, |
| "eval_rougeLsum": 36.8366, |
| "eval_runtime": 1876.2894, |
| "eval_samples_per_second": 1.855, |
| "eval_steps_per_second": 0.927, |
| "step": 13892 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 1.2001151742009792e-05, |
| "loss": 0.4853, |
| "step": 13900 |
| }, |
| { |
| "epoch": 4.03, |
| "learning_rate": 1.19435646415203e-05, |
| "loss": 0.316, |
| "step": 14000 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 1.188597754103081e-05, |
| "loss": 0.3047, |
| "step": 14100 |
| }, |
| { |
| "epoch": 4.09, |
| "learning_rate": 1.182839044054132e-05, |
| "loss": 0.2988, |
| "step": 14200 |
| }, |
| { |
| "epoch": 4.12, |
| "learning_rate": 1.1770803340051828e-05, |
| "loss": 0.3301, |
| "step": 14300 |
| }, |
| { |
| "epoch": 4.15, |
| "learning_rate": 1.1713792110567235e-05, |
| "loss": 0.3166, |
| "step": 14400 |
| }, |
| { |
| "epoch": 4.18, |
| "learning_rate": 1.1656205010077743e-05, |
| "loss": 0.3229, |
| "step": 14500 |
| }, |
| { |
| "epoch": 4.2, |
| "learning_rate": 1.1598617909588252e-05, |
| "loss": 0.3222, |
| "step": 14600 |
| }, |
| { |
| "epoch": 4.23, |
| "learning_rate": 1.1541030809098764e-05, |
| "loss": 0.3276, |
| "step": 14700 |
| }, |
| { |
| "epoch": 4.26, |
| "learning_rate": 1.1483443708609271e-05, |
| "loss": 0.3232, |
| "step": 14800 |
| }, |
| { |
| "epoch": 4.29, |
| "learning_rate": 1.1425856608119783e-05, |
| "loss": 0.3192, |
| "step": 14900 |
| }, |
| { |
| "epoch": 4.32, |
| "learning_rate": 1.1368269507630292e-05, |
| "loss": 0.334, |
| "step": 15000 |
| }, |
| { |
| "epoch": 4.35, |
| "learning_rate": 1.13106824071408e-05, |
| "loss": 0.3217, |
| "step": 15100 |
| }, |
| { |
| "epoch": 4.38, |
| "learning_rate": 1.1253095306651311e-05, |
| "loss": 0.3363, |
| "step": 15200 |
| }, |
| { |
| "epoch": 4.41, |
| "learning_rate": 1.119550820616182e-05, |
| "loss": 0.336, |
| "step": 15300 |
| }, |
| { |
| "epoch": 4.43, |
| "learning_rate": 1.1137921105672332e-05, |
| "loss": 0.3283, |
| "step": 15400 |
| }, |
| { |
| "epoch": 4.46, |
| "learning_rate": 1.108033400518284e-05, |
| "loss": 0.323, |
| "step": 15500 |
| }, |
| { |
| "epoch": 4.49, |
| "learning_rate": 1.102274690469335e-05, |
| "loss": 0.3122, |
| "step": 15600 |
| }, |
| { |
| "epoch": 4.52, |
| "learning_rate": 1.096515980420386e-05, |
| "loss": 0.3443, |
| "step": 15700 |
| }, |
| { |
| "epoch": 4.55, |
| "learning_rate": 1.0907572703714368e-05, |
| "loss": 0.329, |
| "step": 15800 |
| }, |
| { |
| "epoch": 4.58, |
| "learning_rate": 1.084998560322488e-05, |
| "loss": 0.3214, |
| "step": 15900 |
| }, |
| { |
| "epoch": 4.61, |
| "learning_rate": 1.0792398502735389e-05, |
| "loss": 0.3444, |
| "step": 16000 |
| }, |
| { |
| "epoch": 4.64, |
| "learning_rate": 1.0734811402245897e-05, |
| "loss": 0.3389, |
| "step": 16100 |
| }, |
| { |
| "epoch": 4.66, |
| "learning_rate": 1.0677224301756408e-05, |
| "loss": 0.3404, |
| "step": 16200 |
| }, |
| { |
| "epoch": 4.69, |
| "learning_rate": 1.0619637201266918e-05, |
| "loss": 0.3405, |
| "step": 16300 |
| }, |
| { |
| "epoch": 4.72, |
| "learning_rate": 1.0562050100777425e-05, |
| "loss": 0.324, |
| "step": 16400 |
| }, |
| { |
| "epoch": 4.75, |
| "learning_rate": 1.0504463000287937e-05, |
| "loss": 0.3242, |
| "step": 16500 |
| }, |
| { |
| "epoch": 4.78, |
| "learning_rate": 1.0446875899798446e-05, |
| "loss": 0.3236, |
| "step": 16600 |
| }, |
| { |
| "epoch": 4.81, |
| "learning_rate": 1.0389288799308956e-05, |
| "loss": 0.3301, |
| "step": 16700 |
| }, |
| { |
| "epoch": 4.84, |
| "learning_rate": 1.0331701698819465e-05, |
| "loss": 0.3501, |
| "step": 16800 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 1.0274114598329975e-05, |
| "loss": 0.3405, |
| "step": 16900 |
| }, |
| { |
| "epoch": 4.89, |
| "learning_rate": 1.0216527497840484e-05, |
| "loss": 0.3388, |
| "step": 17000 |
| }, |
| { |
| "epoch": 4.92, |
| "learning_rate": 1.0158940397350994e-05, |
| "loss": 0.3389, |
| "step": 17100 |
| }, |
| { |
| "epoch": 4.95, |
| "learning_rate": 1.0101353296861505e-05, |
| "loss": 0.3563, |
| "step": 17200 |
| }, |
| { |
| "epoch": 4.98, |
| "learning_rate": 1.0043766196372013e-05, |
| "loss": 0.3554, |
| "step": 17300 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_gen_len": 59.7468, |
| "eval_loss": 1.7994598150253296, |
| "eval_rouge1": 40.9514, |
| "eval_rouge2": 20.4703, |
| "eval_rougeL": 31.7911, |
| "eval_rougeLsum": 37.7023, |
| "eval_runtime": 1848.9202, |
| "eval_samples_per_second": 1.882, |
| "eval_steps_per_second": 0.941, |
| "step": 17365 |
| }, |
| { |
| "epoch": 5.01, |
| "learning_rate": 9.986179095882524e-06, |
| "loss": 0.2886, |
| "step": 17400 |
| }, |
| { |
| "epoch": 5.04, |
| "learning_rate": 9.928591995393033e-06, |
| "loss": 0.2131, |
| "step": 17500 |
| }, |
| { |
| "epoch": 5.07, |
| "learning_rate": 9.871004894903541e-06, |
| "loss": 0.2163, |
| "step": 17600 |
| }, |
| { |
| "epoch": 5.1, |
| "learning_rate": 9.813417794414052e-06, |
| "loss": 0.2184, |
| "step": 17700 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 9.755830693924562e-06, |
| "loss": 0.2381, |
| "step": 17800 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 9.698243593435071e-06, |
| "loss": 0.2174, |
| "step": 17900 |
| }, |
| { |
| "epoch": 5.18, |
| "learning_rate": 9.640656492945581e-06, |
| "loss": 0.2226, |
| "step": 18000 |
| }, |
| { |
| "epoch": 5.21, |
| "learning_rate": 9.58306939245609e-06, |
| "loss": 0.2383, |
| "step": 18100 |
| }, |
| { |
| "epoch": 5.24, |
| "learning_rate": 9.5254822919666e-06, |
| "loss": 0.2366, |
| "step": 18200 |
| }, |
| { |
| "epoch": 5.27, |
| "learning_rate": 9.46789519147711e-06, |
| "loss": 0.2206, |
| "step": 18300 |
| }, |
| { |
| "epoch": 5.3, |
| "learning_rate": 9.410308090987619e-06, |
| "loss": 0.2302, |
| "step": 18400 |
| }, |
| { |
| "epoch": 5.33, |
| "learning_rate": 9.352720990498129e-06, |
| "loss": 0.2204, |
| "step": 18500 |
| }, |
| { |
| "epoch": 5.36, |
| "learning_rate": 9.295133890008638e-06, |
| "loss": 0.2414, |
| "step": 18600 |
| }, |
| { |
| "epoch": 5.38, |
| "learning_rate": 9.237546789519148e-06, |
| "loss": 0.2326, |
| "step": 18700 |
| }, |
| { |
| "epoch": 5.41, |
| "learning_rate": 9.179959689029659e-06, |
| "loss": 0.2387, |
| "step": 18800 |
| }, |
| { |
| "epoch": 5.44, |
| "learning_rate": 9.122372588540168e-06, |
| "loss": 0.2247, |
| "step": 18900 |
| }, |
| { |
| "epoch": 5.47, |
| "learning_rate": 9.064785488050676e-06, |
| "loss": 0.2306, |
| "step": 19000 |
| }, |
| { |
| "epoch": 5.5, |
| "learning_rate": 9.007198387561187e-06, |
| "loss": 0.2456, |
| "step": 19100 |
| }, |
| { |
| "epoch": 5.53, |
| "learning_rate": 8.949611287071697e-06, |
| "loss": 0.2187, |
| "step": 19200 |
| }, |
| { |
| "epoch": 5.56, |
| "learning_rate": 8.892024186582206e-06, |
| "loss": 0.2425, |
| "step": 19300 |
| }, |
| { |
| "epoch": 5.59, |
| "learning_rate": 8.834437086092716e-06, |
| "loss": 0.2229, |
| "step": 19400 |
| }, |
| { |
| "epoch": 5.61, |
| "learning_rate": 8.776849985603225e-06, |
| "loss": 0.2296, |
| "step": 19500 |
| }, |
| { |
| "epoch": 5.64, |
| "learning_rate": 8.71983875611863e-06, |
| "loss": 0.2354, |
| "step": 19600 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 8.66225165562914e-06, |
| "loss": 0.2258, |
| "step": 19700 |
| }, |
| { |
| "epoch": 5.7, |
| "learning_rate": 8.60466455513965e-06, |
| "loss": 0.2307, |
| "step": 19800 |
| }, |
| { |
| "epoch": 5.73, |
| "learning_rate": 8.54707745465016e-06, |
| "loss": 0.2257, |
| "step": 19900 |
| }, |
| { |
| "epoch": 5.76, |
| "learning_rate": 8.489490354160669e-06, |
| "loss": 0.2335, |
| "step": 20000 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 8.431903253671178e-06, |
| "loss": 0.2433, |
| "step": 20100 |
| }, |
| { |
| "epoch": 5.82, |
| "learning_rate": 8.374316153181688e-06, |
| "loss": 0.2349, |
| "step": 20200 |
| }, |
| { |
| "epoch": 5.84, |
| "learning_rate": 8.316729052692197e-06, |
| "loss": 0.2372, |
| "step": 20300 |
| }, |
| { |
| "epoch": 5.87, |
| "learning_rate": 8.259141952202707e-06, |
| "loss": 0.2479, |
| "step": 20400 |
| }, |
| { |
| "epoch": 5.9, |
| "learning_rate": 8.201554851713216e-06, |
| "loss": 0.232, |
| "step": 20500 |
| }, |
| { |
| "epoch": 5.93, |
| "learning_rate": 8.143967751223728e-06, |
| "loss": 0.2373, |
| "step": 20600 |
| }, |
| { |
| "epoch": 5.96, |
| "learning_rate": 8.086380650734235e-06, |
| "loss": 0.2175, |
| "step": 20700 |
| }, |
| { |
| "epoch": 5.99, |
| "learning_rate": 8.028793550244745e-06, |
| "loss": 0.2414, |
| "step": 20800 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_gen_len": 59.9523, |
| "eval_loss": 2.0208189487457275, |
| "eval_rouge1": 40.7013, |
| "eval_rouge2": 20.2748, |
| "eval_rougeL": 31.3653, |
| "eval_rougeLsum": 37.5646, |
| "eval_runtime": 1863.4517, |
| "eval_samples_per_second": 1.868, |
| "eval_steps_per_second": 0.934, |
| "step": 20838 |
| }, |
| { |
| "epoch": 6.02, |
| "learning_rate": 7.971206449755256e-06, |
| "loss": 0.1724, |
| "step": 20900 |
| }, |
| { |
| "epoch": 6.05, |
| "learning_rate": 7.913619349265766e-06, |
| "loss": 0.155, |
| "step": 21000 |
| }, |
| { |
| "epoch": 6.08, |
| "learning_rate": 7.856032248776275e-06, |
| "loss": 0.1492, |
| "step": 21100 |
| }, |
| { |
| "epoch": 6.1, |
| "learning_rate": 7.798445148286785e-06, |
| "loss": 0.1561, |
| "step": 21200 |
| }, |
| { |
| "epoch": 6.13, |
| "learning_rate": 7.740858047797294e-06, |
| "loss": 0.1561, |
| "step": 21300 |
| }, |
| { |
| "epoch": 6.16, |
| "learning_rate": 7.683270947307804e-06, |
| "loss": 0.1572, |
| "step": 21400 |
| }, |
| { |
| "epoch": 6.19, |
| "learning_rate": 7.625683846818314e-06, |
| "loss": 0.1755, |
| "step": 21500 |
| }, |
| { |
| "epoch": 6.22, |
| "learning_rate": 7.568096746328823e-06, |
| "loss": 0.1659, |
| "step": 21600 |
| }, |
| { |
| "epoch": 6.25, |
| "learning_rate": 7.510509645839332e-06, |
| "loss": 0.1652, |
| "step": 21700 |
| }, |
| { |
| "epoch": 6.28, |
| "learning_rate": 7.452922545349843e-06, |
| "loss": 0.1605, |
| "step": 21800 |
| }, |
| { |
| "epoch": 6.31, |
| "learning_rate": 7.395335444860352e-06, |
| "loss": 0.1558, |
| "step": 21900 |
| }, |
| { |
| "epoch": 6.33, |
| "learning_rate": 7.337748344370862e-06, |
| "loss": 0.1591, |
| "step": 22000 |
| }, |
| { |
| "epoch": 6.36, |
| "learning_rate": 7.28016124388137e-06, |
| "loss": 0.1502, |
| "step": 22100 |
| }, |
| { |
| "epoch": 6.39, |
| "learning_rate": 7.222574143391881e-06, |
| "loss": 0.1598, |
| "step": 22200 |
| }, |
| { |
| "epoch": 6.42, |
| "learning_rate": 7.16498704290239e-06, |
| "loss": 0.1536, |
| "step": 22300 |
| }, |
| { |
| "epoch": 6.45, |
| "learning_rate": 7.1073999424129005e-06, |
| "loss": 0.1501, |
| "step": 22400 |
| }, |
| { |
| "epoch": 6.48, |
| "learning_rate": 7.04981284192341e-06, |
| "loss": 0.1487, |
| "step": 22500 |
| }, |
| { |
| "epoch": 6.51, |
| "learning_rate": 6.9928016124388144e-06, |
| "loss": 0.1563, |
| "step": 22600 |
| }, |
| { |
| "epoch": 6.54, |
| "learning_rate": 6.935214511949324e-06, |
| "loss": 0.1536, |
| "step": 22700 |
| }, |
| { |
| "epoch": 6.56, |
| "learning_rate": 6.877627411459834e-06, |
| "loss": 0.164, |
| "step": 22800 |
| }, |
| { |
| "epoch": 6.59, |
| "learning_rate": 6.820040310970343e-06, |
| "loss": 0.1676, |
| "step": 22900 |
| }, |
| { |
| "epoch": 6.62, |
| "learning_rate": 6.7624532104808525e-06, |
| "loss": 0.1513, |
| "step": 23000 |
| }, |
| { |
| "epoch": 6.65, |
| "learning_rate": 6.704866109991363e-06, |
| "loss": 0.1566, |
| "step": 23100 |
| }, |
| { |
| "epoch": 6.68, |
| "learning_rate": 6.647279009501872e-06, |
| "loss": 0.1511, |
| "step": 23200 |
| }, |
| { |
| "epoch": 6.71, |
| "learning_rate": 6.589691909012382e-06, |
| "loss": 0.1608, |
| "step": 23300 |
| }, |
| { |
| "epoch": 6.74, |
| "learning_rate": 6.532104808522891e-06, |
| "loss": 0.1633, |
| "step": 23400 |
| }, |
| { |
| "epoch": 6.77, |
| "learning_rate": 6.474517708033401e-06, |
| "loss": 0.1588, |
| "step": 23500 |
| }, |
| { |
| "epoch": 6.8, |
| "learning_rate": 6.4169306075439104e-06, |
| "loss": 0.1597, |
| "step": 23600 |
| }, |
| { |
| "epoch": 6.82, |
| "learning_rate": 6.359343507054421e-06, |
| "loss": 0.1654, |
| "step": 23700 |
| }, |
| { |
| "epoch": 6.85, |
| "learning_rate": 6.3017564065649295e-06, |
| "loss": 0.1615, |
| "step": 23800 |
| }, |
| { |
| "epoch": 6.88, |
| "learning_rate": 6.244169306075439e-06, |
| "loss": 0.1612, |
| "step": 23900 |
| }, |
| { |
| "epoch": 6.91, |
| "learning_rate": 6.186582205585949e-06, |
| "loss": 0.1532, |
| "step": 24000 |
| }, |
| { |
| "epoch": 6.94, |
| "learning_rate": 6.128995105096459e-06, |
| "loss": 0.148, |
| "step": 24100 |
| }, |
| { |
| "epoch": 6.97, |
| "learning_rate": 6.071408004606969e-06, |
| "loss": 0.1671, |
| "step": 24200 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 6.013820904117478e-06, |
| "loss": 0.1529, |
| "step": 24300 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_gen_len": 60.129, |
| "eval_loss": 2.3271801471710205, |
| "eval_rouge1": 40.6267, |
| "eval_rouge2": 19.9817, |
| "eval_rougeL": 31.2612, |
| "eval_rougeLsum": 37.1969, |
| "eval_runtime": 1881.9513, |
| "eval_samples_per_second": 1.849, |
| "eval_steps_per_second": 0.925, |
| "step": 24311 |
| }, |
| { |
| "epoch": 7.03, |
| "learning_rate": 5.956233803627987e-06, |
| "loss": 0.1123, |
| "step": 24400 |
| }, |
| { |
| "epoch": 7.05, |
| "learning_rate": 5.898646703138498e-06, |
| "loss": 0.1085, |
| "step": 24500 |
| }, |
| { |
| "epoch": 7.08, |
| "learning_rate": 5.841059602649007e-06, |
| "loss": 0.1008, |
| "step": 24600 |
| }, |
| { |
| "epoch": 7.11, |
| "learning_rate": 5.783472502159518e-06, |
| "loss": 0.1167, |
| "step": 24700 |
| }, |
| { |
| "epoch": 7.14, |
| "learning_rate": 5.725885401670026e-06, |
| "loss": 0.1039, |
| "step": 24800 |
| }, |
| { |
| "epoch": 7.17, |
| "learning_rate": 5.668298301180536e-06, |
| "loss": 0.1102, |
| "step": 24900 |
| }, |
| { |
| "epoch": 7.2, |
| "learning_rate": 5.610711200691046e-06, |
| "loss": 0.1127, |
| "step": 25000 |
| }, |
| { |
| "epoch": 7.23, |
| "learning_rate": 5.55369997120645e-06, |
| "loss": 0.1149, |
| "step": 25100 |
| }, |
| { |
| "epoch": 7.26, |
| "learning_rate": 5.49611287071696e-06, |
| "loss": 0.1194, |
| "step": 25200 |
| }, |
| { |
| "epoch": 7.28, |
| "learning_rate": 5.43852577022747e-06, |
| "loss": 0.106, |
| "step": 25300 |
| }, |
| { |
| "epoch": 7.31, |
| "learning_rate": 5.380938669737979e-06, |
| "loss": 0.1123, |
| "step": 25400 |
| }, |
| { |
| "epoch": 7.34, |
| "learning_rate": 5.323351569248489e-06, |
| "loss": 0.1158, |
| "step": 25500 |
| }, |
| { |
| "epoch": 7.37, |
| "learning_rate": 5.265764468758998e-06, |
| "loss": 0.1069, |
| "step": 25600 |
| }, |
| { |
| "epoch": 7.4, |
| "learning_rate": 5.208177368269508e-06, |
| "loss": 0.109, |
| "step": 25700 |
| }, |
| { |
| "epoch": 7.43, |
| "learning_rate": 5.150590267780018e-06, |
| "loss": 0.1188, |
| "step": 25800 |
| }, |
| { |
| "epoch": 7.46, |
| "learning_rate": 5.0930031672905276e-06, |
| "loss": 0.1077, |
| "step": 25900 |
| }, |
| { |
| "epoch": 7.49, |
| "learning_rate": 5.035416066801036e-06, |
| "loss": 0.1108, |
| "step": 26000 |
| }, |
| { |
| "epoch": 7.52, |
| "learning_rate": 4.977828966311547e-06, |
| "loss": 0.1136, |
| "step": 26100 |
| }, |
| { |
| "epoch": 7.54, |
| "learning_rate": 4.920817736826951e-06, |
| "loss": 0.1054, |
| "step": 26200 |
| }, |
| { |
| "epoch": 7.57, |
| "learning_rate": 4.863230636337461e-06, |
| "loss": 0.1104, |
| "step": 26300 |
| }, |
| { |
| "epoch": 7.6, |
| "learning_rate": 4.80564353584797e-06, |
| "loss": 0.1079, |
| "step": 26400 |
| }, |
| { |
| "epoch": 7.63, |
| "learning_rate": 4.74805643535848e-06, |
| "loss": 0.1047, |
| "step": 26500 |
| }, |
| { |
| "epoch": 7.66, |
| "learning_rate": 4.690469334868989e-06, |
| "loss": 0.1083, |
| "step": 26600 |
| }, |
| { |
| "epoch": 7.69, |
| "learning_rate": 4.632882234379499e-06, |
| "loss": 0.1065, |
| "step": 26700 |
| }, |
| { |
| "epoch": 7.72, |
| "learning_rate": 4.575295133890009e-06, |
| "loss": 0.1095, |
| "step": 26800 |
| }, |
| { |
| "epoch": 7.75, |
| "learning_rate": 4.5177080334005185e-06, |
| "loss": 0.1056, |
| "step": 26900 |
| }, |
| { |
| "epoch": 7.77, |
| "learning_rate": 4.460120932911029e-06, |
| "loss": 0.1052, |
| "step": 27000 |
| }, |
| { |
| "epoch": 7.8, |
| "learning_rate": 4.4025338324215375e-06, |
| "loss": 0.1107, |
| "step": 27100 |
| }, |
| { |
| "epoch": 7.83, |
| "learning_rate": 4.344946731932048e-06, |
| "loss": 0.1049, |
| "step": 27200 |
| }, |
| { |
| "epoch": 7.86, |
| "learning_rate": 4.287359631442557e-06, |
| "loss": 0.102, |
| "step": 27300 |
| }, |
| { |
| "epoch": 7.89, |
| "learning_rate": 4.229772530953067e-06, |
| "loss": 0.1156, |
| "step": 27400 |
| }, |
| { |
| "epoch": 7.92, |
| "learning_rate": 4.172185430463576e-06, |
| "loss": 0.0907, |
| "step": 27500 |
| }, |
| { |
| "epoch": 7.95, |
| "learning_rate": 4.114598329974086e-06, |
| "loss": 0.1052, |
| "step": 27600 |
| }, |
| { |
| "epoch": 7.98, |
| "learning_rate": 4.057011229484596e-06, |
| "loss": 0.1236, |
| "step": 27700 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_gen_len": 59.8247, |
| "eval_loss": 2.426095485687256, |
| "eval_rouge1": 41.8276, |
| "eval_rouge2": 20.7036, |
| "eval_rougeL": 32.3998, |
| "eval_rougeLsum": 38.5374, |
| "eval_runtime": 1864.0588, |
| "eval_samples_per_second": 1.867, |
| "eval_steps_per_second": 0.933, |
| "step": 27784 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 3.999424128995105e-06, |
| "loss": 0.1053, |
| "step": 27800 |
| }, |
| { |
| "epoch": 8.03, |
| "learning_rate": 3.941837028505615e-06, |
| "loss": 0.0783, |
| "step": 27900 |
| }, |
| { |
| "epoch": 8.06, |
| "learning_rate": 3.884249928016125e-06, |
| "loss": 0.0753, |
| "step": 28000 |
| }, |
| { |
| "epoch": 8.09, |
| "learning_rate": 3.826662827526634e-06, |
| "loss": 0.0759, |
| "step": 28100 |
| }, |
| { |
| "epoch": 8.12, |
| "learning_rate": 3.7690757270371443e-06, |
| "loss": 0.0737, |
| "step": 28200 |
| }, |
| { |
| "epoch": 8.15, |
| "learning_rate": 3.7114886265476534e-06, |
| "loss": 0.0733, |
| "step": 28300 |
| }, |
| { |
| "epoch": 8.18, |
| "learning_rate": 3.6539015260581633e-06, |
| "loss": 0.0771, |
| "step": 28400 |
| }, |
| { |
| "epoch": 8.21, |
| "learning_rate": 3.596314425568673e-06, |
| "loss": 0.081, |
| "step": 28500 |
| }, |
| { |
| "epoch": 8.23, |
| "learning_rate": 3.5387273250791828e-06, |
| "loss": 0.0757, |
| "step": 28600 |
| }, |
| { |
| "epoch": 8.26, |
| "learning_rate": 3.481140224589692e-06, |
| "loss": 0.0692, |
| "step": 28700 |
| }, |
| { |
| "epoch": 8.29, |
| "learning_rate": 3.4241289951050967e-06, |
| "loss": 0.0683, |
| "step": 28800 |
| }, |
| { |
| "epoch": 8.32, |
| "learning_rate": 3.3665418946156066e-06, |
| "loss": 0.074, |
| "step": 28900 |
| }, |
| { |
| "epoch": 8.35, |
| "learning_rate": 3.3089547941261157e-06, |
| "loss": 0.0728, |
| "step": 29000 |
| }, |
| { |
| "epoch": 8.38, |
| "learning_rate": 3.2513676936366256e-06, |
| "loss": 0.0734, |
| "step": 29100 |
| }, |
| { |
| "epoch": 8.41, |
| "learning_rate": 3.1937805931471356e-06, |
| "loss": 0.0707, |
| "step": 29200 |
| }, |
| { |
| "epoch": 8.44, |
| "learning_rate": 3.1361934926576447e-06, |
| "loss": 0.0755, |
| "step": 29300 |
| }, |
| { |
| "epoch": 8.47, |
| "learning_rate": 3.0786063921681546e-06, |
| "loss": 0.08, |
| "step": 29400 |
| }, |
| { |
| "epoch": 8.49, |
| "learning_rate": 3.021019291678664e-06, |
| "loss": 0.075, |
| "step": 29500 |
| }, |
| { |
| "epoch": 8.52, |
| "learning_rate": 2.963432191189174e-06, |
| "loss": 0.0709, |
| "step": 29600 |
| }, |
| { |
| "epoch": 8.55, |
| "learning_rate": 2.905845090699683e-06, |
| "loss": 0.0749, |
| "step": 29700 |
| }, |
| { |
| "epoch": 8.58, |
| "learning_rate": 2.848257990210193e-06, |
| "loss": 0.0707, |
| "step": 29800 |
| }, |
| { |
| "epoch": 8.61, |
| "learning_rate": 2.790670889720703e-06, |
| "loss": 0.0748, |
| "step": 29900 |
| }, |
| { |
| "epoch": 8.64, |
| "learning_rate": 2.733083789231212e-06, |
| "loss": 0.0708, |
| "step": 30000 |
| }, |
| { |
| "epoch": 8.67, |
| "learning_rate": 2.675496688741722e-06, |
| "loss": 0.0784, |
| "step": 30100 |
| }, |
| { |
| "epoch": 8.7, |
| "learning_rate": 2.6179095882522316e-06, |
| "loss": 0.069, |
| "step": 30200 |
| }, |
| { |
| "epoch": 8.72, |
| "learning_rate": 2.5603224877627415e-06, |
| "loss": 0.0766, |
| "step": 30300 |
| }, |
| { |
| "epoch": 8.75, |
| "learning_rate": 2.5027353872732515e-06, |
| "loss": 0.0809, |
| "step": 30400 |
| }, |
| { |
| "epoch": 8.78, |
| "learning_rate": 2.4451482867837606e-06, |
| "loss": 0.0739, |
| "step": 30500 |
| }, |
| { |
| "epoch": 8.81, |
| "learning_rate": 2.38756118629427e-06, |
| "loss": 0.0765, |
| "step": 30600 |
| }, |
| { |
| "epoch": 8.84, |
| "learning_rate": 2.3299740858047796e-06, |
| "loss": 0.0728, |
| "step": 30700 |
| }, |
| { |
| "epoch": 8.87, |
| "learning_rate": 2.2723869853152895e-06, |
| "loss": 0.0787, |
| "step": 30800 |
| }, |
| { |
| "epoch": 8.9, |
| "learning_rate": 2.2147998848257995e-06, |
| "loss": 0.0847, |
| "step": 30900 |
| }, |
| { |
| "epoch": 8.93, |
| "learning_rate": 2.157212784336309e-06, |
| "loss": 0.0791, |
| "step": 31000 |
| }, |
| { |
| "epoch": 8.95, |
| "learning_rate": 2.0996256838468185e-06, |
| "loss": 0.0644, |
| "step": 31100 |
| }, |
| { |
| "epoch": 8.98, |
| "learning_rate": 2.042038583357328e-06, |
| "loss": 0.078, |
| "step": 31200 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_gen_len": 59.9862, |
| "eval_loss": 2.554534673690796, |
| "eval_rouge1": 41.439, |
| "eval_rouge2": 20.612, |
| "eval_rougeL": 32.0749, |
| "eval_rougeLsum": 38.1027, |
| "eval_runtime": 1862.3443, |
| "eval_samples_per_second": 1.869, |
| "eval_steps_per_second": 0.934, |
| "step": 31257 |
| }, |
| { |
| "epoch": 9.01, |
| "learning_rate": 1.9844514828678375e-06, |
| "loss": 0.0672, |
| "step": 31300 |
| }, |
| { |
| "epoch": 9.04, |
| "learning_rate": 1.9268643823783475e-06, |
| "loss": 0.0623, |
| "step": 31400 |
| }, |
| { |
| "epoch": 9.07, |
| "learning_rate": 1.8692772818888572e-06, |
| "loss": 0.0516, |
| "step": 31500 |
| }, |
| { |
| "epoch": 9.1, |
| "learning_rate": 1.8116901813993667e-06, |
| "loss": 0.054, |
| "step": 31600 |
| }, |
| { |
| "epoch": 9.13, |
| "learning_rate": 1.7541030809098765e-06, |
| "loss": 0.0478, |
| "step": 31700 |
| }, |
| { |
| "epoch": 9.16, |
| "learning_rate": 1.696515980420386e-06, |
| "loss": 0.0573, |
| "step": 31800 |
| }, |
| { |
| "epoch": 9.19, |
| "learning_rate": 1.6389288799308955e-06, |
| "loss": 0.0518, |
| "step": 31900 |
| }, |
| { |
| "epoch": 9.21, |
| "learning_rate": 1.5813417794414052e-06, |
| "loss": 0.0531, |
| "step": 32000 |
| }, |
| { |
| "epoch": 9.24, |
| "learning_rate": 1.523754678951915e-06, |
| "loss": 0.0471, |
| "step": 32100 |
| }, |
| { |
| "epoch": 9.27, |
| "learning_rate": 1.4661675784624247e-06, |
| "loss": 0.0488, |
| "step": 32200 |
| }, |
| { |
| "epoch": 9.3, |
| "learning_rate": 1.4085804779729342e-06, |
| "loss": 0.0477, |
| "step": 32300 |
| }, |
| { |
| "epoch": 9.33, |
| "learning_rate": 1.350993377483444e-06, |
| "loss": 0.0561, |
| "step": 32400 |
| }, |
| { |
| "epoch": 9.36, |
| "learning_rate": 1.2934062769939534e-06, |
| "loss": 0.0563, |
| "step": 32500 |
| }, |
| { |
| "epoch": 9.39, |
| "learning_rate": 1.235819176504463e-06, |
| "loss": 0.0486, |
| "step": 32600 |
| }, |
| { |
| "epoch": 9.42, |
| "learning_rate": 1.1782320760149727e-06, |
| "loss": 0.0568, |
| "step": 32700 |
| }, |
| { |
| "epoch": 9.44, |
| "learning_rate": 1.1206449755254824e-06, |
| "loss": 0.0504, |
| "step": 32800 |
| }, |
| { |
| "epoch": 9.47, |
| "learning_rate": 1.063057875035992e-06, |
| "loss": 0.0556, |
| "step": 32900 |
| }, |
| { |
| "epoch": 9.5, |
| "learning_rate": 1.0054707745465017e-06, |
| "loss": 0.0525, |
| "step": 33000 |
| }, |
| { |
| "epoch": 9.53, |
| "learning_rate": 9.490354160668011e-07, |
| "loss": 0.057, |
| "step": 33100 |
| }, |
| { |
| "epoch": 9.56, |
| "learning_rate": 8.914483155773107e-07, |
| "loss": 0.0508, |
| "step": 33200 |
| }, |
| { |
| "epoch": 9.59, |
| "learning_rate": 8.338612150878205e-07, |
| "loss": 0.0489, |
| "step": 33300 |
| }, |
| { |
| "epoch": 9.62, |
| "learning_rate": 7.762741145983301e-07, |
| "loss": 0.0559, |
| "step": 33400 |
| }, |
| { |
| "epoch": 9.65, |
| "learning_rate": 7.186870141088396e-07, |
| "loss": 0.0545, |
| "step": 33500 |
| }, |
| { |
| "epoch": 9.67, |
| "learning_rate": 6.610999136193493e-07, |
| "loss": 0.0519, |
| "step": 33600 |
| }, |
| { |
| "epoch": 9.7, |
| "learning_rate": 6.03512813129859e-07, |
| "loss": 0.0478, |
| "step": 33700 |
| }, |
| { |
| "epoch": 9.73, |
| "learning_rate": 5.459257126403686e-07, |
| "loss": 0.0508, |
| "step": 33800 |
| }, |
| { |
| "epoch": 9.76, |
| "learning_rate": 4.883386121508782e-07, |
| "loss": 0.0523, |
| "step": 33900 |
| }, |
| { |
| "epoch": 9.79, |
| "learning_rate": 4.307515116613879e-07, |
| "loss": 0.0491, |
| "step": 34000 |
| } |
| ], |
| "max_steps": 34730, |
| "num_train_epochs": 10, |
| "total_flos": 8.56161528236114e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|