| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 29.78494623655914, | |
| "global_step": 13850, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.964157706093191e-06, | |
| "loss": 2.9056, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.928315412186381e-06, | |
| "loss": 2.7058, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.89247311827957e-06, | |
| "loss": 2.6284, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.85663082437276e-06, | |
| "loss": 2.6307, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 2.32030987739563, | |
| "eval_rouge1": 0.32786127224568706, | |
| "eval_rouge2": 0.19018098494575836, | |
| "eval_rougeL": 0.2859655333642799, | |
| "eval_rougeLsum": 0.2934811385373408, | |
| "eval_runtime": 123.7208, | |
| "eval_samples_per_second": 3.758, | |
| "eval_steps_per_second": 0.946, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.8207885304659505e-06, | |
| "loss": 2.5668, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 4.78494623655914e-06, | |
| "loss": 2.5604, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 4.74910394265233e-06, | |
| "loss": 2.5143, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 4.71326164874552e-06, | |
| "loss": 2.4849, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 4.67741935483871e-06, | |
| "loss": 2.5315, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 2.240959644317627, | |
| "eval_rouge1": 0.3420570918954662, | |
| "eval_rouge2": 0.2060604177908008, | |
| "eval_rougeL": 0.3043927804421642, | |
| "eval_rougeLsum": 0.31155659054764084, | |
| "eval_runtime": 120.8082, | |
| "eval_samples_per_second": 3.849, | |
| "eval_steps_per_second": 0.968, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 4.6415770609319e-06, | |
| "loss": 2.4132, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 4.60573476702509e-06, | |
| "loss": 2.4667, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 4.56989247311828e-06, | |
| "loss": 2.4937, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 4.5340501792114695e-06, | |
| "loss": 2.4156, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 2.1922943592071533, | |
| "eval_rouge1": 0.34994978899734663, | |
| "eval_rouge2": 0.21185951083528443, | |
| "eval_rougeL": 0.31353678054844464, | |
| "eval_rougeLsum": 0.3205520009823804, | |
| "eval_runtime": 128.6775, | |
| "eval_samples_per_second": 3.614, | |
| "eval_steps_per_second": 0.909, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 4.49820788530466e-06, | |
| "loss": 2.3945, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 4.46236559139785e-06, | |
| "loss": 2.4332, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 4.42652329749104e-06, | |
| "loss": 2.3549, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 4.39068100358423e-06, | |
| "loss": 2.3908, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 4.35483870967742e-06, | |
| "loss": 2.3961, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 2.15604567527771, | |
| "eval_rouge1": 0.35503098444416914, | |
| "eval_rouge2": 0.21345222912541117, | |
| "eval_rougeL": 0.3183951453816757, | |
| "eval_rougeLsum": 0.3245199945586198, | |
| "eval_runtime": 120.7681, | |
| "eval_samples_per_second": 3.85, | |
| "eval_steps_per_second": 0.969, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 4.31899641577061e-06, | |
| "loss": 2.3369, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 4.2831541218638e-06, | |
| "loss": 2.3171, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 4.2473118279569895e-06, | |
| "loss": 2.3699, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 4.211469534050179e-06, | |
| "loss": 2.3445, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 4.17562724014337e-06, | |
| "loss": 2.3595, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 2.128229856491089, | |
| "eval_rouge1": 0.35828380214110583, | |
| "eval_rouge2": 0.2154697652390767, | |
| "eval_rougeL": 0.32071886194683874, | |
| "eval_rougeLsum": 0.326832484203238, | |
| "eval_runtime": 121.0509, | |
| "eval_samples_per_second": 3.841, | |
| "eval_steps_per_second": 0.967, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 4.139784946236559e-06, | |
| "loss": 2.2672, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 4.103942652329749e-06, | |
| "loss": 2.2787, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 4.0681003584229395e-06, | |
| "loss": 2.2953, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 4.032258064516129e-06, | |
| "loss": 2.3433, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 2.1061463356018066, | |
| "eval_rouge1": 0.3602290098399774, | |
| "eval_rouge2": 0.21720084816876015, | |
| "eval_rougeL": 0.321761715180238, | |
| "eval_rougeLsum": 0.3282393870616483, | |
| "eval_runtime": 120.1834, | |
| "eval_samples_per_second": 3.869, | |
| "eval_steps_per_second": 0.974, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 3.996415770609319e-06, | |
| "loss": 2.2877, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 3.960573476702509e-06, | |
| "loss": 2.245, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 3.924731182795699e-06, | |
| "loss": 2.3009, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 3.88888888888889e-06, | |
| "loss": 2.2201, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 3.853046594982079e-06, | |
| "loss": 2.2686, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 2.085155487060547, | |
| "eval_rouge1": 0.36126040343454235, | |
| "eval_rouge2": 0.21547432955733056, | |
| "eval_rougeL": 0.3220760761874921, | |
| "eval_rougeLsum": 0.3290219607741144, | |
| "eval_runtime": 123.1351, | |
| "eval_samples_per_second": 3.776, | |
| "eval_steps_per_second": 0.95, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 3.817204301075269e-06, | |
| "loss": 2.2865, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 3.7813620071684594e-06, | |
| "loss": 2.2324, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 7.53, | |
| "learning_rate": 3.7455197132616487e-06, | |
| "loss": 2.1958, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 3.7096774193548392e-06, | |
| "loss": 2.247, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "learning_rate": 3.6738351254480293e-06, | |
| "loss": 2.1938, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 2.067925214767456, | |
| "eval_rouge1": 0.3693846075750796, | |
| "eval_rouge2": 0.21744552490349583, | |
| "eval_rougeL": 0.33008227975421506, | |
| "eval_rougeLsum": 0.3366887788509312, | |
| "eval_runtime": 122.9004, | |
| "eval_samples_per_second": 3.784, | |
| "eval_steps_per_second": 0.952, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "learning_rate": 3.637992831541219e-06, | |
| "loss": 2.2624, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 3.602150537634409e-06, | |
| "loss": 2.1922, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "learning_rate": 3.5663082437275988e-06, | |
| "loss": 2.1876, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "learning_rate": 3.530465949820789e-06, | |
| "loss": 2.2504, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 2.0534462928771973, | |
| "eval_rouge1": 0.3699290195093533, | |
| "eval_rouge2": 0.21788826762887392, | |
| "eval_rougeL": 0.3292248508798441, | |
| "eval_rougeLsum": 0.3357683304940864, | |
| "eval_runtime": 120.1177, | |
| "eval_samples_per_second": 3.871, | |
| "eval_steps_per_second": 0.974, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 9.03, | |
| "learning_rate": 3.494623655913979e-06, | |
| "loss": 2.1819, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 3.4587813620071686e-06, | |
| "loss": 2.1755, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "learning_rate": 3.4229390681003587e-06, | |
| "loss": 2.2398, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "learning_rate": 3.3870967741935484e-06, | |
| "loss": 2.0979, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 9.89, | |
| "learning_rate": 3.3512544802867385e-06, | |
| "loss": 2.2245, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 2.0405704975128174, | |
| "eval_rouge1": 0.36982395276855895, | |
| "eval_rouge2": 0.21943719038802345, | |
| "eval_rougeL": 0.32884583320308947, | |
| "eval_rougeLsum": 0.3352203622134837, | |
| "eval_runtime": 120.0171, | |
| "eval_samples_per_second": 3.874, | |
| "eval_steps_per_second": 0.975, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 10.11, | |
| "learning_rate": 3.3154121863799286e-06, | |
| "loss": 2.1732, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 10.32, | |
| "learning_rate": 3.2795698924731183e-06, | |
| "loss": 2.1806, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 10.54, | |
| "learning_rate": 3.2437275985663088e-06, | |
| "loss": 2.1288, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 10.75, | |
| "learning_rate": 3.207885304659498e-06, | |
| "loss": 2.1696, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 10.97, | |
| "learning_rate": 3.1720430107526885e-06, | |
| "loss": 2.2101, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 2.0279901027679443, | |
| "eval_rouge1": 0.3734606050199662, | |
| "eval_rouge2": 0.21830269894292892, | |
| "eval_rougeL": 0.33251263790168706, | |
| "eval_rougeLsum": 0.338738236025534, | |
| "eval_runtime": 123.6661, | |
| "eval_samples_per_second": 3.76, | |
| "eval_steps_per_second": 0.946, | |
| "step": 5115 | |
| }, | |
| { | |
| "epoch": 11.18, | |
| "learning_rate": 3.1362007168458786e-06, | |
| "loss": 2.184, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 11.4, | |
| "learning_rate": 3.1003584229390683e-06, | |
| "loss": 2.1577, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 11.61, | |
| "learning_rate": 3.0645161290322584e-06, | |
| "loss": 2.1454, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 11.83, | |
| "learning_rate": 3.0286738351254485e-06, | |
| "loss": 2.1668, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 2.019313335418701, | |
| "eval_rouge1": 0.3712265009866553, | |
| "eval_rouge2": 0.2205852840916728, | |
| "eval_rougeL": 0.33092303917646704, | |
| "eval_rougeLsum": 0.3368955402181135, | |
| "eval_runtime": 123.2988, | |
| "eval_samples_per_second": 3.771, | |
| "eval_steps_per_second": 0.949, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 12.04, | |
| "learning_rate": 2.992831541218638e-06, | |
| "loss": 2.1253, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 12.26, | |
| "learning_rate": 2.9569892473118283e-06, | |
| "loss": 2.1418, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 12.47, | |
| "learning_rate": 2.921146953405018e-06, | |
| "loss": 2.1566, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 12.69, | |
| "learning_rate": 2.885304659498208e-06, | |
| "loss": 2.0878, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 12.9, | |
| "learning_rate": 2.849462365591398e-06, | |
| "loss": 2.1043, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 2.0104010105133057, | |
| "eval_rouge1": 0.37304680542883845, | |
| "eval_rouge2": 0.21933710465212664, | |
| "eval_rougeL": 0.3330043221849929, | |
| "eval_rougeLsum": 0.3386272891983395, | |
| "eval_runtime": 122.5914, | |
| "eval_samples_per_second": 3.793, | |
| "eval_steps_per_second": 0.954, | |
| "step": 6045 | |
| }, | |
| { | |
| "epoch": 13.12, | |
| "learning_rate": 2.813620071684588e-06, | |
| "loss": 2.1112, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "learning_rate": 2.7777777777777783e-06, | |
| "loss": 2.1108, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 13.55, | |
| "learning_rate": 2.7419354838709676e-06, | |
| "loss": 2.0588, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 13.76, | |
| "learning_rate": 2.706093189964158e-06, | |
| "loss": 2.1374, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 13.98, | |
| "learning_rate": 2.670250896057348e-06, | |
| "loss": 2.1105, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 2.0007288455963135, | |
| "eval_rouge1": 0.3726819527146452, | |
| "eval_rouge2": 0.21762553499708942, | |
| "eval_rougeL": 0.3318504683532579, | |
| "eval_rougeLsum": 0.33850204873356216, | |
| "eval_runtime": 121.8594, | |
| "eval_samples_per_second": 3.816, | |
| "eval_steps_per_second": 0.96, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 14.19, | |
| "learning_rate": 2.634408602150538e-06, | |
| "loss": 2.0344, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 14.41, | |
| "learning_rate": 2.598566308243728e-06, | |
| "loss": 2.132, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 14.62, | |
| "learning_rate": 2.5627240143369176e-06, | |
| "loss": 2.0991, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 14.84, | |
| "learning_rate": 2.5268817204301077e-06, | |
| "loss": 2.1107, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 1.9940224885940552, | |
| "eval_rouge1": 0.3706106479406549, | |
| "eval_rouge2": 0.21664139194802018, | |
| "eval_rougeL": 0.3308301392858899, | |
| "eval_rougeLsum": 0.33710889659653737, | |
| "eval_runtime": 122.4456, | |
| "eval_samples_per_second": 3.798, | |
| "eval_steps_per_second": 0.956, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 15.05, | |
| "learning_rate": 2.4910394265232974e-06, | |
| "loss": 2.0848, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 15.27, | |
| "learning_rate": 2.455197132616488e-06, | |
| "loss": 2.0577, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 15.48, | |
| "learning_rate": 2.4193548387096776e-06, | |
| "loss": 2.1239, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 15.7, | |
| "learning_rate": 2.3835125448028677e-06, | |
| "loss": 2.0806, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 15.91, | |
| "learning_rate": 2.3476702508960574e-06, | |
| "loss": 2.0414, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 1.9866129159927368, | |
| "eval_rouge1": 0.37225097720253997, | |
| "eval_rouge2": 0.21677008547624982, | |
| "eval_rougeL": 0.33223185563868907, | |
| "eval_rougeLsum": 0.3386777282958562, | |
| "eval_runtime": 121.6974, | |
| "eval_samples_per_second": 3.821, | |
| "eval_steps_per_second": 0.961, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 16.13, | |
| "learning_rate": 2.3118279569892475e-06, | |
| "loss": 2.1197, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 16.34, | |
| "learning_rate": 2.2759856630824376e-06, | |
| "loss": 2.0408, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 16.56, | |
| "learning_rate": 2.2401433691756277e-06, | |
| "loss": 2.1108, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 16.77, | |
| "learning_rate": 2.2043010752688173e-06, | |
| "loss": 2.0352, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "learning_rate": 2.1684587813620074e-06, | |
| "loss": 2.0967, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 1.9800302982330322, | |
| "eval_rouge1": 0.3727733319379002, | |
| "eval_rouge2": 0.21755161592237982, | |
| "eval_rougeL": 0.3335382496389028, | |
| "eval_rougeLsum": 0.33953249788570733, | |
| "eval_runtime": 121.0282, | |
| "eval_samples_per_second": 3.842, | |
| "eval_steps_per_second": 0.967, | |
| "step": 7905 | |
| }, | |
| { | |
| "epoch": 17.2, | |
| "learning_rate": 2.132616487455197e-06, | |
| "loss": 1.9761, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 17.42, | |
| "learning_rate": 2.096774193548387e-06, | |
| "loss": 2.1014, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 17.63, | |
| "learning_rate": 2.0609318996415773e-06, | |
| "loss": 2.0746, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 17.85, | |
| "learning_rate": 2.025089605734767e-06, | |
| "loss": 2.0348, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 1.9747109413146973, | |
| "eval_rouge1": 0.37309307818896603, | |
| "eval_rouge2": 0.21910694351175308, | |
| "eval_rougeL": 0.3335975489248736, | |
| "eval_rougeLsum": 0.3397532162946465, | |
| "eval_runtime": 121.7383, | |
| "eval_samples_per_second": 3.82, | |
| "eval_steps_per_second": 0.961, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 18.06, | |
| "learning_rate": 1.989247311827957e-06, | |
| "loss": 2.0409, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 18.28, | |
| "learning_rate": 1.953405017921147e-06, | |
| "loss": 2.0045, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 18.49, | |
| "learning_rate": 1.9175627240143373e-06, | |
| "loss": 2.0677, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 18.71, | |
| "learning_rate": 1.881720430107527e-06, | |
| "loss": 2.0458, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 18.92, | |
| "learning_rate": 1.845878136200717e-06, | |
| "loss": 2.0386, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 1.9697682857513428, | |
| "eval_rouge1": 0.37437363802915335, | |
| "eval_rouge2": 0.22043247557418233, | |
| "eval_rougeL": 0.3341597802534484, | |
| "eval_rougeLsum": 0.34031460529982227, | |
| "eval_runtime": 126.0543, | |
| "eval_samples_per_second": 3.689, | |
| "eval_steps_per_second": 0.928, | |
| "step": 8835 | |
| }, | |
| { | |
| "epoch": 19.14, | |
| "learning_rate": 1.810035842293907e-06, | |
| "loss": 2.0449, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 19.35, | |
| "learning_rate": 1.774193548387097e-06, | |
| "loss": 2.0316, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 19.57, | |
| "learning_rate": 1.7383512544802869e-06, | |
| "loss": 2.0353, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 19.78, | |
| "learning_rate": 1.7025089605734768e-06, | |
| "loss": 2.0114, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 2.0455, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 1.9655319452285767, | |
| "eval_rouge1": 0.3742240497073518, | |
| "eval_rouge2": 0.22064343083294777, | |
| "eval_rougeL": 0.33460487470233025, | |
| "eval_rougeLsum": 0.340864806277791, | |
| "eval_runtime": 125.1186, | |
| "eval_samples_per_second": 3.716, | |
| "eval_steps_per_second": 0.935, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 20.22, | |
| "learning_rate": 1.630824372759857e-06, | |
| "loss": 2.0064, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 20.43, | |
| "learning_rate": 1.5949820788530469e-06, | |
| "loss": 2.0474, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 20.65, | |
| "learning_rate": 1.5591397849462367e-06, | |
| "loss": 2.0682, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 20.86, | |
| "learning_rate": 1.5232974910394266e-06, | |
| "loss": 1.9697, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 1.9609689712524414, | |
| "eval_rouge1": 0.3746966258892264, | |
| "eval_rouge2": 0.21941379395076405, | |
| "eval_rougeL": 0.3355003953460309, | |
| "eval_rougeLsum": 0.34172515637886314, | |
| "eval_runtime": 128.3527, | |
| "eval_samples_per_second": 3.623, | |
| "eval_steps_per_second": 0.912, | |
| "step": 9765 | |
| }, | |
| { | |
| "epoch": 21.08, | |
| "learning_rate": 1.4874551971326165e-06, | |
| "loss": 2.0284, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 21.29, | |
| "learning_rate": 1.4516129032258066e-06, | |
| "loss": 2.073, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 21.51, | |
| "learning_rate": 1.4157706093189965e-06, | |
| "loss": 2.0222, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 21.72, | |
| "learning_rate": 1.3799283154121864e-06, | |
| "loss": 2.0401, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 21.94, | |
| "learning_rate": 1.3440860215053765e-06, | |
| "loss": 2.0002, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 1.957392692565918, | |
| "eval_rouge1": 0.37462712554849187, | |
| "eval_rouge2": 0.21992028420969706, | |
| "eval_rougeL": 0.33560886975550064, | |
| "eval_rougeLsum": 0.34162973068900326, | |
| "eval_runtime": 122.27, | |
| "eval_samples_per_second": 3.803, | |
| "eval_steps_per_second": 0.957, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 22.15, | |
| "learning_rate": 1.3082437275985666e-06, | |
| "loss": 1.9805, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 22.37, | |
| "learning_rate": 1.2724014336917565e-06, | |
| "loss": 2.0016, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 22.58, | |
| "learning_rate": 1.2365591397849463e-06, | |
| "loss": 2.0184, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 22.8, | |
| "learning_rate": 1.2007168458781362e-06, | |
| "loss": 2.0238, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 1.9541218280792236, | |
| "eval_rouge1": 0.37477500267736275, | |
| "eval_rouge2": 0.21863545453296632, | |
| "eval_rougeL": 0.3354338958099685, | |
| "eval_rougeLsum": 0.3414596818505722, | |
| "eval_runtime": 120.9729, | |
| "eval_samples_per_second": 3.844, | |
| "eval_steps_per_second": 0.967, | |
| "step": 10695 | |
| }, | |
| { | |
| "epoch": 23.01, | |
| "learning_rate": 1.1648745519713263e-06, | |
| "loss": 2.0737, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 23.23, | |
| "learning_rate": 1.1290322580645162e-06, | |
| "loss": 2.013, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 23.44, | |
| "learning_rate": 1.0931899641577063e-06, | |
| "loss": 1.9844, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 23.66, | |
| "learning_rate": 1.0573476702508962e-06, | |
| "loss": 2.014, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 23.87, | |
| "learning_rate": 1.021505376344086e-06, | |
| "loss": 2.0421, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 1.9520260095596313, | |
| "eval_rouge1": 0.3771021997183883, | |
| "eval_rouge2": 0.21990144851690735, | |
| "eval_rougeL": 0.3374010954787996, | |
| "eval_rougeLsum": 0.3439051210524349, | |
| "eval_runtime": 119.9413, | |
| "eval_samples_per_second": 3.877, | |
| "eval_steps_per_second": 0.975, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 24.09, | |
| "learning_rate": 9.856630824372762e-07, | |
| "loss": 1.9993, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 24.3, | |
| "learning_rate": 9.498207885304659e-07, | |
| "loss": 2.0148, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 24.52, | |
| "learning_rate": 9.13978494623656e-07, | |
| "loss": 2.0094, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 24.73, | |
| "learning_rate": 8.781362007168459e-07, | |
| "loss": 2.0395, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 24.95, | |
| "learning_rate": 8.422939068100359e-07, | |
| "loss": 2.0001, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 1.9496192932128906, | |
| "eval_rouge1": 0.3773377632907566, | |
| "eval_rouge2": 0.2204940276436899, | |
| "eval_rougeL": 0.33747034227999706, | |
| "eval_rougeLsum": 0.34393368298115334, | |
| "eval_runtime": 120.2791, | |
| "eval_samples_per_second": 3.866, | |
| "eval_steps_per_second": 0.973, | |
| "step": 11625 | |
| }, | |
| { | |
| "epoch": 25.16, | |
| "learning_rate": 8.064516129032258e-07, | |
| "loss": 2.0302, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 25.38, | |
| "learning_rate": 7.706093189964159e-07, | |
| "loss": 2.045, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 25.59, | |
| "learning_rate": 7.347670250896058e-07, | |
| "loss": 1.9614, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 25.81, | |
| "learning_rate": 6.989247311827957e-07, | |
| "loss": 1.93, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 1.9484126567840576, | |
| "eval_rouge1": 0.37597520520383987, | |
| "eval_rouge2": 0.21986201392787488, | |
| "eval_rougeL": 0.3357415231000408, | |
| "eval_rougeLsum": 0.34205147158317384, | |
| "eval_runtime": 121.7783, | |
| "eval_samples_per_second": 3.818, | |
| "eval_steps_per_second": 0.961, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 26.02, | |
| "learning_rate": 6.630824372759858e-07, | |
| "loss": 2.0524, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 26.24, | |
| "learning_rate": 6.272401433691756e-07, | |
| "loss": 1.9644, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 26.45, | |
| "learning_rate": 5.913978494623656e-07, | |
| "loss": 2.0117, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 26.67, | |
| "learning_rate": 5.555555555555555e-07, | |
| "loss": 1.9847, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 26.88, | |
| "learning_rate": 5.197132616487455e-07, | |
| "loss": 2.0453, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_loss": 1.9469475746154785, | |
| "eval_rouge1": 0.3740165446355651, | |
| "eval_rouge2": 0.21772737050061053, | |
| "eval_rougeL": 0.333510117519976, | |
| "eval_rougeLsum": 0.34028741836853005, | |
| "eval_runtime": 119.9633, | |
| "eval_samples_per_second": 3.876, | |
| "eval_steps_per_second": 0.975, | |
| "step": 12555 | |
| }, | |
| { | |
| "epoch": 27.1, | |
| "learning_rate": 4.838709677419355e-07, | |
| "loss": 2.0035, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 27.31, | |
| "learning_rate": 4.480286738351255e-07, | |
| "loss": 1.9638, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 27.53, | |
| "learning_rate": 4.1218637992831543e-07, | |
| "loss": 1.9471, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 27.74, | |
| "learning_rate": 3.763440860215054e-07, | |
| "loss": 2.0266, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 27.96, | |
| "learning_rate": 3.405017921146954e-07, | |
| "loss": 1.9894, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_loss": 1.9458811283111572, | |
| "eval_rouge1": 0.3742795595221444, | |
| "eval_rouge2": 0.21805194369064132, | |
| "eval_rougeL": 0.3337992892424988, | |
| "eval_rougeLsum": 0.34075989492689374, | |
| "eval_runtime": 125.2596, | |
| "eval_samples_per_second": 3.712, | |
| "eval_steps_per_second": 0.934, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 28.17, | |
| "learning_rate": 3.0465949820788535e-07, | |
| "loss": 1.9882, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 28.39, | |
| "learning_rate": 2.688172043010753e-07, | |
| "loss": 2.0092, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 28.6, | |
| "learning_rate": 2.3297491039426527e-07, | |
| "loss": 2.0301, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 28.82, | |
| "learning_rate": 1.971326164874552e-07, | |
| "loss": 2.0282, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_loss": 1.9454594850540161, | |
| "eval_rouge1": 0.3743638040177606, | |
| "eval_rouge2": 0.21796734115768185, | |
| "eval_rougeL": 0.33380358105146785, | |
| "eval_rougeLsum": 0.3407996691800308, | |
| "eval_runtime": 123.622, | |
| "eval_samples_per_second": 3.761, | |
| "eval_steps_per_second": 0.946, | |
| "step": 13485 | |
| }, | |
| { | |
| "epoch": 29.03, | |
| "learning_rate": 1.6129032258064518e-07, | |
| "loss": 1.9566, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 29.25, | |
| "learning_rate": 1.2544802867383514e-07, | |
| "loss": 1.9519, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 29.46, | |
| "learning_rate": 8.960573476702509e-08, | |
| "loss": 1.9502, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 29.68, | |
| "learning_rate": 5.376344086021506e-08, | |
| "loss": 2.0183, | |
| "step": 13800 | |
| } | |
| ], | |
| "max_steps": 13950, | |
| "num_train_epochs": 30, | |
| "total_flos": 2196944996493312.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |