{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.78494623655914, "global_step": 13850, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22, "learning_rate": 4.964157706093191e-06, "loss": 2.9056, "step": 100 }, { "epoch": 0.43, "learning_rate": 4.928315412186381e-06, "loss": 2.7058, "step": 200 }, { "epoch": 0.65, "learning_rate": 4.89247311827957e-06, "loss": 2.6284, "step": 300 }, { "epoch": 0.86, "learning_rate": 4.85663082437276e-06, "loss": 2.6307, "step": 400 }, { "epoch": 1.0, "eval_loss": 2.32030987739563, "eval_rouge1": 0.32786127224568706, "eval_rouge2": 0.19018098494575836, "eval_rougeL": 0.2859655333642799, "eval_rougeLsum": 0.2934811385373408, "eval_runtime": 123.7208, "eval_samples_per_second": 3.758, "eval_steps_per_second": 0.946, "step": 465 }, { "epoch": 1.08, "learning_rate": 4.8207885304659505e-06, "loss": 2.5668, "step": 500 }, { "epoch": 1.29, "learning_rate": 4.78494623655914e-06, "loss": 2.5604, "step": 600 }, { "epoch": 1.51, "learning_rate": 4.74910394265233e-06, "loss": 2.5143, "step": 700 }, { "epoch": 1.72, "learning_rate": 4.71326164874552e-06, "loss": 2.4849, "step": 800 }, { "epoch": 1.94, "learning_rate": 4.67741935483871e-06, "loss": 2.5315, "step": 900 }, { "epoch": 2.0, "eval_loss": 2.240959644317627, "eval_rouge1": 0.3420570918954662, "eval_rouge2": 0.2060604177908008, "eval_rougeL": 0.3043927804421642, "eval_rougeLsum": 0.31155659054764084, "eval_runtime": 120.8082, "eval_samples_per_second": 3.849, "eval_steps_per_second": 0.968, "step": 930 }, { "epoch": 2.15, "learning_rate": 4.6415770609319e-06, "loss": 2.4132, "step": 1000 }, { "epoch": 2.37, "learning_rate": 4.60573476702509e-06, "loss": 2.4667, "step": 1100 }, { "epoch": 2.58, "learning_rate": 4.56989247311828e-06, "loss": 2.4937, "step": 1200 }, { "epoch": 2.8, "learning_rate": 4.5340501792114695e-06, "loss": 2.4156, "step": 1300 }, { "epoch": 3.0, "eval_loss": 2.1922943592071533, "eval_rouge1": 0.34994978899734663, "eval_rouge2": 0.21185951083528443, "eval_rougeL": 0.31353678054844464, "eval_rougeLsum": 0.3205520009823804, "eval_runtime": 128.6775, "eval_samples_per_second": 3.614, "eval_steps_per_second": 0.909, "step": 1395 }, { "epoch": 3.01, "learning_rate": 4.49820788530466e-06, "loss": 2.3945, "step": 1400 }, { "epoch": 3.23, "learning_rate": 4.46236559139785e-06, "loss": 2.4332, "step": 1500 }, { "epoch": 3.44, "learning_rate": 4.42652329749104e-06, "loss": 2.3549, "step": 1600 }, { "epoch": 3.66, "learning_rate": 4.39068100358423e-06, "loss": 2.3908, "step": 1700 }, { "epoch": 3.87, "learning_rate": 4.35483870967742e-06, "loss": 2.3961, "step": 1800 }, { "epoch": 4.0, "eval_loss": 2.15604567527771, "eval_rouge1": 0.35503098444416914, "eval_rouge2": 0.21345222912541117, "eval_rougeL": 0.3183951453816757, "eval_rougeLsum": 0.3245199945586198, "eval_runtime": 120.7681, "eval_samples_per_second": 3.85, "eval_steps_per_second": 0.969, "step": 1860 }, { "epoch": 4.09, "learning_rate": 4.31899641577061e-06, "loss": 2.3369, "step": 1900 }, { "epoch": 4.3, "learning_rate": 4.2831541218638e-06, "loss": 2.3171, "step": 2000 }, { "epoch": 4.52, "learning_rate": 4.2473118279569895e-06, "loss": 2.3699, "step": 2100 }, { "epoch": 4.73, "learning_rate": 4.211469534050179e-06, "loss": 2.3445, "step": 2200 }, { "epoch": 4.95, "learning_rate": 4.17562724014337e-06, "loss": 2.3595, "step": 2300 }, { "epoch": 5.0, "eval_loss": 2.128229856491089, "eval_rouge1": 0.35828380214110583, "eval_rouge2": 0.2154697652390767, "eval_rougeL": 0.32071886194683874, "eval_rougeLsum": 0.326832484203238, "eval_runtime": 121.0509, "eval_samples_per_second": 3.841, "eval_steps_per_second": 0.967, "step": 2325 }, { "epoch": 5.16, "learning_rate": 4.139784946236559e-06, "loss": 2.2672, "step": 2400 }, { "epoch": 5.38, "learning_rate": 4.103942652329749e-06, "loss": 2.2787, "step": 2500 }, { "epoch": 5.59, "learning_rate": 4.0681003584229395e-06, "loss": 2.2953, "step": 2600 }, { "epoch": 5.81, "learning_rate": 4.032258064516129e-06, "loss": 2.3433, "step": 2700 }, { "epoch": 6.0, "eval_loss": 2.1061463356018066, "eval_rouge1": 0.3602290098399774, "eval_rouge2": 0.21720084816876015, "eval_rougeL": 0.321761715180238, "eval_rougeLsum": 0.3282393870616483, "eval_runtime": 120.1834, "eval_samples_per_second": 3.869, "eval_steps_per_second": 0.974, "step": 2790 }, { "epoch": 6.02, "learning_rate": 3.996415770609319e-06, "loss": 2.2877, "step": 2800 }, { "epoch": 6.24, "learning_rate": 3.960573476702509e-06, "loss": 2.245, "step": 2900 }, { "epoch": 6.45, "learning_rate": 3.924731182795699e-06, "loss": 2.3009, "step": 3000 }, { "epoch": 6.67, "learning_rate": 3.88888888888889e-06, "loss": 2.2201, "step": 3100 }, { "epoch": 6.88, "learning_rate": 3.853046594982079e-06, "loss": 2.2686, "step": 3200 }, { "epoch": 7.0, "eval_loss": 2.085155487060547, "eval_rouge1": 0.36126040343454235, "eval_rouge2": 0.21547432955733056, "eval_rougeL": 0.3220760761874921, "eval_rougeLsum": 0.3290219607741144, "eval_runtime": 123.1351, "eval_samples_per_second": 3.776, "eval_steps_per_second": 0.95, "step": 3255 }, { "epoch": 7.1, "learning_rate": 3.817204301075269e-06, "loss": 2.2865, "step": 3300 }, { "epoch": 7.31, "learning_rate": 3.7813620071684594e-06, "loss": 2.2324, "step": 3400 }, { "epoch": 7.53, "learning_rate": 3.7455197132616487e-06, "loss": 2.1958, "step": 3500 }, { "epoch": 7.74, "learning_rate": 3.7096774193548392e-06, "loss": 2.247, "step": 3600 }, { "epoch": 7.96, "learning_rate": 3.6738351254480293e-06, "loss": 2.1938, "step": 3700 }, { "epoch": 8.0, "eval_loss": 2.067925214767456, "eval_rouge1": 0.3693846075750796, "eval_rouge2": 0.21744552490349583, "eval_rougeL": 0.33008227975421506, "eval_rougeLsum": 0.3366887788509312, "eval_runtime": 122.9004, "eval_samples_per_second": 3.784, "eval_steps_per_second": 0.952, "step": 3720 }, { "epoch": 8.17, "learning_rate": 3.637992831541219e-06, "loss": 2.2624, "step": 3800 }, { "epoch": 8.39, "learning_rate": 3.602150537634409e-06, "loss": 2.1922, "step": 3900 }, { "epoch": 8.6, "learning_rate": 3.5663082437275988e-06, "loss": 2.1876, "step": 4000 }, { "epoch": 8.82, "learning_rate": 3.530465949820789e-06, "loss": 2.2504, "step": 4100 }, { "epoch": 9.0, "eval_loss": 2.0534462928771973, "eval_rouge1": 0.3699290195093533, "eval_rouge2": 0.21788826762887392, "eval_rougeL": 0.3292248508798441, "eval_rougeLsum": 0.3357683304940864, "eval_runtime": 120.1177, "eval_samples_per_second": 3.871, "eval_steps_per_second": 0.974, "step": 4185 }, { "epoch": 9.03, "learning_rate": 3.494623655913979e-06, "loss": 2.1819, "step": 4200 }, { "epoch": 9.25, "learning_rate": 3.4587813620071686e-06, "loss": 2.1755, "step": 4300 }, { "epoch": 9.46, "learning_rate": 3.4229390681003587e-06, "loss": 2.2398, "step": 4400 }, { "epoch": 9.68, "learning_rate": 3.3870967741935484e-06, "loss": 2.0979, "step": 4500 }, { "epoch": 9.89, "learning_rate": 3.3512544802867385e-06, "loss": 2.2245, "step": 4600 }, { "epoch": 10.0, "eval_loss": 2.0405704975128174, "eval_rouge1": 0.36982395276855895, "eval_rouge2": 0.21943719038802345, "eval_rougeL": 0.32884583320308947, "eval_rougeLsum": 0.3352203622134837, "eval_runtime": 120.0171, "eval_samples_per_second": 3.874, "eval_steps_per_second": 0.975, "step": 4650 }, { "epoch": 10.11, "learning_rate": 3.3154121863799286e-06, "loss": 2.1732, "step": 4700 }, { "epoch": 10.32, "learning_rate": 3.2795698924731183e-06, "loss": 2.1806, "step": 4800 }, { "epoch": 10.54, "learning_rate": 3.2437275985663088e-06, "loss": 2.1288, "step": 4900 }, { "epoch": 10.75, "learning_rate": 3.207885304659498e-06, "loss": 2.1696, "step": 5000 }, { "epoch": 10.97, "learning_rate": 3.1720430107526885e-06, "loss": 2.2101, "step": 5100 }, { "epoch": 11.0, "eval_loss": 2.0279901027679443, "eval_rouge1": 0.3734606050199662, "eval_rouge2": 0.21830269894292892, "eval_rougeL": 0.33251263790168706, "eval_rougeLsum": 0.338738236025534, "eval_runtime": 123.6661, "eval_samples_per_second": 3.76, "eval_steps_per_second": 0.946, "step": 5115 }, { "epoch": 11.18, "learning_rate": 3.1362007168458786e-06, "loss": 2.184, "step": 5200 }, { "epoch": 11.4, "learning_rate": 3.1003584229390683e-06, "loss": 2.1577, "step": 5300 }, { "epoch": 11.61, "learning_rate": 3.0645161290322584e-06, "loss": 2.1454, "step": 5400 }, { "epoch": 11.83, "learning_rate": 3.0286738351254485e-06, "loss": 2.1668, "step": 5500 }, { "epoch": 12.0, "eval_loss": 2.019313335418701, "eval_rouge1": 0.3712265009866553, "eval_rouge2": 0.2205852840916728, "eval_rougeL": 0.33092303917646704, "eval_rougeLsum": 0.3368955402181135, "eval_runtime": 123.2988, "eval_samples_per_second": 3.771, "eval_steps_per_second": 0.949, "step": 5580 }, { "epoch": 12.04, "learning_rate": 2.992831541218638e-06, "loss": 2.1253, "step": 5600 }, { "epoch": 12.26, "learning_rate": 2.9569892473118283e-06, "loss": 2.1418, "step": 5700 }, { "epoch": 12.47, "learning_rate": 2.921146953405018e-06, "loss": 2.1566, "step": 5800 }, { "epoch": 12.69, "learning_rate": 2.885304659498208e-06, "loss": 2.0878, "step": 5900 }, { "epoch": 12.9, "learning_rate": 2.849462365591398e-06, "loss": 2.1043, "step": 6000 }, { "epoch": 13.0, "eval_loss": 2.0104010105133057, "eval_rouge1": 0.37304680542883845, "eval_rouge2": 0.21933710465212664, "eval_rougeL": 0.3330043221849929, "eval_rougeLsum": 0.3386272891983395, "eval_runtime": 122.5914, "eval_samples_per_second": 3.793, "eval_steps_per_second": 0.954, "step": 6045 }, { "epoch": 13.12, "learning_rate": 2.813620071684588e-06, "loss": 2.1112, "step": 6100 }, { "epoch": 13.33, "learning_rate": 2.7777777777777783e-06, "loss": 2.1108, "step": 6200 }, { "epoch": 13.55, "learning_rate": 2.7419354838709676e-06, "loss": 2.0588, "step": 6300 }, { "epoch": 13.76, "learning_rate": 2.706093189964158e-06, "loss": 2.1374, "step": 6400 }, { "epoch": 13.98, "learning_rate": 2.670250896057348e-06, "loss": 2.1105, "step": 6500 }, { "epoch": 14.0, "eval_loss": 2.0007288455963135, "eval_rouge1": 0.3726819527146452, "eval_rouge2": 0.21762553499708942, "eval_rougeL": 0.3318504683532579, "eval_rougeLsum": 0.33850204873356216, "eval_runtime": 121.8594, "eval_samples_per_second": 3.816, "eval_steps_per_second": 0.96, "step": 6510 }, { "epoch": 14.19, "learning_rate": 2.634408602150538e-06, "loss": 2.0344, "step": 6600 }, { "epoch": 14.41, "learning_rate": 2.598566308243728e-06, "loss": 2.132, "step": 6700 }, { "epoch": 14.62, "learning_rate": 2.5627240143369176e-06, "loss": 2.0991, "step": 6800 }, { "epoch": 14.84, "learning_rate": 2.5268817204301077e-06, "loss": 2.1107, "step": 6900 }, { "epoch": 15.0, "eval_loss": 1.9940224885940552, "eval_rouge1": 0.3706106479406549, "eval_rouge2": 0.21664139194802018, "eval_rougeL": 0.3308301392858899, "eval_rougeLsum": 0.33710889659653737, "eval_runtime": 122.4456, "eval_samples_per_second": 3.798, "eval_steps_per_second": 0.956, "step": 6975 }, { "epoch": 15.05, "learning_rate": 2.4910394265232974e-06, "loss": 2.0848, "step": 7000 }, { "epoch": 15.27, "learning_rate": 2.455197132616488e-06, "loss": 2.0577, "step": 7100 }, { "epoch": 15.48, "learning_rate": 2.4193548387096776e-06, "loss": 2.1239, "step": 7200 }, { "epoch": 15.7, "learning_rate": 2.3835125448028677e-06, "loss": 2.0806, "step": 7300 }, { "epoch": 15.91, "learning_rate": 2.3476702508960574e-06, "loss": 2.0414, "step": 7400 }, { "epoch": 16.0, "eval_loss": 1.9866129159927368, "eval_rouge1": 0.37225097720253997, "eval_rouge2": 0.21677008547624982, "eval_rougeL": 0.33223185563868907, "eval_rougeLsum": 0.3386777282958562, "eval_runtime": 121.6974, "eval_samples_per_second": 3.821, "eval_steps_per_second": 0.961, "step": 7440 }, { "epoch": 16.13, "learning_rate": 2.3118279569892475e-06, "loss": 2.1197, "step": 7500 }, { "epoch": 16.34, "learning_rate": 2.2759856630824376e-06, "loss": 2.0408, "step": 7600 }, { "epoch": 16.56, "learning_rate": 2.2401433691756277e-06, "loss": 2.1108, "step": 7700 }, { "epoch": 16.77, "learning_rate": 2.2043010752688173e-06, "loss": 2.0352, "step": 7800 }, { "epoch": 16.99, "learning_rate": 2.1684587813620074e-06, "loss": 2.0967, "step": 7900 }, { "epoch": 17.0, "eval_loss": 1.9800302982330322, "eval_rouge1": 0.3727733319379002, "eval_rouge2": 0.21755161592237982, "eval_rougeL": 0.3335382496389028, "eval_rougeLsum": 0.33953249788570733, "eval_runtime": 121.0282, "eval_samples_per_second": 3.842, "eval_steps_per_second": 0.967, "step": 7905 }, { "epoch": 17.2, "learning_rate": 2.132616487455197e-06, "loss": 1.9761, "step": 8000 }, { "epoch": 17.42, "learning_rate": 2.096774193548387e-06, "loss": 2.1014, "step": 8100 }, { "epoch": 17.63, "learning_rate": 2.0609318996415773e-06, "loss": 2.0746, "step": 8200 }, { "epoch": 17.85, "learning_rate": 2.025089605734767e-06, "loss": 2.0348, "step": 8300 }, { "epoch": 18.0, "eval_loss": 1.9747109413146973, "eval_rouge1": 0.37309307818896603, "eval_rouge2": 0.21910694351175308, "eval_rougeL": 0.3335975489248736, "eval_rougeLsum": 0.3397532162946465, "eval_runtime": 121.7383, "eval_samples_per_second": 3.82, "eval_steps_per_second": 0.961, "step": 8370 }, { "epoch": 18.06, "learning_rate": 1.989247311827957e-06, "loss": 2.0409, "step": 8400 }, { "epoch": 18.28, "learning_rate": 1.953405017921147e-06, "loss": 2.0045, "step": 8500 }, { "epoch": 18.49, "learning_rate": 1.9175627240143373e-06, "loss": 2.0677, "step": 8600 }, { "epoch": 18.71, "learning_rate": 1.881720430107527e-06, "loss": 2.0458, "step": 8700 }, { "epoch": 18.92, "learning_rate": 1.845878136200717e-06, "loss": 2.0386, "step": 8800 }, { "epoch": 19.0, "eval_loss": 1.9697682857513428, "eval_rouge1": 0.37437363802915335, "eval_rouge2": 0.22043247557418233, "eval_rougeL": 0.3341597802534484, "eval_rougeLsum": 0.34031460529982227, "eval_runtime": 126.0543, "eval_samples_per_second": 3.689, "eval_steps_per_second": 0.928, "step": 8835 }, { "epoch": 19.14, "learning_rate": 1.810035842293907e-06, "loss": 2.0449, "step": 8900 }, { "epoch": 19.35, "learning_rate": 1.774193548387097e-06, "loss": 2.0316, "step": 9000 }, { "epoch": 19.57, "learning_rate": 1.7383512544802869e-06, "loss": 2.0353, "step": 9100 }, { "epoch": 19.78, "learning_rate": 1.7025089605734768e-06, "loss": 2.0114, "step": 9200 }, { "epoch": 20.0, "learning_rate": 1.6666666666666667e-06, "loss": 2.0455, "step": 9300 }, { "epoch": 20.0, "eval_loss": 1.9655319452285767, "eval_rouge1": 0.3742240497073518, "eval_rouge2": 0.22064343083294777, "eval_rougeL": 0.33460487470233025, "eval_rougeLsum": 0.340864806277791, "eval_runtime": 125.1186, "eval_samples_per_second": 3.716, "eval_steps_per_second": 0.935, "step": 9300 }, { "epoch": 20.22, "learning_rate": 1.630824372759857e-06, "loss": 2.0064, "step": 9400 }, { "epoch": 20.43, "learning_rate": 1.5949820788530469e-06, "loss": 2.0474, "step": 9500 }, { "epoch": 20.65, "learning_rate": 1.5591397849462367e-06, "loss": 2.0682, "step": 9600 }, { "epoch": 20.86, "learning_rate": 1.5232974910394266e-06, "loss": 1.9697, "step": 9700 }, { "epoch": 21.0, "eval_loss": 1.9609689712524414, "eval_rouge1": 0.3746966258892264, "eval_rouge2": 0.21941379395076405, "eval_rougeL": 0.3355003953460309, "eval_rougeLsum": 0.34172515637886314, "eval_runtime": 128.3527, "eval_samples_per_second": 3.623, "eval_steps_per_second": 0.912, "step": 9765 }, { "epoch": 21.08, "learning_rate": 1.4874551971326165e-06, "loss": 2.0284, "step": 9800 }, { "epoch": 21.29, "learning_rate": 1.4516129032258066e-06, "loss": 2.073, "step": 9900 }, { "epoch": 21.51, "learning_rate": 1.4157706093189965e-06, "loss": 2.0222, "step": 10000 }, { "epoch": 21.72, "learning_rate": 1.3799283154121864e-06, "loss": 2.0401, "step": 10100 }, { "epoch": 21.94, "learning_rate": 1.3440860215053765e-06, "loss": 2.0002, "step": 10200 }, { "epoch": 22.0, "eval_loss": 1.957392692565918, "eval_rouge1": 0.37462712554849187, "eval_rouge2": 0.21992028420969706, "eval_rougeL": 0.33560886975550064, "eval_rougeLsum": 0.34162973068900326, "eval_runtime": 122.27, "eval_samples_per_second": 3.803, "eval_steps_per_second": 0.957, "step": 10230 }, { "epoch": 22.15, "learning_rate": 1.3082437275985666e-06, "loss": 1.9805, "step": 10300 }, { "epoch": 22.37, "learning_rate": 1.2724014336917565e-06, "loss": 2.0016, "step": 10400 }, { "epoch": 22.58, "learning_rate": 1.2365591397849463e-06, "loss": 2.0184, "step": 10500 }, { "epoch": 22.8, "learning_rate": 1.2007168458781362e-06, "loss": 2.0238, "step": 10600 }, { "epoch": 23.0, "eval_loss": 1.9541218280792236, "eval_rouge1": 0.37477500267736275, "eval_rouge2": 0.21863545453296632, "eval_rougeL": 0.3354338958099685, "eval_rougeLsum": 0.3414596818505722, "eval_runtime": 120.9729, "eval_samples_per_second": 3.844, "eval_steps_per_second": 0.967, "step": 10695 }, { "epoch": 23.01, "learning_rate": 1.1648745519713263e-06, "loss": 2.0737, "step": 10700 }, { "epoch": 23.23, "learning_rate": 1.1290322580645162e-06, "loss": 2.013, "step": 10800 }, { "epoch": 23.44, "learning_rate": 1.0931899641577063e-06, "loss": 1.9844, "step": 10900 }, { "epoch": 23.66, "learning_rate": 1.0573476702508962e-06, "loss": 2.014, "step": 11000 }, { "epoch": 23.87, "learning_rate": 1.021505376344086e-06, "loss": 2.0421, "step": 11100 }, { "epoch": 24.0, "eval_loss": 1.9520260095596313, "eval_rouge1": 0.3771021997183883, "eval_rouge2": 0.21990144851690735, "eval_rougeL": 0.3374010954787996, "eval_rougeLsum": 0.3439051210524349, "eval_runtime": 119.9413, "eval_samples_per_second": 3.877, "eval_steps_per_second": 0.975, "step": 11160 }, { "epoch": 24.09, "learning_rate": 9.856630824372762e-07, "loss": 1.9993, "step": 11200 }, { "epoch": 24.3, "learning_rate": 9.498207885304659e-07, "loss": 2.0148, "step": 11300 }, { "epoch": 24.52, "learning_rate": 9.13978494623656e-07, "loss": 2.0094, "step": 11400 }, { "epoch": 24.73, "learning_rate": 8.781362007168459e-07, "loss": 2.0395, "step": 11500 }, { "epoch": 24.95, "learning_rate": 8.422939068100359e-07, "loss": 2.0001, "step": 11600 }, { "epoch": 25.0, "eval_loss": 1.9496192932128906, "eval_rouge1": 0.3773377632907566, "eval_rouge2": 0.2204940276436899, "eval_rougeL": 0.33747034227999706, "eval_rougeLsum": 0.34393368298115334, "eval_runtime": 120.2791, "eval_samples_per_second": 3.866, "eval_steps_per_second": 0.973, "step": 11625 }, { "epoch": 25.16, "learning_rate": 8.064516129032258e-07, "loss": 2.0302, "step": 11700 }, { "epoch": 25.38, "learning_rate": 7.706093189964159e-07, "loss": 2.045, "step": 11800 }, { "epoch": 25.59, "learning_rate": 7.347670250896058e-07, "loss": 1.9614, "step": 11900 }, { "epoch": 25.81, "learning_rate": 6.989247311827957e-07, "loss": 1.93, "step": 12000 }, { "epoch": 26.0, "eval_loss": 1.9484126567840576, "eval_rouge1": 0.37597520520383987, "eval_rouge2": 0.21986201392787488, "eval_rougeL": 0.3357415231000408, "eval_rougeLsum": 0.34205147158317384, "eval_runtime": 121.7783, "eval_samples_per_second": 3.818, "eval_steps_per_second": 0.961, "step": 12090 }, { "epoch": 26.02, "learning_rate": 6.630824372759858e-07, "loss": 2.0524, "step": 12100 }, { "epoch": 26.24, "learning_rate": 6.272401433691756e-07, "loss": 1.9644, "step": 12200 }, { "epoch": 26.45, "learning_rate": 5.913978494623656e-07, "loss": 2.0117, "step": 12300 }, { "epoch": 26.67, "learning_rate": 5.555555555555555e-07, "loss": 1.9847, "step": 12400 }, { "epoch": 26.88, "learning_rate": 5.197132616487455e-07, "loss": 2.0453, "step": 12500 }, { "epoch": 27.0, "eval_loss": 1.9469475746154785, "eval_rouge1": 0.3740165446355651, "eval_rouge2": 0.21772737050061053, "eval_rougeL": 0.333510117519976, "eval_rougeLsum": 0.34028741836853005, "eval_runtime": 119.9633, "eval_samples_per_second": 3.876, "eval_steps_per_second": 0.975, "step": 12555 }, { "epoch": 27.1, "learning_rate": 4.838709677419355e-07, "loss": 2.0035, "step": 12600 }, { "epoch": 27.31, "learning_rate": 4.480286738351255e-07, "loss": 1.9638, "step": 12700 }, { "epoch": 27.53, "learning_rate": 4.1218637992831543e-07, "loss": 1.9471, "step": 12800 }, { "epoch": 27.74, "learning_rate": 3.763440860215054e-07, "loss": 2.0266, "step": 12900 }, { "epoch": 27.96, "learning_rate": 3.405017921146954e-07, "loss": 1.9894, "step": 13000 }, { "epoch": 28.0, "eval_loss": 1.9458811283111572, "eval_rouge1": 0.3742795595221444, "eval_rouge2": 0.21805194369064132, "eval_rougeL": 0.3337992892424988, "eval_rougeLsum": 0.34075989492689374, "eval_runtime": 125.2596, "eval_samples_per_second": 3.712, "eval_steps_per_second": 0.934, "step": 13020 }, { "epoch": 28.17, "learning_rate": 3.0465949820788535e-07, "loss": 1.9882, "step": 13100 }, { "epoch": 28.39, "learning_rate": 2.688172043010753e-07, "loss": 2.0092, "step": 13200 }, { "epoch": 28.6, "learning_rate": 2.3297491039426527e-07, "loss": 2.0301, "step": 13300 }, { "epoch": 28.82, "learning_rate": 1.971326164874552e-07, "loss": 2.0282, "step": 13400 }, { "epoch": 29.0, "eval_loss": 1.9454594850540161, "eval_rouge1": 0.3743638040177606, "eval_rouge2": 0.21796734115768185, "eval_rougeL": 0.33380358105146785, "eval_rougeLsum": 0.3407996691800308, "eval_runtime": 123.622, "eval_samples_per_second": 3.761, "eval_steps_per_second": 0.946, "step": 13485 }, { "epoch": 29.03, "learning_rate": 1.6129032258064518e-07, "loss": 1.9566, "step": 13500 }, { "epoch": 29.25, "learning_rate": 1.2544802867383514e-07, "loss": 1.9519, "step": 13600 }, { "epoch": 29.46, "learning_rate": 8.960573476702509e-08, "loss": 1.9502, "step": 13700 }, { "epoch": 29.68, "learning_rate": 5.376344086021506e-08, "loss": 2.0183, "step": 13800 } ], "max_steps": 13950, "num_train_epochs": 30, "total_flos": 2196944996493312.0, "trial_name": null, "trial_params": null }