| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.0, | |
| "eval_steps": 100, | |
| "global_step": 4500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 5.555555555555555e-05, | |
| "loss": 41.2711, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0001111111111111111, | |
| "loss": 36.9402, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00016666666666666666, | |
| "loss": 30.5965, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002222222222222222, | |
| "loss": 25.168, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002777777777777778, | |
| "loss": 20.2342, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0003333333333333333, | |
| "loss": 15.0822, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0003888888888888889, | |
| "loss": 10.8182, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0004444444444444444, | |
| "loss": 7.5259, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0005, | |
| "loss": 5.6658, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0004989816700610998, | |
| "loss": 4.1956, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 3.1490368843078613, | |
| "eval_rouge1": 0.15401031761230216, | |
| "eval_rouge2": 0.06183084855722348, | |
| "eval_rougeL": 0.1487638574423534, | |
| "eval_rougeLsum": 0.1488931379114581, | |
| "eval_runtime": 12.0897, | |
| "eval_samples_per_second": 1.654, | |
| "eval_steps_per_second": 0.827, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0004979633401221996, | |
| "loss": 3.6249, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0004969450101832995, | |
| "loss": 3.271, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0004959266802443992, | |
| "loss": 3.1977, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.000494908350305499, | |
| "loss": 2.8583, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0004938900203665988, | |
| "loss": 2.5156, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0004928716904276986, | |
| "loss": 2.1851, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0004918533604887983, | |
| "loss": 1.9845, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0004908350305498982, | |
| "loss": 1.8303, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0004898167006109979, | |
| "loss": 1.7903, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0004887983706720978, | |
| "loss": 1.7158, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 1.259326696395874, | |
| "eval_rouge1": 0.3038539506809194, | |
| "eval_rouge2": 0.1446061100175765, | |
| "eval_rougeL": 0.2647138754144953, | |
| "eval_rougeLsum": 0.26637210414553936, | |
| "eval_runtime": 18.5868, | |
| "eval_samples_per_second": 1.076, | |
| "eval_steps_per_second": 0.538, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0004877800407331975, | |
| "loss": 1.7002, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0004867617107942974, | |
| "loss": 1.6374, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00048574338085539715, | |
| "loss": 1.7013, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0004847250509164969, | |
| "loss": 1.624, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0004837067209775968, | |
| "loss": 1.6216, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00048268839103869654, | |
| "loss": 1.5784, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00048167006109979635, | |
| "loss": 1.5394, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00048065173116089617, | |
| "loss": 1.5302, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00047963340122199593, | |
| "loss": 1.5071, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00047861507128309574, | |
| "loss": 1.526, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_loss": 1.1715915203094482, | |
| "eval_rouge1": 0.29557394630545675, | |
| "eval_rouge2": 0.1360179746171492, | |
| "eval_rougeL": 0.25515731019715504, | |
| "eval_rougeLsum": 0.25695835000784906, | |
| "eval_runtime": 27.3621, | |
| "eval_samples_per_second": 0.731, | |
| "eval_steps_per_second": 0.365, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0004775967413441955, | |
| "loss": 1.4737, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0004765784114052953, | |
| "loss": 1.4955, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00047556008146639513, | |
| "loss": 1.4392, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0004745417515274949, | |
| "loss": 1.4535, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00047352342158859476, | |
| "loss": 1.4625, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.0004725050916496945, | |
| "loss": 1.4353, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0004714867617107943, | |
| "loss": 1.4088, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0004704684317718941, | |
| "loss": 1.4472, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0004694501018329939, | |
| "loss": 1.4318, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.0004684317718940937, | |
| "loss": 1.4318, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 1.1233234405517578, | |
| "eval_rouge1": 0.33840282066392835, | |
| "eval_rouge2": 0.16041046986254748, | |
| "eval_rougeL": 0.2758850020825072, | |
| "eval_rougeLsum": 0.27526491091998107, | |
| "eval_runtime": 25.6589, | |
| "eval_samples_per_second": 0.779, | |
| "eval_steps_per_second": 0.39, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.0004674134419551935, | |
| "loss": 1.3872, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0004663951120162933, | |
| "loss": 1.4539, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00046537678207739307, | |
| "loss": 1.4651, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.0004643584521384929, | |
| "loss": 1.3875, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00046334012219959264, | |
| "loss": 1.4463, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.0004623217922606925, | |
| "loss": 1.4106, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00046130346232179227, | |
| "loss": 1.3987, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.00046028513238289203, | |
| "loss": 1.4239, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.0004592668024439919, | |
| "loss": 1.4184, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00045824847250509166, | |
| "loss": 1.438, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.0997542142868042, | |
| "eval_rouge1": 0.31808868620058295, | |
| "eval_rouge2": 0.15095978898822576, | |
| "eval_rougeL": 0.2758976305910347, | |
| "eval_rougeLsum": 0.2783141924283456, | |
| "eval_runtime": 24.4917, | |
| "eval_samples_per_second": 0.817, | |
| "eval_steps_per_second": 0.408, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.0004572301425661914, | |
| "loss": 1.3892, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0004562118126272913, | |
| "loss": 1.3238, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00045519348268839105, | |
| "loss": 1.3365, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.0004541751527494908, | |
| "loss": 1.3769, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0004531568228105906, | |
| "loss": 1.337, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00045213849287169044, | |
| "loss": 1.3606, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.00045112016293279026, | |
| "loss": 1.3643, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00045010183299389, | |
| "loss": 1.297, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00044908350305498983, | |
| "loss": 1.3871, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.00044806517311608965, | |
| "loss": 1.3163, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_loss": 1.0784223079681396, | |
| "eval_rouge1": 0.3334137177907346, | |
| "eval_rouge2": 0.15261190778459077, | |
| "eval_rougeL": 0.2719649985784092, | |
| "eval_rougeLsum": 0.27272497195004963, | |
| "eval_runtime": 21.6088, | |
| "eval_samples_per_second": 0.926, | |
| "eval_steps_per_second": 0.463, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.0004470468431771894, | |
| "loss": 1.3417, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.00044602851323828917, | |
| "loss": 1.3073, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00044501018329938904, | |
| "loss": 1.3036, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 0.0004439918533604888, | |
| "loss": 1.3535, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.0004429735234215886, | |
| "loss": 1.334, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.00044195519348268843, | |
| "loss": 1.372, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.0004409368635437882, | |
| "loss": 1.3312, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 0.000439918533604888, | |
| "loss": 1.3126, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 0.00043890020366598776, | |
| "loss": 1.3103, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.0004378818737270876, | |
| "loss": 1.3625, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_loss": 1.0480375289916992, | |
| "eval_rouge1": 0.3252587017685009, | |
| "eval_rouge2": 0.14695387868011492, | |
| "eval_rougeL": 0.2819448898014987, | |
| "eval_rougeLsum": 0.2825983453315859, | |
| "eval_runtime": 21.3066, | |
| "eval_samples_per_second": 0.939, | |
| "eval_steps_per_second": 0.469, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.0004368635437881874, | |
| "loss": 1.3607, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 0.00043584521384928715, | |
| "loss": 1.3053, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.00043482688391038697, | |
| "loss": 1.306, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.0004338085539714868, | |
| "loss": 1.3133, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.00043279022403258654, | |
| "loss": 1.3261, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 0.0004317718940936864, | |
| "loss": 1.2707, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.0004307535641547862, | |
| "loss": 1.305, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 0.00042973523421588593, | |
| "loss": 1.3298, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 0.00042871690427698575, | |
| "loss": 1.295, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.00042769857433808556, | |
| "loss": 1.3147, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_loss": 1.0463201999664307, | |
| "eval_rouge1": 0.3472651013247176, | |
| "eval_rouge2": 0.16642476533004819, | |
| "eval_rougeL": 0.2866317550606935, | |
| "eval_rougeLsum": 0.2880956525742435, | |
| "eval_runtime": 23.4232, | |
| "eval_samples_per_second": 0.854, | |
| "eval_steps_per_second": 0.427, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 0.0004266802443991853, | |
| "loss": 1.258, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 0.00042566191446028514, | |
| "loss": 1.3465, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.00042464358452138495, | |
| "loss": 1.3383, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.0004236252545824847, | |
| "loss": 1.2732, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 0.00042260692464358453, | |
| "loss": 1.2712, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.0004215885947046843, | |
| "loss": 1.2621, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 0.00042057026476578416, | |
| "loss": 1.2981, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 0.0004195519348268839, | |
| "loss": 1.3079, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.0004185336048879837, | |
| "loss": 1.3029, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 0.00041751527494908355, | |
| "loss": 1.2826, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_loss": 1.0260637998580933, | |
| "eval_rouge1": 0.3271562809197811, | |
| "eval_rouge2": 0.14988747443733796, | |
| "eval_rougeL": 0.27055113042556145, | |
| "eval_rougeLsum": 0.27258271275088874, | |
| "eval_runtime": 21.6038, | |
| "eval_samples_per_second": 0.926, | |
| "eval_steps_per_second": 0.463, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 0.0004164969450101833, | |
| "loss": 1.296, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.00041547861507128307, | |
| "loss": 1.2451, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.0004144602851323829, | |
| "loss": 1.2716, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 0.0004134419551934827, | |
| "loss": 1.2908, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.0004124236252545825, | |
| "loss": 1.2433, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 0.0004114052953156823, | |
| "loss": 1.2606, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 0.0004103869653767821, | |
| "loss": 1.2975, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.0004093686354378819, | |
| "loss": 1.245, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 0.00040835030549898167, | |
| "loss": 1.2774, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.0004073319755600815, | |
| "loss": 1.297, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 1.02230966091156, | |
| "eval_rouge1": 0.33765263015500535, | |
| "eval_rouge2": 0.16173841205100162, | |
| "eval_rougeL": 0.2891261055238028, | |
| "eval_rougeLsum": 0.2913636739603367, | |
| "eval_runtime": 21.7283, | |
| "eval_samples_per_second": 0.92, | |
| "eval_steps_per_second": 0.46, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 0.0004063136456211813, | |
| "loss": 1.2437, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 0.00040529531568228106, | |
| "loss": 1.228, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 0.0004042769857433808, | |
| "loss": 1.1951, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 0.0004032586558044807, | |
| "loss": 1.205, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 0.00040224032586558045, | |
| "loss": 1.2154, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 0.00040122199592668026, | |
| "loss": 1.2449, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 0.0004002036659877801, | |
| "loss": 1.2331, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 0.00039918533604887984, | |
| "loss": 1.1912, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 0.00039816700610997965, | |
| "loss": 1.2084, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 0.0003971486761710794, | |
| "loss": 1.2038, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_loss": 1.014428973197937, | |
| "eval_rouge1": 0.32994381772002457, | |
| "eval_rouge2": 0.1560313842830252, | |
| "eval_rougeL": 0.2757795040195402, | |
| "eval_rougeLsum": 0.27717385084380153, | |
| "eval_runtime": 21.4325, | |
| "eval_samples_per_second": 0.933, | |
| "eval_steps_per_second": 0.467, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 0.00039613034623217923, | |
| "loss": 1.281, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 0.00039511201629327904, | |
| "loss": 1.1934, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 0.0003940936863543788, | |
| "loss": 1.2564, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 0.0003930753564154787, | |
| "loss": 1.2175, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 0.00039205702647657843, | |
| "loss": 1.2252, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 0.0003910386965376782, | |
| "loss": 1.2343, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 0.000390020366598778, | |
| "loss": 1.2715, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 0.0003890020366598778, | |
| "loss": 1.2275, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 0.0003879837067209776, | |
| "loss": 1.2482, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 0.0003869653767820774, | |
| "loss": 1.2617, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_loss": 0.9945599436759949, | |
| "eval_rouge1": 0.34311982189768164, | |
| "eval_rouge2": 0.16176698655947228, | |
| "eval_rougeL": 0.2933063191954748, | |
| "eval_rougeLsum": 0.29522199653692416, | |
| "eval_runtime": 20.2751, | |
| "eval_samples_per_second": 0.986, | |
| "eval_steps_per_second": 0.493, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 0.0003859470468431772, | |
| "loss": 1.19, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 0.000384928716904277, | |
| "loss": 1.2462, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 0.0003839103869653768, | |
| "loss": 1.2387, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 0.00038289205702647655, | |
| "loss": 1.2516, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 0.0003818737270875764, | |
| "loss": 1.2543, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 0.0003808553971486762, | |
| "loss": 1.246, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 0.00037983706720977594, | |
| "loss": 1.2356, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 0.0003788187372708758, | |
| "loss": 1.2301, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 0.00037780040733197557, | |
| "loss": 1.2465, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 0.00037678207739307533, | |
| "loss": 1.2219, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "eval_loss": 0.9984191656112671, | |
| "eval_rouge1": 0.35530591683468205, | |
| "eval_rouge2": 0.1738300131149214, | |
| "eval_rougeL": 0.3007508066988732, | |
| "eval_rougeLsum": 0.30111433503149587, | |
| "eval_runtime": 21.0854, | |
| "eval_samples_per_second": 0.949, | |
| "eval_steps_per_second": 0.474, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 0.0003757637474541752, | |
| "loss": 1.2142, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 0.00037474541751527496, | |
| "loss": 1.1964, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 0.0003737270875763747, | |
| "loss": 1.2053, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 0.00037270875763747454, | |
| "loss": 1.2118, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 0.00037169042769857435, | |
| "loss": 1.1915, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 0.00037067209775967417, | |
| "loss": 1.2272, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 0.00036965376782077393, | |
| "loss": 1.1926, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 0.00036863543788187374, | |
| "loss": 1.2056, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 0.00036761710794297356, | |
| "loss": 1.1868, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 0.0003665987780040733, | |
| "loss": 1.1906, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_loss": 0.9987648129463196, | |
| "eval_rouge1": 0.3497044345695397, | |
| "eval_rouge2": 0.17488344746541123, | |
| "eval_rougeL": 0.2920352923144545, | |
| "eval_rougeLsum": 0.2936840461100848, | |
| "eval_runtime": 20.5611, | |
| "eval_samples_per_second": 0.973, | |
| "eval_steps_per_second": 0.486, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 0.0003655804480651731, | |
| "loss": 1.1981, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 0.00036456211812627295, | |
| "loss": 1.2079, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 0.0003635437881873727, | |
| "loss": 1.2036, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 0.00036252545824847247, | |
| "loss": 1.1892, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 0.00036150712830957234, | |
| "loss": 1.2223, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 0.0003604887983706721, | |
| "loss": 1.2214, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 0.0003594704684317719, | |
| "loss": 1.2206, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 0.0003584521384928717, | |
| "loss": 1.2218, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 0.0003574338085539715, | |
| "loss": 1.1856, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.0003564154786150713, | |
| "loss": 1.2303, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.9856168031692505, | |
| "eval_rouge1": 0.3750110269055352, | |
| "eval_rouge2": 0.17459799489090816, | |
| "eval_rougeL": 0.3003592834064919, | |
| "eval_rougeLsum": 0.30119936143252757, | |
| "eval_runtime": 23.4082, | |
| "eval_samples_per_second": 0.854, | |
| "eval_steps_per_second": 0.427, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 0.00035539714867617106, | |
| "loss": 1.1498, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 0.0003543788187372709, | |
| "loss": 1.2083, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 0.0003533604887983707, | |
| "loss": 1.2291, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 0.00035234215885947045, | |
| "loss": 1.1733, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 0.0003513238289205703, | |
| "loss": 1.1716, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 0.0003503054989816701, | |
| "loss": 1.1657, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 0.00034928716904276985, | |
| "loss": 1.1538, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 0.00034826883910386966, | |
| "loss": 1.1579, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 0.0003472505091649695, | |
| "loss": 1.1945, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 0.00034623217922606924, | |
| "loss": 1.18, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "eval_loss": 0.9823005795478821, | |
| "eval_rouge1": 0.35334084985835634, | |
| "eval_rouge2": 0.1773564424695835, | |
| "eval_rougeL": 0.30516391982892244, | |
| "eval_rougeLsum": 0.30596841516938367, | |
| "eval_runtime": 22.0682, | |
| "eval_samples_per_second": 0.906, | |
| "eval_steps_per_second": 0.453, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 0.00034521384928716905, | |
| "loss": 1.1782, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 0.00034419551934826887, | |
| "loss": 1.2039, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 0.0003431771894093686, | |
| "loss": 1.1961, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 0.00034215885947046844, | |
| "loss": 1.1846, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 0.0003411405295315682, | |
| "loss": 1.1776, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 0.00034012219959266807, | |
| "loss": 1.1549, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 0.00033910386965376783, | |
| "loss": 1.1442, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 0.0003380855397148676, | |
| "loss": 1.1672, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 0.00033706720977596746, | |
| "loss": 1.1854, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 0.0003360488798370672, | |
| "loss": 1.1435, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "eval_loss": 0.9792933464050293, | |
| "eval_rouge1": 0.3566145348467804, | |
| "eval_rouge2": 0.16981457897259283, | |
| "eval_rougeL": 0.29555110085672354, | |
| "eval_rougeLsum": 0.29638538572800865, | |
| "eval_runtime": 20.8074, | |
| "eval_samples_per_second": 0.961, | |
| "eval_steps_per_second": 0.481, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 0.000335030549898167, | |
| "loss": 1.1721, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "learning_rate": 0.0003340122199592668, | |
| "loss": 1.1546, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 0.0003329938900203666, | |
| "loss": 1.1843, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 0.00033197556008146637, | |
| "loss": 1.1968, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 0.0003309572301425662, | |
| "loss": 1.1481, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 0.000329938900203666, | |
| "loss": 1.1473, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 0.0003289205702647658, | |
| "loss": 1.1729, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 0.0003279022403258656, | |
| "loss": 1.1401, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 0.0003268839103869654, | |
| "loss": 1.1602, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 0.0003258655804480652, | |
| "loss": 1.1473, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "eval_loss": 0.9619871973991394, | |
| "eval_rouge1": 0.36668217495119926, | |
| "eval_rouge2": 0.18802901856822518, | |
| "eval_rougeL": 0.3009048329724593, | |
| "eval_rougeLsum": 0.3020958756940847, | |
| "eval_runtime": 20.3135, | |
| "eval_samples_per_second": 0.985, | |
| "eval_steps_per_second": 0.492, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 0.00032484725050916497, | |
| "loss": 1.1533, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 0.00032382892057026473, | |
| "loss": 1.1557, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 0.0003228105906313646, | |
| "loss": 1.2091, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "learning_rate": 0.00032179226069246436, | |
| "loss": 1.1791, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 0.0003207739307535642, | |
| "loss": 1.1407, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 0.000319755600814664, | |
| "loss": 1.1498, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 0.00031873727087576375, | |
| "loss": 1.1368, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 0.00031771894093686356, | |
| "loss": 1.1634, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 0.0003167006109979633, | |
| "loss": 1.168, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 0.00031568228105906314, | |
| "loss": 1.1588, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "eval_loss": 0.9718366861343384, | |
| "eval_rouge1": 0.36821337929430387, | |
| "eval_rouge2": 0.17821834663511393, | |
| "eval_rougeL": 0.30543977513475806, | |
| "eval_rougeLsum": 0.3075353240284504, | |
| "eval_runtime": 20.7174, | |
| "eval_samples_per_second": 0.965, | |
| "eval_steps_per_second": 0.483, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 0.00031466395112016295, | |
| "loss": 1.1989, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 0.0003136456211812627, | |
| "loss": 1.1542, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 0.00031262729124236253, | |
| "loss": 1.203, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 0.00031160896130346234, | |
| "loss": 1.1644, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 0.0003105906313645621, | |
| "loss": 1.1266, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 0.0003095723014256619, | |
| "loss": 1.1994, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 0.00030855397148676173, | |
| "loss": 1.1402, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 0.0003075356415478615, | |
| "loss": 1.1348, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 0.0003065173116089613, | |
| "loss": 1.1582, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 0.0003054989816700611, | |
| "loss": 1.127, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.9626486897468567, | |
| "eval_rouge1": 0.33933999922378055, | |
| "eval_rouge2": 0.15960640801652384, | |
| "eval_rougeL": 0.27841295690954404, | |
| "eval_rougeLsum": 0.2805778340104648, | |
| "eval_runtime": 20.5994, | |
| "eval_samples_per_second": 0.971, | |
| "eval_steps_per_second": 0.485, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 0.0003044806517311609, | |
| "loss": 1.1229, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 0.0003034623217922607, | |
| "loss": 1.1027, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 0.0003024439918533605, | |
| "loss": 1.0923, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 0.0003014256619144603, | |
| "loss": 1.1416, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 0.0003004073319755601, | |
| "loss": 1.1066, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "learning_rate": 0.00029938900203665985, | |
| "loss": 1.1045, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 0.0002983706720977597, | |
| "loss": 1.1172, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 0.0002973523421588595, | |
| "loss": 1.1535, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 0.00029633401221995924, | |
| "loss": 1.1866, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 0.0002953156822810591, | |
| "loss": 1.1251, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "eval_loss": 0.9701215028762817, | |
| "eval_rouge1": 0.36918849108015117, | |
| "eval_rouge2": 0.1756845524684395, | |
| "eval_rougeL": 0.29888162339098345, | |
| "eval_rougeLsum": 0.30162046588140573, | |
| "eval_runtime": 24.6363, | |
| "eval_samples_per_second": 0.812, | |
| "eval_steps_per_second": 0.406, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 0.00029429735234215887, | |
| "loss": 1.1246, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 0.00029327902240325863, | |
| "loss": 1.1562, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 0.00029226069246435845, | |
| "loss": 1.1627, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 0.00029124236252545826, | |
| "loss": 1.1157, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 0.0002902240325865581, | |
| "loss": 1.1398, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 0.00028920570264765784, | |
| "loss": 1.1432, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 0.00028818737270875765, | |
| "loss": 1.1318, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 0.00028716904276985747, | |
| "loss": 1.1461, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 0.00028615071283095723, | |
| "loss": 1.13, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 0.000285132382892057, | |
| "loss": 1.085, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "eval_loss": 0.9604999423027039, | |
| "eval_rouge1": 0.3531685802104979, | |
| "eval_rouge2": 0.16500844586291133, | |
| "eval_rougeL": 0.28475165873194214, | |
| "eval_rougeLsum": 0.2861591101839957, | |
| "eval_runtime": 20.5667, | |
| "eval_samples_per_second": 0.972, | |
| "eval_steps_per_second": 0.486, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 0.00028411405295315686, | |
| "loss": 1.1408, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 0.0002830957230142566, | |
| "loss": 1.1458, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 0.0002820773930753564, | |
| "loss": 1.1056, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 0.00028105906313645625, | |
| "loss": 1.1933, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 0.000280040733197556, | |
| "loss": 1.1478, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 0.0002790224032586558, | |
| "loss": 1.0862, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 0.0002780040733197556, | |
| "loss": 1.1433, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 0.0002769857433808554, | |
| "loss": 1.124, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 0.0002759674134419552, | |
| "loss": 1.095, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 0.000274949083503055, | |
| "loss": 1.0922, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "eval_loss": 0.9633736610412598, | |
| "eval_rouge1": 0.35626224117845484, | |
| "eval_rouge2": 0.16997535406808995, | |
| "eval_rougeL": 0.2941903529387039, | |
| "eval_rougeLsum": 0.2963873855076279, | |
| "eval_runtime": 22.0443, | |
| "eval_samples_per_second": 0.907, | |
| "eval_steps_per_second": 0.454, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 0.0002739307535641548, | |
| "loss": 1.1468, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 0.0002729124236252546, | |
| "loss": 1.1456, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 0.00027189409368635437, | |
| "loss": 1.1317, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 0.0002708757637474542, | |
| "loss": 1.1078, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 0.000269857433808554, | |
| "loss": 1.143, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 0.00026883910386965376, | |
| "loss": 1.1194, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 0.00026782077393075357, | |
| "loss": 1.1122, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 0.0002668024439918534, | |
| "loss": 1.1275, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 0.00026578411405295315, | |
| "loss": 1.1305, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 0.00026476578411405296, | |
| "loss": 1.1649, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "eval_loss": 0.9545726776123047, | |
| "eval_rouge1": 0.38101044159436365, | |
| "eval_rouge2": 0.1957583700370854, | |
| "eval_rougeL": 0.3214174366996265, | |
| "eval_rougeLsum": 0.3228583149166323, | |
| "eval_runtime": 21.4511, | |
| "eval_samples_per_second": 0.932, | |
| "eval_steps_per_second": 0.466, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 0.0002637474541751528, | |
| "loss": 1.104, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 0.00026272912423625254, | |
| "loss": 1.1482, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 0.00026171079429735235, | |
| "loss": 1.1361, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 0.0002606924643584521, | |
| "loss": 1.0924, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 0.000259674134419552, | |
| "loss": 1.1313, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 0.00025865580448065174, | |
| "loss": 1.0971, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 0.0002576374745417515, | |
| "loss": 1.1653, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 0.00025661914460285137, | |
| "loss": 1.1438, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 0.00025560081466395113, | |
| "loss": 1.139, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 0.0002545824847250509, | |
| "loss": 1.1279, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.9487113952636719, | |
| "eval_rouge1": 0.3782997160632432, | |
| "eval_rouge2": 0.20289958909161465, | |
| "eval_rougeL": 0.3222664292441296, | |
| "eval_rougeLsum": 0.3240112049119128, | |
| "eval_runtime": 21.5846, | |
| "eval_samples_per_second": 0.927, | |
| "eval_steps_per_second": 0.463, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 0.0002535641547861507, | |
| "loss": 1.1028, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 0.0002525458248472505, | |
| "loss": 1.0659, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 5.06, | |
| "learning_rate": 0.0002515274949083503, | |
| "loss": 1.115, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 0.0002505091649694501, | |
| "loss": 1.0952, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 0.0002494908350305499, | |
| "loss": 1.1092, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 0.00024847250509164973, | |
| "loss": 1.1194, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 0.0002474541751527495, | |
| "loss": 1.11, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "learning_rate": 0.0002464358452138493, | |
| "loss": 1.1524, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 0.0002454175152749491, | |
| "loss": 1.1122, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 0.0002443991853360489, | |
| "loss": 1.0798, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "eval_loss": 0.9421226382255554, | |
| "eval_rouge1": 0.3597269877576089, | |
| "eval_rouge2": 0.1775049607682675, | |
| "eval_rougeL": 0.2919995300536511, | |
| "eval_rougeLsum": 0.29538083346340016, | |
| "eval_runtime": 22.4063, | |
| "eval_samples_per_second": 0.893, | |
| "eval_steps_per_second": 0.446, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 0.0002433808553971487, | |
| "loss": 1.1141, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "learning_rate": 0.00024236252545824845, | |
| "loss": 1.11, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 5.26, | |
| "learning_rate": 0.00024134419551934827, | |
| "loss": 1.1206, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 0.00024032586558044808, | |
| "loss": 1.1035, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 0.00023930753564154787, | |
| "loss": 1.1362, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "learning_rate": 0.00023828920570264766, | |
| "loss": 1.1023, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 0.00023727087576374745, | |
| "loss": 1.1237, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "learning_rate": 0.00023625254582484726, | |
| "loss": 1.0934, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 0.00023523421588594705, | |
| "loss": 1.0976, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "learning_rate": 0.00023421588594704684, | |
| "loss": 1.1468, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "eval_loss": 0.9474976658821106, | |
| "eval_rouge1": 0.37719594840753984, | |
| "eval_rouge2": 0.19434086595475156, | |
| "eval_rougeL": 0.312798697930013, | |
| "eval_rougeLsum": 0.3158141266137696, | |
| "eval_runtime": 21.5064, | |
| "eval_samples_per_second": 0.93, | |
| "eval_steps_per_second": 0.465, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 0.00023319755600814665, | |
| "loss": 1.1283, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 0.00023217922606924644, | |
| "loss": 1.105, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 5.46, | |
| "learning_rate": 0.00023116089613034625, | |
| "loss": 1.0968, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 0.00023014256619144602, | |
| "loss": 1.0931, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 0.00022912423625254583, | |
| "loss": 1.1181, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 0.00022810590631364565, | |
| "loss": 1.0492, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 0.0002270875763747454, | |
| "loss": 1.0888, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 5.56, | |
| "learning_rate": 0.00022606924643584522, | |
| "loss": 1.0944, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 0.000225050916496945, | |
| "loss": 1.0851, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 0.00022403258655804482, | |
| "loss": 1.073, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "eval_loss": 0.9429187774658203, | |
| "eval_rouge1": 0.3534213889657237, | |
| "eval_rouge2": 0.1666029802662448, | |
| "eval_rougeL": 0.298122774116486, | |
| "eval_rougeLsum": 0.3017447168430196, | |
| "eval_runtime": 21.9067, | |
| "eval_samples_per_second": 0.913, | |
| "eval_steps_per_second": 0.456, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 0.00022301425661914458, | |
| "loss": 1.1145, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "learning_rate": 0.0002219959266802444, | |
| "loss": 1.081, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 0.00022097759674134421, | |
| "loss": 1.1057, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 0.000219959266802444, | |
| "loss": 1.1037, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 0.0002189409368635438, | |
| "loss": 1.0963, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "learning_rate": 0.00021792260692464358, | |
| "loss": 1.0834, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 0.0002169042769857434, | |
| "loss": 1.1108, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 0.0002158859470468432, | |
| "loss": 1.1077, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 0.00021486761710794297, | |
| "loss": 1.0936, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "learning_rate": 0.00021384928716904278, | |
| "loss": 1.0598, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "eval_loss": 0.9476393461227417, | |
| "eval_rouge1": 0.3460493427394816, | |
| "eval_rouge2": 0.16425447886287378, | |
| "eval_rougeL": 0.29176256122671795, | |
| "eval_rougeLsum": 0.2932874710130029, | |
| "eval_runtime": 22.7632, | |
| "eval_samples_per_second": 0.879, | |
| "eval_steps_per_second": 0.439, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 0.00021283095723014257, | |
| "loss": 1.1012, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 0.00021181262729124236, | |
| "loss": 1.1155, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 0.00021079429735234215, | |
| "loss": 1.0611, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "learning_rate": 0.00020977596741344196, | |
| "loss": 1.1044, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 0.00020875763747454178, | |
| "loss": 1.0781, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 0.00020773930753564154, | |
| "loss": 1.118, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 0.00020672097759674135, | |
| "loss": 1.0767, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 0.00020570264765784114, | |
| "loss": 1.1183, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 0.00020468431771894095, | |
| "loss": 1.0675, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 0.00020366598778004074, | |
| "loss": 1.0567, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.9474280476570129, | |
| "eval_rouge1": 0.3549405529957428, | |
| "eval_rouge2": 0.17220677580168336, | |
| "eval_rougeL": 0.29622719821503607, | |
| "eval_rougeLsum": 0.29765510347367236, | |
| "eval_runtime": 21.6142, | |
| "eval_samples_per_second": 0.925, | |
| "eval_steps_per_second": 0.463, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 0.00020264765784114053, | |
| "loss": 1.082, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 0.00020162932790224034, | |
| "loss": 1.0653, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 0.00020061099796334013, | |
| "loss": 1.0819, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "learning_rate": 0.00019959266802443992, | |
| "loss": 1.0833, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 0.0001985743380855397, | |
| "loss": 1.0683, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 0.00019755600814663952, | |
| "loss": 1.0805, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 6.14, | |
| "learning_rate": 0.00019653767820773934, | |
| "loss": 1.0535, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "learning_rate": 0.0001955193482688391, | |
| "loss": 1.0952, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 6.18, | |
| "learning_rate": 0.0001945010183299389, | |
| "loss": 1.0997, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 0.0001934826883910387, | |
| "loss": 1.0773, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "eval_loss": 0.9442319869995117, | |
| "eval_rouge1": 0.3697494602820765, | |
| "eval_rouge2": 0.17309826892569075, | |
| "eval_rougeL": 0.30189218077829605, | |
| "eval_rougeLsum": 0.3058880236996683, | |
| "eval_runtime": 22.8637, | |
| "eval_samples_per_second": 0.875, | |
| "eval_steps_per_second": 0.437, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 0.0001924643584521385, | |
| "loss": 1.0739, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "learning_rate": 0.00019144602851323828, | |
| "loss": 1.0964, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 0.0001904276985743381, | |
| "loss": 1.0914, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "learning_rate": 0.0001894093686354379, | |
| "loss": 1.0836, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 0.00018839103869653767, | |
| "loss": 1.0812, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "learning_rate": 0.00018737270875763748, | |
| "loss": 1.1139, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 0.00018635437881873727, | |
| "loss": 1.0565, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 0.00018533604887983708, | |
| "loss": 1.1065, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 6.38, | |
| "learning_rate": 0.00018431771894093687, | |
| "loss": 1.0972, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 0.00018329938900203666, | |
| "loss": 1.0704, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "eval_loss": 0.9415400624275208, | |
| "eval_rouge1": 0.3552015796577298, | |
| "eval_rouge2": 0.17624288378508868, | |
| "eval_rougeL": 0.29158581025602837, | |
| "eval_rougeLsum": 0.2947848268366889, | |
| "eval_runtime": 21.6045, | |
| "eval_samples_per_second": 0.926, | |
| "eval_steps_per_second": 0.463, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 0.00018228105906313647, | |
| "loss": 1.0911, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 0.00018126272912423623, | |
| "loss": 1.1143, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 0.00018024439918533605, | |
| "loss": 1.0511, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 0.00017922606924643584, | |
| "loss": 1.0629, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 0.00017820773930753565, | |
| "loss": 1.0291, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 0.00017718940936863544, | |
| "loss": 1.0673, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 0.00017617107942973523, | |
| "loss": 1.0804, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 6.56, | |
| "learning_rate": 0.00017515274949083504, | |
| "loss": 1.0569, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 0.00017413441955193483, | |
| "loss": 1.086, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "learning_rate": 0.00017311608961303462, | |
| "loss": 1.0676, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "eval_loss": 0.9348514676094055, | |
| "eval_rouge1": 0.37110363769688715, | |
| "eval_rouge2": 0.18311514856569724, | |
| "eval_rougeL": 0.3001004216892586, | |
| "eval_rougeLsum": 0.30109761355013054, | |
| "eval_runtime": 22.0391, | |
| "eval_samples_per_second": 0.907, | |
| "eval_steps_per_second": 0.454, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 6.62, | |
| "learning_rate": 0.00017209775967413443, | |
| "loss": 1.0646, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 0.00017107942973523422, | |
| "loss": 1.1062, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 0.00017006109979633404, | |
| "loss": 1.0522, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 0.0001690427698574338, | |
| "loss": 1.0598, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 6.7, | |
| "learning_rate": 0.0001680244399185336, | |
| "loss": 1.0777, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 0.0001670061099796334, | |
| "loss": 1.0967, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 0.00016598778004073319, | |
| "loss": 1.0654, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 0.000164969450101833, | |
| "loss": 1.0776, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 0.0001639511201629328, | |
| "loss": 1.126, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 0.0001629327902240326, | |
| "loss": 1.1015, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "eval_loss": 0.9340616464614868, | |
| "eval_rouge1": 0.3645773285826771, | |
| "eval_rouge2": 0.1797763488186414, | |
| "eval_rougeL": 0.3043672589098563, | |
| "eval_rougeLsum": 0.30620848729732686, | |
| "eval_runtime": 22.359, | |
| "eval_samples_per_second": 0.894, | |
| "eval_steps_per_second": 0.447, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 0.00016191446028513236, | |
| "loss": 1.0782, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 0.00016089613034623218, | |
| "loss": 1.0664, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 0.000159877800407332, | |
| "loss": 1.0867, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 0.00015885947046843178, | |
| "loss": 1.0813, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "learning_rate": 0.00015784114052953157, | |
| "loss": 1.0541, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 0.00015682281059063136, | |
| "loss": 1.0479, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 0.00015580448065173117, | |
| "loss": 1.0574, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 0.00015478615071283096, | |
| "loss": 1.0775, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 0.00015376782077393075, | |
| "loss": 1.0567, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 0.00015274949083503056, | |
| "loss": 1.0895, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 0.9335128664970398, | |
| "eval_rouge1": 0.3564737046975759, | |
| "eval_rouge2": 0.17665902660761595, | |
| "eval_rougeL": 0.2936209010289881, | |
| "eval_rougeLsum": 0.29567036214117715, | |
| "eval_runtime": 21.7504, | |
| "eval_samples_per_second": 0.92, | |
| "eval_steps_per_second": 0.46, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 0.00015173116089613035, | |
| "loss": 1.019, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 0.00015071283095723014, | |
| "loss": 1.0701, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 7.06, | |
| "learning_rate": 0.00014969450101832993, | |
| "loss": 1.064, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 0.00014867617107942974, | |
| "loss": 1.0464, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 0.00014765784114052956, | |
| "loss": 1.0958, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 0.00014663951120162932, | |
| "loss": 1.0458, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 0.00014562118126272913, | |
| "loss": 1.0012, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "learning_rate": 0.00014460285132382892, | |
| "loss": 1.0629, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 7.18, | |
| "learning_rate": 0.00014358452138492873, | |
| "loss": 1.049, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 0.0001425661914460285, | |
| "loss": 1.0839, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "eval_loss": 0.9262797236442566, | |
| "eval_rouge1": 0.3608261849266158, | |
| "eval_rouge2": 0.16796265957036072, | |
| "eval_rougeL": 0.300587364099427, | |
| "eval_rougeLsum": 0.3018268665573097, | |
| "eval_runtime": 22.8495, | |
| "eval_samples_per_second": 0.875, | |
| "eval_steps_per_second": 0.438, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "learning_rate": 0.0001415478615071283, | |
| "loss": 1.0231, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "learning_rate": 0.00014052953156822812, | |
| "loss": 1.043, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 7.26, | |
| "learning_rate": 0.0001395112016293279, | |
| "loss": 1.0423, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 0.0001384928716904277, | |
| "loss": 1.1131, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 0.0001374745417515275, | |
| "loss": 1.0106, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 0.0001364562118126273, | |
| "loss": 1.0663, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 7.34, | |
| "learning_rate": 0.0001354378818737271, | |
| "loss": 1.0842, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "learning_rate": 0.00013441955193482688, | |
| "loss": 1.0482, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 0.0001334012219959267, | |
| "loss": 1.1137, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 0.00013238289205702648, | |
| "loss": 1.0737, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "eval_loss": 0.9295714497566223, | |
| "eval_rouge1": 0.35713241736503193, | |
| "eval_rouge2": 0.18064337174705725, | |
| "eval_rougeL": 0.2977802249700461, | |
| "eval_rougeLsum": 0.2990767538657372, | |
| "eval_runtime": 21.6594, | |
| "eval_samples_per_second": 0.923, | |
| "eval_steps_per_second": 0.462, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 7.42, | |
| "learning_rate": 0.00013136456211812627, | |
| "loss": 1.0793, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "learning_rate": 0.00013034623217922606, | |
| "loss": 1.0964, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 0.00012932790224032587, | |
| "loss": 1.0471, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 0.00012830957230142569, | |
| "loss": 1.0647, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 0.00012729124236252545, | |
| "loss": 1.095, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "learning_rate": 0.00012627291242362526, | |
| "loss": 1.0891, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 0.00012525458248472505, | |
| "loss": 1.0876, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 0.00012423625254582486, | |
| "loss": 1.0112, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "learning_rate": 0.00012321792260692465, | |
| "loss": 1.0352, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 0.00012219959266802444, | |
| "loss": 1.0549, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "eval_loss": 0.9341118931770325, | |
| "eval_rouge1": 0.35969569162284754, | |
| "eval_rouge2": 0.17023162775203884, | |
| "eval_rougeL": 0.28257423918684926, | |
| "eval_rougeLsum": 0.28412150462963986, | |
| "eval_runtime": 22.2811, | |
| "eval_samples_per_second": 0.898, | |
| "eval_steps_per_second": 0.449, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "learning_rate": 0.00012118126272912423, | |
| "loss": 1.0636, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "learning_rate": 0.00012016293279022404, | |
| "loss": 1.0719, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 0.00011914460285132383, | |
| "loss": 1.0451, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "learning_rate": 0.00011812627291242363, | |
| "loss": 1.0509, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "learning_rate": 0.00011710794297352342, | |
| "loss": 1.0515, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 0.00011608961303462322, | |
| "loss": 1.0525, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 0.00011507128309572301, | |
| "loss": 1.0751, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 0.00011405295315682282, | |
| "loss": 1.059, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "learning_rate": 0.00011303462321792261, | |
| "loss": 1.0352, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "learning_rate": 0.00011201629327902241, | |
| "loss": 1.065, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "eval_loss": 0.9318963885307312, | |
| "eval_rouge1": 0.36719535582225143, | |
| "eval_rouge2": 0.18487793490577756, | |
| "eval_rougeL": 0.3063474590760422, | |
| "eval_rougeLsum": 0.3072518409290927, | |
| "eval_runtime": 22.5338, | |
| "eval_samples_per_second": 0.888, | |
| "eval_steps_per_second": 0.444, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "learning_rate": 0.0001109979633401222, | |
| "loss": 1.0591, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "learning_rate": 0.000109979633401222, | |
| "loss": 1.0553, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 0.00010896130346232179, | |
| "loss": 1.0766, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 0.0001079429735234216, | |
| "loss": 1.0434, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 0.00010692464358452139, | |
| "loss": 1.0453, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "learning_rate": 0.00010590631364562118, | |
| "loss": 1.0754, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 0.00010488798370672098, | |
| "loss": 1.0754, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "learning_rate": 0.00010386965376782077, | |
| "loss": 1.0743, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 0.00010285132382892057, | |
| "loss": 1.0792, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 0.00010183299389002037, | |
| "loss": 1.0676, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.9239376783370972, | |
| "eval_rouge1": 0.3708780006445588, | |
| "eval_rouge2": 0.18486714155230236, | |
| "eval_rougeL": 0.30909528682097154, | |
| "eval_rougeLsum": 0.31117436309534663, | |
| "eval_runtime": 22.2042, | |
| "eval_samples_per_second": 0.901, | |
| "eval_steps_per_second": 0.45, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "learning_rate": 0.00010081466395112017, | |
| "loss": 1.0105, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 9.979633401221996e-05, | |
| "loss": 1.0505, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 9.877800407331976e-05, | |
| "loss": 1.0375, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 9.775967413441955e-05, | |
| "loss": 1.0465, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "learning_rate": 9.674134419551935e-05, | |
| "loss": 1.0249, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 9.572301425661914e-05, | |
| "loss": 1.0806, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "learning_rate": 9.470468431771895e-05, | |
| "loss": 1.0233, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "learning_rate": 9.368635437881874e-05, | |
| "loss": 1.0605, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "learning_rate": 9.266802443991854e-05, | |
| "loss": 1.0336, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "learning_rate": 9.164969450101833e-05, | |
| "loss": 1.0327, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "eval_loss": 0.9313555955886841, | |
| "eval_rouge1": 0.3612621102138621, | |
| "eval_rouge2": 0.17914853351838206, | |
| "eval_rougeL": 0.3058448839143484, | |
| "eval_rougeLsum": 0.30736619803392246, | |
| "eval_runtime": 21.8087, | |
| "eval_samples_per_second": 0.917, | |
| "eval_steps_per_second": 0.459, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 9.063136456211812e-05, | |
| "loss": 1.0526, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "learning_rate": 8.961303462321792e-05, | |
| "loss": 1.0063, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "learning_rate": 8.859470468431772e-05, | |
| "loss": 1.071, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "learning_rate": 8.757637474541752e-05, | |
| "loss": 1.06, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "learning_rate": 8.655804480651731e-05, | |
| "loss": 1.016, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "learning_rate": 8.553971486761711e-05, | |
| "loss": 1.0764, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "learning_rate": 8.45213849287169e-05, | |
| "loss": 1.0446, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "learning_rate": 8.35030549898167e-05, | |
| "loss": 1.0479, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "learning_rate": 8.24847250509165e-05, | |
| "loss": 1.0283, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "learning_rate": 8.14663951120163e-05, | |
| "loss": 1.0532, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "eval_loss": 0.9309422373771667, | |
| "eval_rouge1": 0.3629265409740602, | |
| "eval_rouge2": 0.18256578670757062, | |
| "eval_rougeL": 0.30644935758402025, | |
| "eval_rougeLsum": 0.3074292385019225, | |
| "eval_runtime": 21.7991, | |
| "eval_samples_per_second": 0.917, | |
| "eval_steps_per_second": 0.459, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "learning_rate": 8.044806517311609e-05, | |
| "loss": 1.0408, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "learning_rate": 7.942973523421589e-05, | |
| "loss": 1.0554, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "learning_rate": 7.841140529531568e-05, | |
| "loss": 1.086, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 7.739307535641548e-05, | |
| "loss": 1.0316, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "learning_rate": 7.637474541751528e-05, | |
| "loss": 1.052, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 8.52, | |
| "learning_rate": 7.535641547861507e-05, | |
| "loss": 1.0419, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "learning_rate": 7.433808553971487e-05, | |
| "loss": 1.0657, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "learning_rate": 7.331975560081466e-05, | |
| "loss": 1.03, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "learning_rate": 7.230142566191446e-05, | |
| "loss": 1.042, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "learning_rate": 7.128309572301425e-05, | |
| "loss": 1.0649, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "eval_loss": 0.9269277453422546, | |
| "eval_rouge1": 0.3670666365189951, | |
| "eval_rouge2": 0.18356209763485815, | |
| "eval_rougeL": 0.3012090907355077, | |
| "eval_rougeLsum": 0.3039222282541081, | |
| "eval_runtime": 22.1694, | |
| "eval_samples_per_second": 0.902, | |
| "eval_steps_per_second": 0.451, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 8.62, | |
| "learning_rate": 7.026476578411406e-05, | |
| "loss": 1.0671, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 6.924643584521385e-05, | |
| "loss": 1.0237, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 8.66, | |
| "learning_rate": 6.822810590631365e-05, | |
| "loss": 1.0582, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "learning_rate": 6.720977596741344e-05, | |
| "loss": 1.0325, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "learning_rate": 6.619144602851324e-05, | |
| "loss": 1.0474, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "learning_rate": 6.517311608961303e-05, | |
| "loss": 1.0346, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "learning_rate": 6.415478615071284e-05, | |
| "loss": 1.063, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 8.76, | |
| "learning_rate": 6.313645621181263e-05, | |
| "loss": 1.0337, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 8.78, | |
| "learning_rate": 6.211812627291243e-05, | |
| "loss": 1.0531, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 6.109979633401222e-05, | |
| "loss": 1.073, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "eval_loss": 0.9253420829772949, | |
| "eval_rouge1": 0.3653533114499009, | |
| "eval_rouge2": 0.17889561755188924, | |
| "eval_rougeL": 0.30070525024462247, | |
| "eval_rougeLsum": 0.3024312386336895, | |
| "eval_runtime": 21.8579, | |
| "eval_samples_per_second": 0.915, | |
| "eval_steps_per_second": 0.458, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 8.82, | |
| "learning_rate": 6.008146639511202e-05, | |
| "loss": 1.0716, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 8.84, | |
| "learning_rate": 5.9063136456211815e-05, | |
| "loss": 1.0522, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 8.86, | |
| "learning_rate": 5.804480651731161e-05, | |
| "loss": 1.0401, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "learning_rate": 5.702647657841141e-05, | |
| "loss": 1.0178, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 8.9, | |
| "learning_rate": 5.6008146639511206e-05, | |
| "loss": 1.1043, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "learning_rate": 5.4989816700611e-05, | |
| "loss": 1.0424, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "learning_rate": 5.39714867617108e-05, | |
| "loss": 1.0599, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 8.96, | |
| "learning_rate": 5.295315682281059e-05, | |
| "loss": 1.0331, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "learning_rate": 5.1934826883910384e-05, | |
| "loss": 1.0467, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 5.0916496945010185e-05, | |
| "loss": 1.0491, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.9241307973861694, | |
| "eval_rouge1": 0.3664328537702999, | |
| "eval_rouge2": 0.18102691382663055, | |
| "eval_rougeL": 0.2998899427822319, | |
| "eval_rougeLsum": 0.301943630367904, | |
| "eval_runtime": 22.1092, | |
| "eval_samples_per_second": 0.905, | |
| "eval_steps_per_second": 0.452, | |
| "step": 4500 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 5000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "total_flos": 3.0455982194688e+17, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |