diff --git "a/checkpoint-7665/trainer_state.json" "b/checkpoint-7665/trainer_state.json"
new file mode 100644--- /dev/null
+++ "b/checkpoint-7665/trainer_state.json"
@@ -0,0 +1,5781 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 35.0,
+  "eval_steps": 500,
+  "global_step": 7665,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.045662100456621,
+      "grad_norm": 3.6685774326324463,
+      "learning_rate": 4.994129158512721e-05,
+      "loss": 3.1406,
+      "step": 10
+    },
+    {
+      "epoch": 0.091324200913242,
+      "grad_norm": 1.5006240606307983,
+      "learning_rate": 4.987606001304632e-05,
+      "loss": 1.6914,
+      "step": 20
+    },
+    {
+      "epoch": 0.136986301369863,
+      "grad_norm": 1.5749850273132324,
+      "learning_rate": 4.981082844096543e-05,
+      "loss": 1.0225,
+      "step": 30
+    },
+    {
+      "epoch": 0.182648401826484,
+      "grad_norm": 1.263222575187683,
+      "learning_rate": 4.974559686888454e-05,
+      "loss": 0.5903,
+      "step": 40
+    },
+    {
+      "epoch": 0.228310502283105,
+      "grad_norm": 1.1013864278793335,
+      "learning_rate": 4.9680365296803655e-05,
+      "loss": 0.3666,
+      "step": 50
+    },
+    {
+      "epoch": 0.273972602739726,
+      "grad_norm": 0.7766101956367493,
+      "learning_rate": 4.961513372472277e-05,
+      "loss": 0.2453,
+      "step": 60
+    },
+    {
+      "epoch": 0.319634703196347,
+      "grad_norm": 0.699148416519165,
+      "learning_rate": 4.954990215264188e-05,
+      "loss": 0.166,
+      "step": 70
+    },
+    {
+      "epoch": 0.365296803652968,
+      "grad_norm": 0.8860347270965576,
+      "learning_rate": 4.948467058056099e-05,
+      "loss": 0.1778,
+      "step": 80
+    },
+    {
+      "epoch": 0.410958904109589,
+      "grad_norm": 1.2396528720855713,
+      "learning_rate": 4.9419439008480104e-05,
+      "loss": 0.169,
+      "step": 90
+    },
+    {
+      "epoch": 0.45662100456621,
+      "grad_norm": 0.5089202523231506,
+      "learning_rate": 4.9354207436399216e-05,
+      "loss": 0.1386,
+      "step": 100
+    },
+    {
+      "epoch": 0.502283105022831,
+      "grad_norm": 4.915615081787109,
+      "learning_rate": 4.9288975864318335e-05,
+      "loss": 0.1191,
+      "step": 110
+    },
+    {
+      "epoch": 0.547945205479452,
+      "grad_norm": 0.44728991389274597,
+      "learning_rate": 4.922374429223745e-05,
+      "loss": 0.1137,
+      "step": 120
+    },
+    {
+      "epoch": 0.593607305936073,
+      "grad_norm": 0.48462724685668945,
+      "learning_rate": 4.915851272015656e-05,
+      "loss": 0.1289,
+      "step": 130
+    },
+    {
+      "epoch": 0.639269406392694,
+      "grad_norm": 0.428950697183609,
+      "learning_rate": 4.909328114807567e-05,
+      "loss": 0.1206,
+      "step": 140
+    },
+    {
+      "epoch": 0.684931506849315,
+      "grad_norm": 0.41929879784584045,
+      "learning_rate": 4.9028049575994784e-05,
+      "loss": 0.0686,
+      "step": 150
+    },
+    {
+      "epoch": 0.730593607305936,
+      "grad_norm": 0.4039075970649719,
+      "learning_rate": 4.8962818003913896e-05,
+      "loss": 0.0717,
+      "step": 160
+    },
+    {
+      "epoch": 0.776255707762557,
+      "grad_norm": 0.9115980863571167,
+      "learning_rate": 4.889758643183301e-05,
+      "loss": 0.1013,
+      "step": 170
+    },
+    {
+      "epoch": 0.821917808219178,
+      "grad_norm": 0.37252315878868103,
+      "learning_rate": 4.883235485975212e-05,
+      "loss": 0.0831,
+      "step": 180
+    },
+    {
+      "epoch": 0.867579908675799,
+      "grad_norm": 0.594161331653595,
+      "learning_rate": 4.876712328767123e-05,
+      "loss": 0.0694,
+      "step": 190
+    },
+    {
+      "epoch": 0.91324200913242,
+      "grad_norm": 0.3747114837169647,
+      "learning_rate": 4.8701891715590345e-05,
+      "loss": 0.09,
+      "step": 200
+    },
+    {
+      "epoch": 0.958904109589041,
+      "grad_norm": 0.3669376075267792,
+      "learning_rate": 4.8636660143509464e-05,
+      "loss": 0.0591,
+      "step": 210
+    },
+    {
+      "epoch": 1.0,
+      "eval_bertscore_f1": 0.8792882986807934,
+      "eval_bleu": 0.7712648078243911,
+      "eval_loss": 0.057190317660570145,
+      "eval_rougeL": 0.29946539136986206,
+      "eval_runtime": 99.9649,
+      "eval_samples_per_second": 15.035,
+      "eval_steps_per_second": 0.94,
+      "step": 219
+    },
+    {
+      "epoch": 1.004566210045662,
+      "grad_norm": 1.0320205688476562,
+      "learning_rate": 4.8571428571428576e-05,
+      "loss": 0.0721,
+      "step": 220
+    },
+    {
+      "epoch": 1.0502283105022832,
+      "grad_norm": 0.32598379254341125,
+      "learning_rate": 4.850619699934769e-05,
+      "loss": 0.0588,
+      "step": 230
+    },
+    {
+      "epoch": 1.095890410958904,
+      "grad_norm": 0.3275991678237915,
+      "learning_rate": 4.84409654272668e-05,
+      "loss": 0.0578,
+      "step": 240
+    },
+    {
+      "epoch": 1.1415525114155252,
+      "grad_norm": 0.32213452458381653,
+      "learning_rate": 4.837573385518591e-05,
+      "loss": 0.0804,
+      "step": 250
+    },
+    {
+      "epoch": 1.187214611872146,
+      "grad_norm": 0.22395305335521698,
+      "learning_rate": 4.8310502283105025e-05,
+      "loss": 0.0618,
+      "step": 260
+    },
+    {
+      "epoch": 1.2328767123287672,
+      "grad_norm": 0.2619684338569641,
+      "learning_rate": 4.824527071102414e-05,
+      "loss": 0.1243,
+      "step": 270
+    },
+    {
+      "epoch": 1.278538812785388,
+      "grad_norm": 0.4555642604827881,
+      "learning_rate": 4.818003913894325e-05,
+      "loss": 0.0678,
+      "step": 280
+    },
+    {
+      "epoch": 1.3242009132420092,
+      "grad_norm": 0.39453741908073425,
+      "learning_rate": 4.811480756686236e-05,
+      "loss": 0.0625,
+      "step": 290
+    },
+    {
+      "epoch": 1.36986301369863,
+      "grad_norm": 0.5136148929595947,
+      "learning_rate": 4.804957599478147e-05,
+      "loss": 0.0607,
+      "step": 300
+    },
+    {
+      "epoch": 1.4155251141552512,
+      "grad_norm": 0.2983837425708771,
+      "learning_rate": 4.798434442270059e-05,
+      "loss": 0.0582,
+      "step": 310
+    },
+    {
+      "epoch": 1.461187214611872,
+      "grad_norm": 0.21343404054641724,
+      "learning_rate": 4.7919112850619704e-05,
+      "loss": 0.0532,
+      "step": 320
+    },
+    {
+      "epoch": 1.5068493150684932,
+      "grad_norm": 0.25173795223236084,
+      "learning_rate": 4.7853881278538817e-05,
+      "loss": 0.0437,
+      "step": 330
+    },
+    {
+      "epoch": 1.5525114155251143,
+      "grad_norm": 0.631598949432373,
+      "learning_rate": 4.778864970645793e-05,
+      "loss": 0.1192,
+      "step": 340
+    },
+    {
+      "epoch": 1.5981735159817352,
+      "grad_norm": 0.2316834181547165,
+      "learning_rate": 4.772341813437704e-05,
+      "loss": 0.0566,
+      "step": 350
+    },
+    {
+      "epoch": 1.643835616438356,
+      "grad_norm": 0.15093661844730377,
+      "learning_rate": 4.765818656229615e-05,
+      "loss": 0.0772,
+      "step": 360
+    },
+    {
+      "epoch": 1.6894977168949772,
+      "grad_norm": 0.21192322671413422,
+      "learning_rate": 4.7592954990215265e-05,
+      "loss": 0.0812,
+      "step": 370
+    },
+    {
+      "epoch": 1.7351598173515983,
+      "grad_norm": 0.21022455394268036,
+      "learning_rate": 4.752772341813438e-05,
+      "loss": 0.0624,
+      "step": 380
+    },
+    {
+      "epoch": 1.7808219178082192,
+      "grad_norm": 0.2214512974023819,
+      "learning_rate": 4.746249184605349e-05,
+      "loss": 0.0714,
+      "step": 390
+    },
+    {
+      "epoch": 1.82648401826484,
+      "grad_norm": 0.5700469017028809,
+      "learning_rate": 4.73972602739726e-05,
+      "loss": 0.0503,
+      "step": 400
+    },
+    {
+      "epoch": 1.8721461187214612,
+      "grad_norm": 0.15681976079940796,
+      "learning_rate": 4.733202870189172e-05,
+      "loss": 0.0601,
+      "step": 410
+    },
+    {
+      "epoch": 1.9178082191780823,
+      "grad_norm": 0.5676538348197937,
+      "learning_rate": 4.726679712981083e-05,
+      "loss": 0.0539,
+      "step": 420
+    },
+    {
+      "epoch": 1.9634703196347032,
+      "grad_norm": 0.26402613520622253,
+      "learning_rate": 4.7201565557729945e-05,
+      "loss": 0.0459,
+      "step": 430
+    },
+    {
+      "epoch": 2.0,
+      "eval_bertscore_f1": 0.8805641725471,
+      "eval_bleu": 0.5660743945408303,
+      "eval_loss": 0.043494194746017456,
+      "eval_rougeL": 0.31573197270677456,
+      "eval_runtime": 86.9461,
+      "eval_samples_per_second": 17.287,
+      "eval_steps_per_second": 1.081,
+      "step": 438
+    },
+    {
+      "epoch": 2.009132420091324,
+      "grad_norm": 0.8411938548088074,
+      "learning_rate": 4.713633398564906e-05,
+      "loss": 0.097,
+      "step": 440
+    },
+    {
+      "epoch": 2.0547945205479454,
+      "grad_norm": 0.20442543923854828,
+      "learning_rate": 4.707110241356817e-05,
+      "loss": 0.0657,
+      "step": 450
+    },
+    {
+      "epoch": 2.1004566210045663,
+      "grad_norm": 0.20288924872875214,
+      "learning_rate": 4.700587084148728e-05,
+      "loss": 0.0548,
+      "step": 460
+    },
+    {
+      "epoch": 2.146118721461187,
+      "grad_norm": 0.3183929920196533,
+      "learning_rate": 4.6940639269406394e-05,
+      "loss": 0.0465,
+      "step": 470
+    },
+    {
+      "epoch": 2.191780821917808,
+      "grad_norm": 0.28004202246665955,
+      "learning_rate": 4.6875407697325506e-05,
+      "loss": 0.0622,
+      "step": 480
+    },
+    {
+      "epoch": 2.237442922374429,
+      "grad_norm": 0.33859482407569885,
+      "learning_rate": 4.681017612524462e-05,
+      "loss": 0.0521,
+      "step": 490
+    },
+    {
+      "epoch": 2.2831050228310503,
+      "grad_norm": 0.17048068344593048,
+      "learning_rate": 4.674494455316373e-05,
+      "loss": 0.0662,
+      "step": 500
+    },
+    {
+      "epoch": 2.328767123287671,
+      "grad_norm": 0.18371596932411194,
+      "learning_rate": 4.667971298108285e-05,
+      "loss": 0.0718,
+      "step": 510
+    },
+    {
+      "epoch": 2.374429223744292,
+      "grad_norm": 0.15471260249614716,
+      "learning_rate": 4.661448140900196e-05,
+      "loss": 0.0527,
+      "step": 520
+    },
+    {
+      "epoch": 2.4200913242009134,
+      "grad_norm": 0.25129422545433044,
+      "learning_rate": 4.6549249836921074e-05,
+      "loss": 0.0644,
+      "step": 530
+    },
+    {
+      "epoch": 2.4657534246575343,
+      "grad_norm": 0.1418524533510208,
+      "learning_rate": 4.6484018264840186e-05,
+      "loss": 0.0415,
+      "step": 540
+    },
+    {
+      "epoch": 2.5114155251141552,
+      "grad_norm": 0.14907291531562805,
+      "learning_rate": 4.64187866927593e-05,
+      "loss": 0.0369,
+      "step": 550
+    },
+    {
+      "epoch": 2.557077625570776,
+      "grad_norm": 0.2596917748451233,
+      "learning_rate": 4.635355512067841e-05,
+      "loss": 0.0492,
+      "step": 560
+    },
+    {
+      "epoch": 2.602739726027397,
+      "grad_norm": 0.4573097229003906,
+      "learning_rate": 4.628832354859752e-05,
+      "loss": 0.0542,
+      "step": 570
+    },
+    {
+      "epoch": 2.6484018264840183,
+      "grad_norm": 0.17177747189998627,
+      "learning_rate": 4.6223091976516634e-05,
+      "loss": 0.0492,
+      "step": 580
+    },
+    {
+      "epoch": 2.6940639269406392,
+      "grad_norm": 0.160361185669899,
+      "learning_rate": 4.6157860404435747e-05,
+      "loss": 0.0574,
+      "step": 590
+    },
+    {
+      "epoch": 2.73972602739726,
+      "grad_norm": 0.14502376317977905,
+      "learning_rate": 4.609262883235486e-05,
+      "loss": 0.0487,
+      "step": 600
+    },
+    {
+      "epoch": 2.7853881278538815,
+      "grad_norm": 0.15075726807117462,
+      "learning_rate": 4.602739726027398e-05,
+      "loss": 0.0584,
+      "step": 610
+    },
+    {
+      "epoch": 2.8310502283105023,
+      "grad_norm": 0.29456523060798645,
+      "learning_rate": 4.596216568819309e-05,
+      "loss": 0.0661,
+      "step": 620
+    },
+    {
+      "epoch": 2.8767123287671232,
+      "grad_norm": 0.20278586447238922,
+      "learning_rate": 4.58969341161122e-05,
+      "loss": 0.0463,
+      "step": 630
+    },
+    {
+      "epoch": 2.922374429223744,
+      "grad_norm": 0.12769503891468048,
+      "learning_rate": 4.5831702544031314e-05,
+      "loss": 0.0496,
+      "step": 640
+    },
+    {
+      "epoch": 2.968036529680365,
+      "grad_norm": 0.2201366275548935,
+      "learning_rate": 4.5766470971950426e-05,
+      "loss": 0.0376,
+      "step": 650
+    },
+    {
+      "epoch": 3.0,
+      "eval_bertscore_f1": 0.8785978137693322,
+      "eval_bleu": 0.7393077025329909,
+      "eval_loss": 0.040518876165151596,
+      "eval_rougeL": 0.298014790853214,
+      "eval_runtime": 85.5219,
+      "eval_samples_per_second": 17.574,
+      "eval_steps_per_second": 1.099,
+      "step": 657
+    },
+    {
+      "epoch": 3.0136986301369864,
+      "grad_norm": 0.4501720666885376,
+      "learning_rate": 4.570123939986954e-05,
+      "loss": 0.0526,
+      "step": 660
+    },
+    {
+      "epoch": 3.0593607305936072,
+      "grad_norm": 0.18561075627803802,
+      "learning_rate": 4.563600782778865e-05,
+      "loss": 0.048,
+      "step": 670
+    },
+    {
+      "epoch": 3.105022831050228,
+      "grad_norm": 0.13908711075782776,
+      "learning_rate": 4.557077625570776e-05,
+      "loss": 0.0445,
+      "step": 680
+    },
+    {
+      "epoch": 3.1506849315068495,
+      "grad_norm": 0.1476353406906128,
+      "learning_rate": 4.5505544683626875e-05,
+      "loss": 0.0587,
+      "step": 690
+    },
+    {
+      "epoch": 3.1963470319634704,
+      "grad_norm": 0.19854065775871277,
+      "learning_rate": 4.544031311154599e-05,
+      "loss": 0.041,
+      "step": 700
+    },
+    {
+      "epoch": 3.2420091324200913,
+      "grad_norm": 0.17018119990825653,
+      "learning_rate": 4.5375081539465106e-05,
+      "loss": 0.0355,
+      "step": 710
+    },
+    {
+      "epoch": 3.287671232876712,
+      "grad_norm": 0.45481783151626587,
+      "learning_rate": 4.530984996738422e-05,
+      "loss": 0.0647,
+      "step": 720
+    },
+    {
+      "epoch": 3.3333333333333335,
+      "grad_norm": 0.13204658031463623,
+      "learning_rate": 4.524461839530333e-05,
+      "loss": 0.0533,
+      "step": 730
+    },
+    {
+      "epoch": 3.3789954337899544,
+      "grad_norm": 0.1771598607301712,
+      "learning_rate": 4.517938682322244e-05,
+      "loss": 0.0407,
+      "step": 740
+    },
+    {
+      "epoch": 3.4246575342465753,
+      "grad_norm": 0.32262006402015686,
+      "learning_rate": 4.5114155251141555e-05,
+      "loss": 0.0361,
+      "step": 750
+    },
+    {
+      "epoch": 3.470319634703196,
+      "grad_norm": 0.16268010437488556,
+      "learning_rate": 4.504892367906067e-05,
+      "loss": 0.0401,
+      "step": 760
+    },
+    {
+      "epoch": 3.5159817351598175,
+      "grad_norm": 0.15328466892242432,
+      "learning_rate": 4.498369210697978e-05,
+      "loss": 0.068,
+      "step": 770
+    },
+    {
+      "epoch": 3.5616438356164384,
+      "grad_norm": 0.11483100056648254,
+      "learning_rate": 4.491846053489889e-05,
+      "loss": 0.0405,
+      "step": 780
+    },
+    {
+      "epoch": 3.6073059360730593,
+      "grad_norm": 0.12601321935653687,
+      "learning_rate": 4.4853228962818004e-05,
+      "loss": 0.0552,
+      "step": 790
+    },
+    {
+      "epoch": 3.65296803652968,
+      "grad_norm": 0.141522616147995,
+      "learning_rate": 4.4787997390737116e-05,
+      "loss": 0.0542,
+      "step": 800
+    },
+    {
+      "epoch": 3.6986301369863015,
+      "grad_norm": 0.3403891324996948,
+      "learning_rate": 4.4722765818656235e-05,
+      "loss": 0.0495,
+      "step": 810
+    },
+    {
+      "epoch": 3.7442922374429224,
+      "grad_norm": 0.1596749722957611,
+      "learning_rate": 4.465753424657535e-05,
+      "loss": 0.0441,
+      "step": 820
+    },
+    {
+      "epoch": 3.7899543378995433,
+      "grad_norm": 0.3847573697566986,
+      "learning_rate": 4.459230267449446e-05,
+      "loss": 0.0542,
+      "step": 830
+    },
+    {
+      "epoch": 3.8356164383561646,
+      "grad_norm": 0.2511195242404938,
+      "learning_rate": 4.452707110241357e-05,
+      "loss": 0.0434,
+      "step": 840
+    },
+    {
+      "epoch": 3.8812785388127855,
+      "grad_norm": 0.13712382316589355,
+      "learning_rate": 4.446183953033268e-05,
+      "loss": 0.0347,
+      "step": 850
+    },
+    {
+      "epoch": 3.9269406392694064,
+      "grad_norm": 0.1607000082731247,
+      "learning_rate": 4.4396607958251795e-05,
+      "loss": 0.0461,
+      "step": 860
+    },
+    {
+      "epoch": 3.9726027397260273,
+      "grad_norm": 0.26414012908935547,
+      "learning_rate": 4.433137638617091e-05,
+      "loss": 0.0663,
+      "step": 870
+    },
+    {
+      "epoch": 4.0,
+      "eval_bertscore_f1": 0.8776752933294711,
+      "eval_bleu": 0.7660390563095643,
+      "eval_loss": 0.03776135668158531,
+      "eval_rougeL": 0.2900147272161592,
+      "eval_runtime": 85.7788,
+      "eval_samples_per_second": 17.522,
+      "eval_steps_per_second": 1.096,
+      "step": 876
+    },
+    {
+      "epoch": 4.018264840182648,
+      "grad_norm": 0.39637818932533264,
+      "learning_rate": 4.426614481409002e-05,
+      "loss": 0.0413,
+      "step": 880
+    },
+    {
+      "epoch": 4.063926940639269,
+      "grad_norm": 0.1926361322402954,
+      "learning_rate": 4.420091324200913e-05,
+      "loss": 0.0653,
+      "step": 890
+    },
+    {
+      "epoch": 4.109589041095891,
+      "grad_norm": 0.18935950100421906,
+      "learning_rate": 4.4135681669928244e-05,
+      "loss": 0.0435,
+      "step": 900
+    },
+    {
+      "epoch": 4.155251141552512,
+      "grad_norm": 0.17452946305274963,
+      "learning_rate": 4.407045009784736e-05,
+      "loss": 0.0441,
+      "step": 910
+    },
+    {
+      "epoch": 4.200913242009133,
+      "grad_norm": 0.13483966886997223,
+      "learning_rate": 4.4005218525766475e-05,
+      "loss": 0.047,
+      "step": 920
+    },
+    {
+      "epoch": 4.2465753424657535,
+      "grad_norm": 0.12774349749088287,
+      "learning_rate": 4.393998695368559e-05,
+      "loss": 0.0438,
+      "step": 930
+    },
+    {
+      "epoch": 4.292237442922374,
+      "grad_norm": 0.3918684720993042,
+      "learning_rate": 4.38747553816047e-05,
+      "loss": 0.0592,
+      "step": 940
+    },
+    {
+      "epoch": 4.337899543378995,
+      "grad_norm": 0.12250568717718124,
+      "learning_rate": 4.380952380952381e-05,
+      "loss": 0.0375,
+      "step": 950
+    },
+    {
+      "epoch": 4.383561643835616,
+      "grad_norm": 0.10988382250070572,
+      "learning_rate": 4.3744292237442924e-05,
+      "loss": 0.0393,
+      "step": 960
+    },
+    {
+      "epoch": 4.429223744292237,
+      "grad_norm": 0.2036619633436203,
+      "learning_rate": 4.3679060665362036e-05,
+      "loss": 0.0426,
+      "step": 970
+    },
+    {
+      "epoch": 4.474885844748858,
+      "grad_norm": 0.1382388472557068,
+      "learning_rate": 4.361382909328115e-05,
+      "loss": 0.0405,
+      "step": 980
+    },
+    {
+      "epoch": 4.52054794520548,
+      "grad_norm": 0.18227025866508484,
+      "learning_rate": 4.354859752120026e-05,
+      "loss": 0.057,
+      "step": 990
+    },
+    {
+      "epoch": 4.566210045662101,
+      "grad_norm": 0.11121729016304016,
+      "learning_rate": 4.348336594911937e-05,
+      "loss": 0.0462,
+      "step": 1000
+    },
+    {
+      "epoch": 4.6118721461187215,
+      "grad_norm": 0.10684996098279953,
+      "learning_rate": 4.341813437703849e-05,
+      "loss": 0.0491,
+      "step": 1010
+    },
+    {
+      "epoch": 4.657534246575342,
+      "grad_norm": 0.20612068474292755,
+      "learning_rate": 4.3352902804957604e-05,
+      "loss": 0.0444,
+      "step": 1020
+    },
+    {
+      "epoch": 4.703196347031963,
+      "grad_norm": 0.18251581490039825,
+      "learning_rate": 4.3287671232876716e-05,
+      "loss": 0.0406,
+      "step": 1030
+    },
+    {
+      "epoch": 4.748858447488584,
+      "grad_norm": 0.12324585765600204,
+      "learning_rate": 4.322243966079583e-05,
+      "loss": 0.0622,
+      "step": 1040
+    },
+    {
+      "epoch": 4.794520547945205,
+      "grad_norm": 0.24980604648590088,
+      "learning_rate": 4.315720808871494e-05,
+      "loss": 0.0369,
+      "step": 1050
+    },
+    {
+      "epoch": 4.840182648401827,
+      "grad_norm": 0.11414594203233719,
+      "learning_rate": 4.309197651663405e-05,
+      "loss": 0.0337,
+      "step": 1060
+    },
+    {
+      "epoch": 4.885844748858448,
+      "grad_norm": 0.09755056351423264,
+      "learning_rate": 4.3026744944553165e-05,
+      "loss": 0.0375,
+      "step": 1070
+    },
+    {
+      "epoch": 4.931506849315069,
+      "grad_norm": 0.17053422331809998,
+      "learning_rate": 4.296151337247228e-05,
+      "loss": 0.037,
+      "step": 1080
+    },
+    {
+      "epoch": 4.9771689497716896,
+      "grad_norm": 0.1424807757139206,
+      "learning_rate": 4.289628180039139e-05,
+      "loss": 0.0335,
+      "step": 1090
+    },
+    {
+      "epoch": 5.0,
+      "eval_bertscore_f1": 0.8830310614285117,
+      "eval_bleu": 0.6535866956857873,
+      "eval_loss": 0.036899685859680176,
+      "eval_rougeL": 0.315426501376588,
+      "eval_runtime": 86.4385,
+      "eval_samples_per_second": 17.388,
+      "eval_steps_per_second": 1.087,
+      "step": 1095
+    },
+    {
+      "epoch": 5.0228310502283104,
+      "grad_norm": 0.16293296217918396,
+      "learning_rate": 4.28310502283105e-05,
+      "loss": 0.0436,
+      "step": 1100
+    },
+    {
+      "epoch": 5.068493150684931,
+      "grad_norm": 0.12882739305496216,
+      "learning_rate": 4.276581865622962e-05,
+      "loss": 0.0422,
+      "step": 1110
+    },
+    {
+      "epoch": 5.114155251141552,
+      "grad_norm": 0.1830970197916031,
+      "learning_rate": 4.270058708414873e-05,
+      "loss": 0.0429,
+      "step": 1120
+    },
+    {
+      "epoch": 5.159817351598173,
+      "grad_norm": 0.18178118765354156,
+      "learning_rate": 4.2635355512067844e-05,
+      "loss": 0.0377,
+      "step": 1130
+    },
+    {
+      "epoch": 5.205479452054795,
+      "grad_norm": 0.11018156260251999,
+      "learning_rate": 4.257012393998696e-05,
+      "loss": 0.0524,
+      "step": 1140
+    },
+    {
+      "epoch": 5.251141552511416,
+      "grad_norm": 0.14554737508296967,
+      "learning_rate": 4.250489236790607e-05,
+      "loss": 0.0518,
+      "step": 1150
+    },
+    {
+      "epoch": 5.296803652968037,
+      "grad_norm": 0.13553589582443237,
+      "learning_rate": 4.243966079582518e-05,
+      "loss": 0.0385,
+      "step": 1160
+    },
+    {
+      "epoch": 5.342465753424658,
+      "grad_norm": 0.4197877049446106,
+      "learning_rate": 4.237442922374429e-05,
+      "loss": 0.0462,
+      "step": 1170
+    },
+    {
+      "epoch": 5.3881278538812785,
+      "grad_norm": 0.08472498506307602,
+      "learning_rate": 4.2309197651663405e-05,
+      "loss": 0.0439,
+      "step": 1180
+    },
+    {
+      "epoch": 5.433789954337899,
+      "grad_norm": 0.13307222723960876,
+      "learning_rate": 4.224396607958252e-05,
+      "loss": 0.0511,
+      "step": 1190
+    },
+    {
+      "epoch": 5.47945205479452,
+      "grad_norm": 0.11114447563886642,
+      "learning_rate": 4.217873450750163e-05,
+      "loss": 0.0359,
+      "step": 1200
+    },
+    {
+      "epoch": 5.525114155251142,
+      "grad_norm": 0.09421250224113464,
+      "learning_rate": 4.211350293542075e-05,
+      "loss": 0.0343,
+      "step": 1210
+    },
+    {
+      "epoch": 5.570776255707763,
+      "grad_norm": 0.12763556838035583,
+      "learning_rate": 4.204827136333986e-05,
+      "loss": 0.0478,
+      "step": 1220
+    },
+    {
+      "epoch": 5.616438356164384,
+      "grad_norm": 0.3674803078174591,
+      "learning_rate": 4.198303979125897e-05,
+      "loss": 0.0424,
+      "step": 1230
+    },
+    {
+      "epoch": 5.662100456621005,
+      "grad_norm": 0.19763565063476562,
+      "learning_rate": 4.1917808219178085e-05,
+      "loss": 0.0375,
+      "step": 1240
+    },
+    {
+      "epoch": 5.707762557077626,
+      "grad_norm": 0.15177254378795624,
+      "learning_rate": 4.18525766470972e-05,
+      "loss": 0.0397,
+      "step": 1250
+    },
+    {
+      "epoch": 5.7534246575342465,
+      "grad_norm": 0.1431536078453064,
+      "learning_rate": 4.178734507501631e-05,
+      "loss": 0.0393,
+      "step": 1260
+    },
+    {
+      "epoch": 5.799086757990867,
+      "grad_norm": 0.1322961449623108,
+      "learning_rate": 4.172211350293542e-05,
+      "loss": 0.0342,
+      "step": 1270
+    },
+    {
+      "epoch": 5.844748858447488,
+      "grad_norm": 0.15804563462734222,
+      "learning_rate": 4.1656881930854534e-05,
+      "loss": 0.0499,
+      "step": 1280
+    },
+    {
+      "epoch": 5.890410958904109,
+      "grad_norm": 0.09370295703411102,
+      "learning_rate": 4.1591650358773646e-05,
+      "loss": 0.0339,
+      "step": 1290
+    },
+    {
+      "epoch": 5.936073059360731,
+      "grad_norm": 0.2806883454322815,
+      "learning_rate": 4.152641878669276e-05,
+      "loss": 0.035,
+      "step": 1300
+    },
+    {
+      "epoch": 5.981735159817352,
+      "grad_norm": 0.09985365718603134,
+      "learning_rate": 4.146118721461188e-05,
+      "loss": 0.0461,
+      "step": 1310
+    },
+    {
+      "epoch": 6.0,
+      "eval_bertscore_f1": 0.8816708240680352,
+      "eval_bleu": 0.6032876773464483,
+      "eval_loss": 0.036029841750860214,
+      "eval_rougeL": 0.31834801433173276,
+      "eval_runtime": 86.97,
+      "eval_samples_per_second": 17.282,
+      "eval_steps_per_second": 1.081,
+      "step": 1314
+    },
+    {
+      "epoch": 6.027397260273973,
+      "grad_norm": 0.10236047208309174,
+      "learning_rate": 4.139595564253099e-05,
+      "loss": 0.0336,
+      "step": 1320
+    },
+    {
+      "epoch": 6.073059360730594,
+      "grad_norm": 0.24231013655662537,
+      "learning_rate": 4.13307240704501e-05,
+      "loss": 0.0496,
+      "step": 1330
+    },
+    {
+      "epoch": 6.1187214611872145,
+      "grad_norm": 0.11029026657342911,
+      "learning_rate": 4.1265492498369214e-05,
+      "loss": 0.0403,
+      "step": 1340
+    },
+    {
+      "epoch": 6.164383561643835,
+      "grad_norm": 0.14063166081905365,
+      "learning_rate": 4.1200260926288326e-05,
+      "loss": 0.0473,
+      "step": 1350
+    },
+    {
+      "epoch": 6.210045662100456,
+      "grad_norm": 0.10346683859825134,
+      "learning_rate": 4.113502935420744e-05,
+      "loss": 0.0362,
+      "step": 1360
+    },
+    {
+      "epoch": 6.255707762557078,
+      "grad_norm": 0.1653313785791397,
+      "learning_rate": 4.106979778212655e-05,
+      "loss": 0.0327,
+      "step": 1370
+    },
+    {
+      "epoch": 6.301369863013699,
+      "grad_norm": 0.1193709447979927,
+      "learning_rate": 4.100456621004566e-05,
+      "loss": 0.0341,
+      "step": 1380
+    },
+    {
+      "epoch": 6.34703196347032,
+      "grad_norm": 0.34356066584587097,
+      "learning_rate": 4.0939334637964774e-05,
+      "loss": 0.0431,
+      "step": 1390
+    },
+    {
+      "epoch": 6.392694063926941,
+      "grad_norm": 0.2641497552394867,
+      "learning_rate": 4.087410306588389e-05,
+      "loss": 0.0423,
+      "step": 1400
+    },
+    {
+      "epoch": 6.438356164383562,
+      "grad_norm": 0.14067210257053375,
+      "learning_rate": 4.0808871493803006e-05,
+      "loss": 0.0354,
+      "step": 1410
+    },
+    {
+      "epoch": 6.4840182648401825,
+      "grad_norm": 0.11391960829496384,
+      "learning_rate": 4.074363992172212e-05,
+      "loss": 0.0321,
+      "step": 1420
+    },
+    {
+      "epoch": 6.529680365296803,
+      "grad_norm": 0.15210473537445068,
+      "learning_rate": 4.067840834964123e-05,
+      "loss": 0.038,
+      "step": 1430
+    },
+    {
+      "epoch": 6.575342465753424,
+      "grad_norm": 0.22131510078907013,
+      "learning_rate": 4.061317677756034e-05,
+      "loss": 0.0385,
+      "step": 1440
+    },
+    {
+      "epoch": 6.621004566210045,
+      "grad_norm": 0.10140376538038254,
+      "learning_rate": 4.0547945205479454e-05,
+      "loss": 0.0499,
+      "step": 1450
+    },
+    {
+      "epoch": 6.666666666666667,
+      "grad_norm": 0.15117721259593964,
+      "learning_rate": 4.0482713633398566e-05,
+      "loss": 0.0375,
+      "step": 1460
+    },
+    {
+      "epoch": 6.712328767123288,
+      "grad_norm": 0.15028707683086395,
+      "learning_rate": 4.041748206131768e-05,
+      "loss": 0.0528,
+      "step": 1470
+    },
+    {
+      "epoch": 6.757990867579909,
+      "grad_norm": 0.16894899308681488,
+      "learning_rate": 4.035225048923679e-05,
+      "loss": 0.0381,
+      "step": 1480
+    },
+    {
+      "epoch": 6.80365296803653,
+      "grad_norm": 0.10997270792722702,
+      "learning_rate": 4.02870189171559e-05,
+      "loss": 0.0345,
+      "step": 1490
+    },
+    {
+      "epoch": 6.8493150684931505,
+      "grad_norm": 0.2839312255382538,
+      "learning_rate": 4.0221787345075015e-05,
+      "loss": 0.0507,
+      "step": 1500
+    },
+    {
+      "epoch": 6.894977168949771,
+      "grad_norm": 0.1316707879304886,
+      "learning_rate": 4.0156555772994134e-05,
+      "loss": 0.0375,
+      "step": 1510
+    },
+    {
+      "epoch": 6.940639269406392,
+      "grad_norm": 0.4014754295349121,
+      "learning_rate": 4.0091324200913246e-05,
+      "loss": 0.0444,
+      "step": 1520
+    },
+    {
+      "epoch": 6.986301369863014,
+      "grad_norm": 0.10761623084545135,
+      "learning_rate": 4.002609262883236e-05,
+      "loss": 0.0334,
+      "step": 1530
+    },
+    {
+      "epoch": 7.0,
+      "eval_bertscore_f1": 0.8739348690825467,
+      "eval_bleu": 0.5975825525895913,
+      "eval_loss": 0.034862346947193146,
+      "eval_rougeL": 0.2734166668626543,
+      "eval_runtime": 86.9967,
+      "eval_samples_per_second": 17.277,
+      "eval_steps_per_second": 1.081,
+      "step": 1533
+    },
+    {
+      "epoch": 7.031963470319635,
+      "grad_norm": 0.2581014931201935,
+      "learning_rate": 3.996086105675147e-05,
+      "loss": 0.0377,
+      "step": 1540
+    },
+    {
+      "epoch": 7.077625570776256,
+      "grad_norm": 0.3633580803871155,
+      "learning_rate": 3.989562948467058e-05,
+      "loss": 0.0409,
+      "step": 1550
+    },
+    {
+      "epoch": 7.123287671232877,
+      "grad_norm": 0.2476946860551834,
+      "learning_rate": 3.9830397912589695e-05,
+      "loss": 0.0432,
+      "step": 1560
+    },
+    {
+      "epoch": 7.168949771689498,
+      "grad_norm": 0.12708741426467896,
+      "learning_rate": 3.976516634050881e-05,
+      "loss": 0.0344,
+      "step": 1570
+    },
+    {
+      "epoch": 7.2146118721461185,
+      "grad_norm": 0.0819052904844284,
+      "learning_rate": 3.969993476842792e-05,
+      "loss": 0.0411,
+      "step": 1580
+    },
+    {
+      "epoch": 7.260273972602739,
+      "grad_norm": 0.09047359228134155,
+      "learning_rate": 3.963470319634703e-05,
+      "loss": 0.031,
+      "step": 1590
+    },
+    {
+      "epoch": 7.30593607305936,
+      "grad_norm": 0.15134736895561218,
+      "learning_rate": 3.9569471624266144e-05,
+      "loss": 0.038,
+      "step": 1600
+    },
+    {
+      "epoch": 7.351598173515982,
+      "grad_norm": 0.09916142374277115,
+      "learning_rate": 3.950424005218526e-05,
+      "loss": 0.0318,
+      "step": 1610
+    },
+    {
+      "epoch": 7.397260273972603,
+      "grad_norm": 0.11288363486528397,
+      "learning_rate": 3.9439008480104375e-05,
+      "loss": 0.0411,
+      "step": 1620
+    },
+    {
+      "epoch": 7.442922374429224,
+      "grad_norm": 0.1381136178970337,
+      "learning_rate": 3.937377690802349e-05,
+      "loss": 0.0396,
+      "step": 1630
+    },
+    {
+      "epoch": 7.488584474885845,
+      "grad_norm": 0.08881582319736481,
+      "learning_rate": 3.93085453359426e-05,
+      "loss": 0.0298,
+      "step": 1640
+    },
+    {
+      "epoch": 7.534246575342466,
+      "grad_norm": 0.10169164836406708,
+      "learning_rate": 3.924331376386171e-05,
+      "loss": 0.0391,
+      "step": 1650
+    },
+    {
+      "epoch": 7.579908675799087,
+      "grad_norm": 0.24670983850955963,
+      "learning_rate": 3.9178082191780823e-05,
+      "loss": 0.041,
+      "step": 1660
+    },
+    {
+      "epoch": 7.6255707762557075,
+      "grad_norm": 0.10547634214162827,
+      "learning_rate": 3.9112850619699936e-05,
+      "loss": 0.042,
+      "step": 1670
+    },
+    {
+      "epoch": 7.671232876712329,
+      "grad_norm": 0.30908140540122986,
+      "learning_rate": 3.904761904761905e-05,
+      "loss": 0.0382,
+      "step": 1680
+    },
+    {
+      "epoch": 7.71689497716895,
+      "grad_norm": 0.19618338346481323,
+      "learning_rate": 3.898238747553816e-05,
+      "loss": 0.0414,
+      "step": 1690
+    },
+    {
+      "epoch": 7.762557077625571,
+      "grad_norm": 0.0793214961886406,
+      "learning_rate": 3.891715590345727e-05,
+      "loss": 0.032,
+      "step": 1700
+    },
+    {
+      "epoch": 7.808219178082192,
+      "grad_norm": 0.12131261080503464,
+      "learning_rate": 3.885192433137639e-05,
+      "loss": 0.0417,
+      "step": 1710
+    },
+    {
+      "epoch": 7.853881278538813,
+      "grad_norm": 0.41418784856796265,
+      "learning_rate": 3.87866927592955e-05,
+      "loss": 0.0395,
+      "step": 1720
+    },
+    {
+      "epoch": 7.899543378995434,
+      "grad_norm": 0.1027241051197052,
+      "learning_rate": 3.8721461187214615e-05,
+      "loss": 0.0299,
+      "step": 1730
+    },
+    {
+      "epoch": 7.945205479452055,
+      "grad_norm": 0.1369609236717224,
+      "learning_rate": 3.865622961513373e-05,
+      "loss": 0.0375,
+      "step": 1740
+    },
+    {
+      "epoch": 7.9908675799086755,
+      "grad_norm": 0.0653514638543129,
+      "learning_rate": 3.859099804305284e-05,
+      "loss": 0.0466,
+      "step": 1750
+    },
+    {
+      "epoch": 8.0,
+      "eval_bertscore_f1": 0.882495258898237,
+      "eval_bleu": 0.6683821586112519,
+      "eval_loss": 0.033862482756376266,
+      "eval_rougeL": 0.3127432355714671,
+      "eval_runtime": 86.7914,
+      "eval_samples_per_second": 17.317,
+      "eval_steps_per_second": 1.083,
+      "step": 1752
+    },
+    {
+      "epoch": 8.036529680365296,
+      "grad_norm": 0.07512130588293076,
+      "learning_rate": 3.852576647097195e-05,
+      "loss": 0.0428,
+      "step": 1760
+    },
+    {
+      "epoch": 8.082191780821917,
+      "grad_norm": 0.10082229226827621,
+      "learning_rate": 3.8460534898891064e-05,
+      "loss": 0.0394,
+      "step": 1770
+    },
+    {
+      "epoch": 8.127853881278538,
+      "grad_norm": 0.08132240921258926,
+      "learning_rate": 3.8395303326810176e-05,
+      "loss": 0.0315,
+      "step": 1780
+    },
+    {
+      "epoch": 8.173515981735159,
+      "grad_norm": 0.09185563027858734,
+      "learning_rate": 3.833007175472929e-05,
+      "loss": 0.0403,
+      "step": 1790
+    },
+    {
+      "epoch": 8.219178082191782,
+      "grad_norm": 0.08592450618743896,
+      "learning_rate": 3.82648401826484e-05,
+      "loss": 0.0356,
+      "step": 1800
+    },
+    {
+      "epoch": 8.264840182648403,
+      "grad_norm": 0.11714768409729004,
+      "learning_rate": 3.819960861056752e-05,
+      "loss": 0.0374,
+      "step": 1810
+    },
+    {
+      "epoch": 8.310502283105023,
+      "grad_norm": 0.0861973762512207,
+      "learning_rate": 3.813437703848663e-05,
+      "loss": 0.043,
+      "step": 1820
+    },
+    {
+      "epoch": 8.356164383561644,
+      "grad_norm": 0.36329740285873413,
+      "learning_rate": 3.8069145466405744e-05,
+      "loss": 0.0379,
+      "step": 1830
+    },
+    {
+      "epoch": 8.401826484018265,
+      "grad_norm": 0.0876651480793953,
+      "learning_rate": 3.8003913894324856e-05,
+      "loss": 0.0332,
+      "step": 1840
+    },
+    {
+      "epoch": 8.447488584474886,
+      "grad_norm": 0.08967719227075577,
+      "learning_rate": 3.793868232224397e-05,
+      "loss": 0.0337,
+      "step": 1850
+    },
+    {
+      "epoch": 8.493150684931507,
+      "grad_norm": 0.07374356687068939,
+      "learning_rate": 3.787345075016308e-05,
+      "loss": 0.0285,
+      "step": 1860
+    },
+    {
+      "epoch": 8.538812785388128,
+      "grad_norm": 0.345527321100235,
+      "learning_rate": 3.780821917808219e-05,
+      "loss": 0.0459,
+      "step": 1870
+    },
+    {
+      "epoch": 8.584474885844749,
+      "grad_norm": 0.07992502301931381,
+      "learning_rate": 3.7742987606001305e-05,
+      "loss": 0.0384,
+      "step": 1880
+    },
+    {
+      "epoch": 8.63013698630137,
+      "grad_norm": 0.1169479638338089,
+      "learning_rate": 3.767775603392042e-05,
+      "loss": 0.0329,
+      "step": 1890
+    },
+    {
+      "epoch": 8.67579908675799,
+      "grad_norm": 0.2200576215982437,
+      "learning_rate": 3.761252446183953e-05,
+      "loss": 0.0392,
+      "step": 1900
+    },
+    {
+      "epoch": 8.721461187214611,
+      "grad_norm": 0.07725539803504944,
+      "learning_rate": 3.754729288975865e-05,
+      "loss": 0.0365,
+      "step": 1910
+    },
+    {
+      "epoch": 8.767123287671232,
+      "grad_norm": 0.17150481045246124,
+      "learning_rate": 3.748206131767776e-05,
+      "loss": 0.0402,
+      "step": 1920
+    },
+    {
+      "epoch": 8.812785388127853,
+      "grad_norm": 0.09744448214769363,
+      "learning_rate": 3.741682974559687e-05,
+      "loss": 0.0345,
+      "step": 1930
+    },
+    {
+      "epoch": 8.858447488584474,
+      "grad_norm": 0.13161571323871613,
+      "learning_rate": 3.7351598173515985e-05,
+      "loss": 0.0294,
+      "step": 1940
+    },
+    {
+      "epoch": 8.904109589041095,
+      "grad_norm": 0.12368807941675186,
+      "learning_rate": 3.72863666014351e-05,
+      "loss": 0.0402,
+      "step": 1950
+    },
+    {
+      "epoch": 8.949771689497716,
+      "grad_norm": 0.2823167145252228,
+      "learning_rate": 3.722113502935421e-05,
+      "loss": 0.0396,
+      "step": 1960
+    },
+    {
+      "epoch": 8.995433789954339,
+      "grad_norm": 0.0739816278219223,
+      "learning_rate": 3.715590345727332e-05,
+      "loss": 0.0367,
+      "step": 1970
+    },
+    {
+      "epoch": 9.0,
+      "eval_bertscore_f1": 0.8818957697766191,
+      "eval_bleu": 0.7007771398124651,
+      "eval_loss": 0.03325749561190605,
+      "eval_rougeL": 0.30904254601890724,
+      "eval_runtime": 85.8594,
+      "eval_samples_per_second": 17.505,
+      "eval_steps_per_second": 1.095,
+      "step": 1971
+    },
+    {
+      "epoch": 9.04109589041096,
+      "grad_norm": 0.1445605307817459,
+      "learning_rate": 3.709067188519243e-05,
+      "loss": 0.0325,
+      "step": 1980
+    },
+    {
+      "epoch": 9.08675799086758,
+      "grad_norm": 0.09838061034679413,
+      "learning_rate": 3.7025440313111545e-05,
+      "loss": 0.0313,
+      "step": 1990
+    },
+    {
+      "epoch": 9.132420091324201,
+      "grad_norm": 0.1458427757024765,
+      "learning_rate": 3.696020874103066e-05,
+      "loss": 0.0369,
+      "step": 2000
+    },
+    {
+      "epoch": 9.178082191780822,
+      "grad_norm": 0.08086104691028595,
+      "learning_rate": 3.6894977168949777e-05,
+      "loss": 0.0376,
+      "step": 2010
+    },
+    {
+      "epoch": 9.223744292237443,
+      "grad_norm": 0.08939240872859955,
+      "learning_rate": 3.682974559686889e-05,
+      "loss": 0.0366,
+      "step": 2020
+    },
+    {
+      "epoch": 9.269406392694064,
+      "grad_norm": 0.3728352189064026,
+      "learning_rate": 3.6764514024788e-05,
+      "loss": 0.0371,
+      "step": 2030
+    },
+    {
+      "epoch": 9.315068493150685,
+      "grad_norm": 0.17691729962825775,
+      "learning_rate": 3.669928245270711e-05,
+      "loss": 0.0393,
+      "step": 2040
+    },
+    {
+      "epoch": 9.360730593607306,
+      "grad_norm": 0.1187521442770958,
+      "learning_rate": 3.6634050880626225e-05,
+      "loss": 0.0392,
+      "step": 2050
+    },
+    {
+      "epoch": 9.406392694063927,
+      "grad_norm": 0.05309230834245682,
+      "learning_rate": 3.656881930854534e-05,
+      "loss": 0.0351,
+      "step": 2060
+    },
+    {
+      "epoch": 9.452054794520548,
+      "grad_norm": 0.11482471972703934,
+      "learning_rate": 3.650358773646445e-05,
+      "loss": 0.03,
+      "step": 2070
+    },
+    {
+      "epoch": 9.497716894977168,
+      "grad_norm": 0.1264190524816513,
+      "learning_rate": 3.643835616438356e-05,
+      "loss": 0.0391,
+      "step": 2080
+    },
+    {
+      "epoch": 9.54337899543379,
+      "grad_norm": 0.3746449947357178,
+      "learning_rate": 3.6373124592302674e-05,
+      "loss": 0.0303,
+      "step": 2090
+    },
+    {
+      "epoch": 9.58904109589041,
+      "grad_norm": 0.0804123654961586,
+      "learning_rate": 3.6307893020221786e-05,
+      "loss": 0.0307,
+      "step": 2100
+    },
+    {
+      "epoch": 9.634703196347033,
+      "grad_norm": 0.12022325396537781,
+      "learning_rate": 3.6242661448140905e-05,
+      "loss": 0.0419,
+      "step": 2110
+    },
+    {
+      "epoch": 9.680365296803654,
+      "grad_norm": 0.07230564951896667,
+      "learning_rate": 3.617742987606002e-05,
+      "loss": 0.0347,
+      "step": 2120
+    },
+    {
+      "epoch": 9.726027397260275,
+      "grad_norm": 0.0902918353676796,
+      "learning_rate": 3.611219830397913e-05,
+      "loss": 0.0311,
+      "step": 2130
+    },
+    {
+      "epoch": 9.771689497716896,
+      "grad_norm": 0.13435712456703186,
+      "learning_rate": 3.604696673189824e-05,
+      "loss": 0.0381,
+      "step": 2140
+    },
+    {
+      "epoch": 9.817351598173516,
+      "grad_norm": 0.24455106258392334,
+      "learning_rate": 3.5981735159817354e-05,
+      "loss": 0.0343,
+      "step": 2150
+    },
+    {
+      "epoch": 9.863013698630137,
+      "grad_norm": 0.4970339238643646,
+      "learning_rate": 3.5916503587736466e-05,
+      "loss": 0.0498,
+      "step": 2160
+    },
+    {
+      "epoch": 9.908675799086758,
+      "grad_norm": 0.08798874914646149,
+      "learning_rate": 3.585127201565558e-05,
+      "loss": 0.035,
+      "step": 2170
+    },
+    {
+      "epoch": 9.954337899543379,
+      "grad_norm": 0.10993966460227966,
+      "learning_rate": 3.578604044357469e-05,
+      "loss": 0.0378,
+      "step": 2180
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.09713996201753616,
+      "learning_rate": 3.57208088714938e-05,
+      "loss": 0.0311,
+      "step": 2190
+    },
+    {
+      "epoch": 10.0,
+      "eval_bertscore_f1": 0.8794062694786234,
+      "eval_bleu": 0.533594752651969,
+      "eval_loss": 0.03252607583999634,
+      "eval_rougeL": 0.3176795038867278,
+      "eval_runtime": 86.4689,
+      "eval_samples_per_second": 17.382,
+      "eval_steps_per_second": 1.087,
+      "step": 2190
+    },
+    {
+      "epoch": 10.045662100456621,
+      "grad_norm": 0.15792271494865417,
+      "learning_rate": 3.5655577299412915e-05,
+      "loss": 0.0351,
+      "step": 2200
+    },
+    {
+      "epoch": 10.091324200913242,
+      "grad_norm": 0.13184691965579987,
+      "learning_rate": 3.5590345727332033e-05,
+      "loss": 0.0374,
+      "step": 2210
+    },
+    {
+      "epoch": 10.136986301369863,
+      "grad_norm": 0.22351279854774475,
+      "learning_rate": 3.5525114155251146e-05,
+      "loss": 0.0315,
+      "step": 2220
+    },
+    {
+      "epoch": 10.182648401826484,
+      "grad_norm": 0.07609914988279343,
+      "learning_rate": 3.545988258317026e-05,
+      "loss": 0.0347,
+      "step": 2230
+    },
+    {
+      "epoch": 10.228310502283104,
+      "grad_norm": 0.10054602473974228,
+      "learning_rate": 3.539465101108937e-05,
+      "loss": 0.0328,
+      "step": 2240
+    },
+    {
+      "epoch": 10.273972602739725,
+      "grad_norm": 0.06633490324020386,
+      "learning_rate": 3.532941943900848e-05,
+      "loss": 0.0312,
+      "step": 2250
+    },
+    {
+      "epoch": 10.319634703196346,
+      "grad_norm": 0.09284522384405136,
+      "learning_rate": 3.5264187866927594e-05,
+      "loss": 0.0345,
+      "step": 2260
+    },
+    {
+      "epoch": 10.365296803652967,
+      "grad_norm": 0.09927036613225937,
+      "learning_rate": 3.5198956294846707e-05,
+      "loss": 0.0353,
+      "step": 2270
+    },
+    {
+      "epoch": 10.41095890410959,
+      "grad_norm": 0.32926803827285767,
+      "learning_rate": 3.513372472276582e-05,
+      "loss": 0.0362,
+      "step": 2280
+    },
+    {
+      "epoch": 10.45662100456621,
+      "grad_norm": 0.1537335067987442,
+      "learning_rate": 3.506849315068493e-05,
+      "loss": 0.038,
+      "step": 2290
+    },
+    {
+      "epoch": 10.502283105022832,
+      "grad_norm": 0.27000853419303894,
+      "learning_rate": 3.500326157860404e-05,
+      "loss": 0.0372,
+      "step": 2300
+    },
+    {
+      "epoch": 10.547945205479452,
+      "grad_norm": 0.19232715666294098,
+      "learning_rate": 3.493803000652316e-05,
+      "loss": 0.0325,
+      "step": 2310
+    },
+    {
+      "epoch": 10.593607305936073,
+      "grad_norm": 0.09830646961927414,
+      "learning_rate": 3.4872798434442274e-05,
+      "loss": 0.0399,
+      "step": 2320
+    },
+    {
+      "epoch": 10.639269406392694,
+      "grad_norm": 0.16243436932563782,
+      "learning_rate": 3.4807566862361386e-05,
+      "loss": 0.0358,
+      "step": 2330
+    },
+    {
+      "epoch": 10.684931506849315,
+      "grad_norm": 0.11637797206640244,
+      "learning_rate": 3.47423352902805e-05,
+      "loss": 0.0397,
+      "step": 2340
+    },
+    {
+      "epoch": 10.730593607305936,
+      "grad_norm": 0.06575558334589005,
+      "learning_rate": 3.467710371819961e-05,
+      "loss": 0.0313,
+      "step": 2350
+    },
+    {
+      "epoch": 10.776255707762557,
+      "grad_norm": 0.18646180629730225,
+      "learning_rate": 3.461187214611872e-05,
+      "loss": 0.0334,
+      "step": 2360
+    },
+    {
+      "epoch": 10.821917808219178,
+      "grad_norm": 0.14705593883991241,
+      "learning_rate": 3.4546640574037835e-05,
+      "loss": 0.0389,
+      "step": 2370
+    },
+    {
+      "epoch": 10.867579908675799,
+      "grad_norm": 0.14052802324295044,
+      "learning_rate": 3.448140900195695e-05,
+      "loss": 0.0331,
+      "step": 2380
+    },
+    {
+      "epoch": 10.91324200913242,
+      "grad_norm": 0.0970313772559166,
+      "learning_rate": 3.441617742987606e-05,
+      "loss": 0.0331,
+      "step": 2390
+    },
+    {
+      "epoch": 10.95890410958904,
+      "grad_norm": 0.0760510265827179,
+      "learning_rate": 3.435094585779517e-05,
+      "loss": 0.0339,
+      "step": 2400
+    },
+    {
+      "epoch": 11.0,
+      "eval_bertscore_f1": 0.8777465161211239,
+      "eval_bleu": 0.8421703823372678,
+      "eval_loss": 0.032254405319690704,
+      "eval_rougeL": 0.28874573142254134,
+      "eval_runtime": 85.9081,
+      "eval_samples_per_second": 17.495,
+      "eval_steps_per_second": 1.094,
+      "step": 2409
+    },
+    {
+      "epoch": 11.004566210045661,
+      "grad_norm": 0.07779296487569809,
+      "learning_rate": 3.428571428571429e-05,
+      "loss": 0.0346,
+      "step": 2410
+    },
+    {
+      "epoch": 11.050228310502282,
+      "grad_norm": 0.25582781434059143,
+      "learning_rate": 3.42204827136334e-05,
+      "loss": 0.0362,
+      "step": 2420
+    },
+    {
+      "epoch": 11.095890410958905,
+      "grad_norm": 0.09182050079107285,
+      "learning_rate": 3.4155251141552515e-05,
+      "loss": 0.0326,
+      "step": 2430
+    },
+    {
+      "epoch": 11.141552511415526,
+      "grad_norm": 0.35790833830833435,
+      "learning_rate": 3.409001956947163e-05,
+      "loss": 0.0337,
+      "step": 2440
+    },
+    {
+      "epoch": 11.187214611872147,
+      "grad_norm": 0.3586607873439789,
+      "learning_rate": 3.402478799739074e-05,
+      "loss": 0.0427,
+      "step": 2450
+    },
+    {
+      "epoch": 11.232876712328768,
+      "grad_norm": 0.17306306958198547,
+      "learning_rate": 3.395955642530985e-05,
+      "loss": 0.0311,
+      "step": 2460
+    },
+    {
+      "epoch": 11.278538812785389,
+      "grad_norm": 0.08027535676956177,
+      "learning_rate": 3.3894324853228963e-05,
+      "loss": 0.0298,
+      "step": 2470
+    },
+    {
+      "epoch": 11.32420091324201,
+      "grad_norm": 0.05666491016745567,
+      "learning_rate": 3.3829093281148076e-05,
+      "loss": 0.0286,
+      "step": 2480
+    },
+    {
+      "epoch": 11.36986301369863,
+      "grad_norm": 0.0904633179306984,
+      "learning_rate": 3.376386170906719e-05,
+      "loss": 0.0339,
+      "step": 2490
+    },
+    {
+      "epoch": 11.415525114155251,
+      "grad_norm": 0.12512832880020142,
+      "learning_rate": 3.36986301369863e-05,
+      "loss": 0.0451,
+      "step": 2500
+    },
+    {
+      "epoch": 11.461187214611872,
+      "grad_norm": 0.061826564371585846,
+      "learning_rate": 3.363339856490542e-05,
+      "loss": 0.0355,
+      "step": 2510
+    },
+    {
+      "epoch": 11.506849315068493,
+      "grad_norm": 0.19998124241828918,
+      "learning_rate": 3.356816699282453e-05,
+      "loss": 0.0308,
+      "step": 2520
+    },
+    {
+      "epoch": 11.552511415525114,
+      "grad_norm": 0.08238628506660461,
+      "learning_rate": 3.350293542074364e-05,
+      "loss": 0.0336,
+      "step": 2530
+    },
+    {
+      "epoch": 11.598173515981735,
+      "grad_norm": 0.18144136667251587,
+      "learning_rate": 3.3437703848662755e-05,
+      "loss": 0.0303,
+      "step": 2540
+    },
+    {
+      "epoch": 11.643835616438356,
+      "grad_norm": 0.13037236034870148,
+      "learning_rate": 3.337247227658187e-05,
+      "loss": 0.0308,
+      "step": 2550
+    },
+    {
+      "epoch": 11.689497716894977,
+      "grad_norm": 0.16914191842079163,
+      "learning_rate": 3.330724070450098e-05,
+      "loss": 0.0302,
+      "step": 2560
+    },
+    {
+      "epoch": 11.735159817351597,
+      "grad_norm": 0.24281352758407593,
+      "learning_rate": 3.324200913242009e-05,
+      "loss": 0.0306,
+      "step": 2570
+    },
+    {
+      "epoch": 11.780821917808218,
+      "grad_norm": 0.22488336265087128,
+      "learning_rate": 3.3176777560339204e-05,
+      "loss": 0.0362,
+      "step": 2580
+    },
+    {
+      "epoch": 11.826484018264841,
+      "grad_norm": 0.06625436991453171,
+      "learning_rate": 3.3111545988258316e-05,
+      "loss": 0.0321,
+      "step": 2590
+    },
+    {
+      "epoch": 11.872146118721462,
+      "grad_norm": 0.07758279889822006,
+      "learning_rate": 3.304631441617743e-05,
+      "loss": 0.0372,
+      "step": 2600
+    },
+    {
+      "epoch": 11.917808219178083,
+      "grad_norm": 0.08518276363611221,
+      "learning_rate": 3.298108284409655e-05,
+      "loss": 0.0378,
+      "step": 2610
+    },
+    {
+      "epoch": 11.963470319634704,
+      "grad_norm": 0.0794219970703125,
+      "learning_rate": 3.291585127201566e-05,
+      "loss": 0.0294,
+      "step": 2620
+    },
+    {
+      "epoch": 12.0,
+      "eval_bertscore_f1": 0.877530678739884,
+      "eval_bleu": 0.8141384615121525,
+      "eval_loss": 0.032114505767822266,
+      "eval_rougeL": 0.2890521718339253,
+      "eval_runtime": 85.8584,
+      "eval_samples_per_second": 17.506,
+      "eval_steps_per_second": 1.095,
+      "step": 2628
+    },
+    {
+      "epoch": 12.009132420091325,
+      "grad_norm": 0.1260496973991394,
+      "learning_rate": 3.285061969993477e-05,
+      "loss": 0.0365,
+      "step": 2630
+    },
+    {
+      "epoch": 12.054794520547945,
+      "grad_norm": 0.09780646860599518,
+      "learning_rate": 3.2785388127853884e-05,
+      "loss": 0.0331,
+      "step": 2640
+    },
+    {
+      "epoch": 12.100456621004566,
+      "grad_norm": 0.14175942540168762,
+      "learning_rate": 3.2720156555772996e-05,
+      "loss": 0.0364,
+      "step": 2650
+    },
+    {
+      "epoch": 12.146118721461187,
+      "grad_norm": 0.12439887225627899,
+      "learning_rate": 3.265492498369211e-05,
+      "loss": 0.0291,
+      "step": 2660
+    },
+    {
+      "epoch": 12.191780821917808,
+      "grad_norm": 0.09981077909469604,
+      "learning_rate": 3.258969341161122e-05,
+      "loss": 0.0363,
+      "step": 2670
+    },
+    {
+      "epoch": 12.237442922374429,
+      "grad_norm": 0.1319703310728073,
+      "learning_rate": 3.252446183953033e-05,
+      "loss": 0.0307,
+      "step": 2680
+    },
+    {
+      "epoch": 12.28310502283105,
+      "grad_norm": 0.10203209519386292,
+      "learning_rate": 3.2459230267449445e-05,
+      "loss": 0.0308,
+      "step": 2690
+    },
+    {
+      "epoch": 12.32876712328767,
+      "grad_norm": 0.3156202435493469,
+      "learning_rate": 3.239399869536856e-05,
+      "loss": 0.0333,
+      "step": 2700
+    },
+    {
+      "epoch": 12.374429223744292,
+      "grad_norm": 0.08748015016317368,
+      "learning_rate": 3.2328767123287676e-05,
+      "loss": 0.0389,
+      "step": 2710
+    },
+    {
+      "epoch": 12.420091324200913,
+      "grad_norm": 0.1384272575378418,
+      "learning_rate": 3.226353555120679e-05,
+      "loss": 0.0282,
+      "step": 2720
+    },
+    {
+      "epoch": 12.465753424657533,
+      "grad_norm": 0.052130818367004395,
+      "learning_rate": 3.21983039791259e-05,
+      "loss": 0.0283,
+      "step": 2730
+    },
+    {
+      "epoch": 12.511415525114156,
+      "grad_norm": 0.14489006996154785,
+      "learning_rate": 3.213307240704501e-05,
+      "loss": 0.0398,
+      "step": 2740
+    },
+    {
+      "epoch": 12.557077625570777,
+      "grad_norm": 0.09531650692224503,
+      "learning_rate": 3.2067840834964125e-05,
+      "loss": 0.0289,
+      "step": 2750
+    },
+    {
+      "epoch": 12.602739726027398,
+      "grad_norm": 0.0696859359741211,
+      "learning_rate": 3.200260926288324e-05,
+      "loss": 0.034,
+      "step": 2760
+    },
+    {
+      "epoch": 12.648401826484019,
+      "grad_norm": 0.14592404663562775,
+      "learning_rate": 3.193737769080235e-05,
+      "loss": 0.0334,
+      "step": 2770
+    },
+    {
+      "epoch": 12.69406392694064,
+      "grad_norm": 0.08082027733325958,
+      "learning_rate": 3.187214611872146e-05,
+      "loss": 0.0295,
+      "step": 2780
+    },
+    {
+      "epoch": 12.73972602739726,
+      "grad_norm": 0.08294638246297836,
+      "learning_rate": 3.180691454664057e-05,
+      "loss": 0.0276,
+      "step": 2790
+    },
+    {
+      "epoch": 12.785388127853881,
+      "grad_norm": 0.05551337078213692,
+      "learning_rate": 3.174168297455969e-05,
+      "loss": 0.0297,
+      "step": 2800
+    },
+    {
+      "epoch": 12.831050228310502,
+      "grad_norm": 0.09836950153112411,
+      "learning_rate": 3.1676451402478804e-05,
+      "loss": 0.0374,
+      "step": 2810
+    },
+    {
+      "epoch": 12.876712328767123,
+      "grad_norm": 0.13783608376979828,
+      "learning_rate": 3.1611219830397917e-05,
+      "loss": 0.0366,
+      "step": 2820
+    },
+    {
+      "epoch": 12.922374429223744,
+      "grad_norm": 0.11985262483358383,
+      "learning_rate": 3.154598825831703e-05,
+      "loss": 0.0351,
+      "step": 2830
+    },
+    {
+      "epoch": 12.968036529680365,
+      "grad_norm": 0.18932116031646729,
+      "learning_rate": 3.148075668623614e-05,
+      "loss": 0.0406,
+      "step": 2840
+    },
+    {
+      "epoch": 13.0,
+      "eval_bertscore_f1": 0.8794350181907633,
+      "eval_bleu": 0.7714270941582966,
+      "eval_loss": 0.03142493963241577,
+      "eval_rougeL": 0.2958524493369111,
+      "eval_runtime": 85.3679,
+      "eval_samples_per_second": 17.606,
+      "eval_steps_per_second": 1.101,
+      "step": 2847
+    },
+    {
+      "epoch": 13.013698630136986,
+      "grad_norm": 0.09223882853984833,
+      "learning_rate": 3.141552511415525e-05,
+      "loss": 0.0323,
+      "step": 2850
+    },
+    {
+      "epoch": 13.059360730593607,
+      "grad_norm": 0.10966315865516663,
+      "learning_rate": 3.1350293542074365e-05,
+      "loss": 0.0308,
+      "step": 2860
+    },
+    {
+      "epoch": 13.105022831050228,
+      "grad_norm": 0.09420251101255417,
+      "learning_rate": 3.128506196999348e-05,
+      "loss": 0.0323,
+      "step": 2870
+    },
+    {
+      "epoch": 13.150684931506849,
+      "grad_norm": 0.14217647910118103,
+      "learning_rate": 3.121983039791259e-05,
+      "loss": 0.0341,
+      "step": 2880
+    },
+    {
+      "epoch": 13.19634703196347,
+      "grad_norm": 0.13941463828086853,
+      "learning_rate": 3.11545988258317e-05,
+      "loss": 0.0322,
+      "step": 2890
+    },
+    {
+      "epoch": 13.242009132420092,
+      "grad_norm": 0.05103166028857231,
+      "learning_rate": 3.108936725375082e-05,
+      "loss": 0.0323,
+      "step": 2900
+    },
+    {
+      "epoch": 13.287671232876713,
+      "grad_norm": 0.13112640380859375,
+      "learning_rate": 3.102413568166993e-05,
+      "loss": 0.0291,
+      "step": 2910
+    },
+    {
+      "epoch": 13.333333333333334,
+      "grad_norm": 0.10072794556617737,
+      "learning_rate": 3.0958904109589045e-05,
+      "loss": 0.0336,
+      "step": 2920
+    },
+    {
+      "epoch": 13.378995433789955,
+      "grad_norm": 0.05783500149846077,
+      "learning_rate": 3.089367253750816e-05,
+      "loss": 0.0308,
+      "step": 2930
+    },
+    {
+      "epoch": 13.424657534246576,
+      "grad_norm": 0.06233891472220421,
+      "learning_rate": 3.082844096542727e-05,
+      "loss": 0.0316,
+      "step": 2940
+    },
+    {
+      "epoch": 13.470319634703197,
+      "grad_norm": 0.08696655184030533,
+      "learning_rate": 3.076320939334638e-05,
+      "loss": 0.0303,
+      "step": 2950
+    },
+    {
+      "epoch": 13.515981735159817,
+      "grad_norm": 0.12923157215118408,
+      "learning_rate": 3.0697977821265494e-05,
+      "loss": 0.0317,
+      "step": 2960
+    },
+    {
+      "epoch": 13.561643835616438,
+      "grad_norm": 0.06819671392440796,
+      "learning_rate": 3.0632746249184606e-05,
+      "loss": 0.0338,
+      "step": 2970
+    },
+    {
+      "epoch": 13.60730593607306,
+      "grad_norm": 0.05921826884150505,
+      "learning_rate": 3.056751467710372e-05,
+      "loss": 0.0306,
+      "step": 2980
+    },
+    {
+      "epoch": 13.65296803652968,
+      "grad_norm": 0.12270516902208328,
+      "learning_rate": 3.0502283105022834e-05,
+      "loss": 0.0354,
+      "step": 2990
+    },
+    {
+      "epoch": 13.698630136986301,
+      "grad_norm": 0.08732906728982925,
+      "learning_rate": 3.0437051532941946e-05,
+      "loss": 0.0357,
+      "step": 3000
+    },
+    {
+      "epoch": 13.744292237442922,
+      "grad_norm": 0.23829276859760284,
+      "learning_rate": 3.0371819960861058e-05,
+      "loss": 0.0343,
+      "step": 3010
+    },
+    {
+      "epoch": 13.789954337899543,
+      "grad_norm": 0.05741345137357712,
+      "learning_rate": 3.030658838878017e-05,
+      "loss": 0.0296,
+      "step": 3020
+    },
+    {
+      "epoch": 13.835616438356164,
+      "grad_norm": 0.08565385639667511,
+      "learning_rate": 3.0241356816699286e-05,
+      "loss": 0.0355,
+      "step": 3030
+    },
+    {
+      "epoch": 13.881278538812785,
+      "grad_norm": 0.07784215360879898,
+      "learning_rate": 3.0176125244618398e-05,
+      "loss": 0.0339,
+      "step": 3040
+    },
+    {
+      "epoch": 13.926940639269407,
+      "grad_norm": 0.059760138392448425,
+      "learning_rate": 3.011089367253751e-05,
+      "loss": 0.0294,
+      "step": 3050
+    },
+    {
+      "epoch": 13.972602739726028,
+      "grad_norm": 0.11744826287031174,
+      "learning_rate": 3.0045662100456622e-05,
+      "loss": 0.0359,
+      "step": 3060
+    },
+    {
+      "epoch": 14.0,
+      "eval_bertscore_f1": 0.8785086652872488,
+      "eval_bleu": 0.44968903656050474,
+      "eval_loss": 0.03112851269543171,
+      "eval_rougeL": 0.3259152581485867,
+      "eval_runtime": 86.2048,
+      "eval_samples_per_second": 17.435,
+      "eval_steps_per_second": 1.09,
+      "step": 3066
+    },
+    {
+      "epoch": 14.018264840182649,
+      "grad_norm": 0.08186227828264236,
+      "learning_rate": 2.9980430528375734e-05,
+      "loss": 0.0382,
+      "step": 3070
+    },
+    {
+      "epoch": 14.06392694063927,
+      "grad_norm": 0.3178916871547699,
+      "learning_rate": 2.991519895629485e-05,
+      "loss": 0.0377,
+      "step": 3080
+    },
+    {
+      "epoch": 14.10958904109589,
+      "grad_norm": 0.07393322885036469,
+      "learning_rate": 2.9849967384213962e-05,
+      "loss": 0.0284,
+      "step": 3090
+    },
+    {
+      "epoch": 14.155251141552512,
+      "grad_norm": 0.07416381686925888,
+      "learning_rate": 2.9784735812133074e-05,
+      "loss": 0.0301,
+      "step": 3100
+    },
+    {
+      "epoch": 14.200913242009133,
+      "grad_norm": 0.1198846623301506,
+      "learning_rate": 2.9719504240052186e-05,
+      "loss": 0.0316,
+      "step": 3110
+    },
+    {
+      "epoch": 14.246575342465754,
+      "grad_norm": 0.07470612972974777,
+      "learning_rate": 2.96542726679713e-05,
+      "loss": 0.0335,
+      "step": 3120
+    },
+    {
+      "epoch": 14.292237442922374,
+      "grad_norm": 0.18774984776973724,
+      "learning_rate": 2.9589041095890414e-05,
+      "loss": 0.0366,
+      "step": 3130
+    },
+    {
+      "epoch": 14.337899543378995,
+      "grad_norm": 0.11751188337802887,
+      "learning_rate": 2.9523809523809526e-05,
+      "loss": 0.0382,
+      "step": 3140
+    },
+    {
+      "epoch": 14.383561643835616,
+      "grad_norm": 0.08535225689411163,
+      "learning_rate": 2.945857795172864e-05,
+      "loss": 0.0331,
+      "step": 3150
+    },
+    {
+      "epoch": 14.429223744292237,
+      "grad_norm": 0.07403095066547394,
+      "learning_rate": 2.939334637964775e-05,
+      "loss": 0.0261,
+      "step": 3160
+    },
+    {
+      "epoch": 14.474885844748858,
+      "grad_norm": 0.19732074439525604,
+      "learning_rate": 2.9328114807566863e-05,
+      "loss": 0.0332,
+      "step": 3170
+    },
+    {
+      "epoch": 14.520547945205479,
+      "grad_norm": 0.2637515366077423,
+      "learning_rate": 2.926288323548598e-05,
+      "loss": 0.0327,
+      "step": 3180
+    },
+    {
+      "epoch": 14.5662100456621,
+      "grad_norm": 0.057027578353881836,
+      "learning_rate": 2.919765166340509e-05,
+      "loss": 0.0304,
+      "step": 3190
+    },
+    {
+      "epoch": 14.61187214611872,
+      "grad_norm": 0.0682603120803833,
+      "learning_rate": 2.9132420091324203e-05,
+      "loss": 0.0286,
+      "step": 3200
+    },
+    {
+      "epoch": 14.657534246575342,
+      "grad_norm": 0.07055100798606873,
+      "learning_rate": 2.9067188519243315e-05,
+      "loss": 0.0332,
+      "step": 3210
+    },
+    {
+      "epoch": 14.703196347031964,
+      "grad_norm": 0.3047225773334503,
+      "learning_rate": 2.9001956947162427e-05,
+      "loss": 0.0364,
+      "step": 3220
+    },
+    {
+      "epoch": 14.748858447488585,
+      "grad_norm": 0.052217088639736176,
+      "learning_rate": 2.8936725375081543e-05,
+      "loss": 0.0305,
+      "step": 3230
+    },
+    {
+      "epoch": 14.794520547945206,
+      "grad_norm": 0.13912373781204224,
+      "learning_rate": 2.8871493803000655e-05,
+      "loss": 0.0356,
+      "step": 3240
+    },
+    {
+      "epoch": 14.840182648401827,
+      "grad_norm": 0.06369337439537048,
+      "learning_rate": 2.8806262230919767e-05,
+      "loss": 0.0304,
+      "step": 3250
+    },
+    {
+      "epoch": 14.885844748858448,
+      "grad_norm": 0.09859542548656464,
+      "learning_rate": 2.874103065883888e-05,
+      "loss": 0.0305,
+      "step": 3260
+    },
+    {
+      "epoch": 14.931506849315069,
+      "grad_norm": 0.08918995410203934,
+      "learning_rate": 2.867579908675799e-05,
+      "loss": 0.0282,
+      "step": 3270
+    },
+    {
+      "epoch": 14.97716894977169,
+      "grad_norm": 0.09965560585260391,
+      "learning_rate": 2.8610567514677107e-05,
+      "loss": 0.0298,
+      "step": 3280
+    },
+    {
+      "epoch": 15.0,
+      "eval_bertscore_f1": 0.8813119770603345,
+      "eval_bleu": 0.6140646535986611,
+      "eval_loss": 0.030848076567053795,
+      "eval_rougeL": 0.3100828866535566,
+      "eval_runtime": 85.4538,
+      "eval_samples_per_second": 17.588,
+      "eval_steps_per_second": 1.1,
+      "step": 3285
+    },
+    {
+      "epoch": 15.02283105022831,
+      "grad_norm": 0.07896128296852112,
+      "learning_rate": 2.854533594259622e-05,
+      "loss": 0.0273,
+      "step": 3290
+    },
+    {
+      "epoch": 15.068493150684931,
+      "grad_norm": 0.25050729513168335,
+      "learning_rate": 2.848010437051533e-05,
+      "loss": 0.0339,
+      "step": 3300
+    },
+    {
+      "epoch": 15.114155251141552,
+      "grad_norm": 0.19186432659626007,
+      "learning_rate": 2.8414872798434443e-05,
+      "loss": 0.0273,
+      "step": 3310
+    },
+    {
+      "epoch": 15.159817351598173,
+      "grad_norm": 0.09528006613254547,
+      "learning_rate": 2.8349641226353556e-05,
+      "loss": 0.0354,
+      "step": 3320
+    },
+    {
+      "epoch": 15.205479452054794,
+      "grad_norm": 0.09040423482656479,
+      "learning_rate": 2.828440965427267e-05,
+      "loss": 0.0311,
+      "step": 3330
+    },
+    {
+      "epoch": 15.251141552511415,
+      "grad_norm": 0.0519646555185318,
+      "learning_rate": 2.8219178082191783e-05,
+      "loss": 0.0275,
+      "step": 3340
+    },
+    {
+      "epoch": 15.296803652968036,
+      "grad_norm": 0.17647899687290192,
+      "learning_rate": 2.8153946510110896e-05,
+      "loss": 0.0358,
+      "step": 3350
+    },
+    {
+      "epoch": 15.342465753424657,
+      "grad_norm": 0.099759042263031,
+      "learning_rate": 2.8088714938030008e-05,
+      "loss": 0.0277,
+      "step": 3360
+    },
+    {
+      "epoch": 15.38812785388128,
+      "grad_norm": 0.060709647834300995,
+      "learning_rate": 2.802348336594912e-05,
+      "loss": 0.029,
+      "step": 3370
+    },
+    {
+      "epoch": 15.4337899543379,
+      "grad_norm": 0.08363103866577148,
+      "learning_rate": 2.7958251793868235e-05,
+      "loss": 0.0289,
+      "step": 3380
+    },
+    {
+      "epoch": 15.479452054794521,
+      "grad_norm": 0.0995960682630539,
+      "learning_rate": 2.7893020221787348e-05,
+      "loss": 0.0342,
+      "step": 3390
+    },
+    {
+      "epoch": 15.525114155251142,
+      "grad_norm": 0.0788845494389534,
+      "learning_rate": 2.782778864970646e-05,
+      "loss": 0.0286,
+      "step": 3400
+    },
+    {
+      "epoch": 15.570776255707763,
+      "grad_norm": 0.07989612221717834,
+      "learning_rate": 2.7762557077625572e-05,
+      "loss": 0.0308,
+      "step": 3410
+    },
+    {
+      "epoch": 15.616438356164384,
+      "grad_norm": 0.05240185931324959,
+      "learning_rate": 2.7697325505544684e-05,
+      "loss": 0.0282,
+      "step": 3420
+    },
+    {
+      "epoch": 15.662100456621005,
+      "grad_norm": 0.12877096235752106,
+      "learning_rate": 2.76320939334638e-05,
+      "loss": 0.0288,
+      "step": 3430
+    },
+    {
+      "epoch": 15.707762557077626,
+      "grad_norm": 0.25699493288993835,
+      "learning_rate": 2.7566862361382912e-05,
+      "loss": 0.0346,
+      "step": 3440
+    },
+    {
+      "epoch": 15.753424657534246,
+      "grad_norm": 0.37610942125320435,
+      "learning_rate": 2.7501630789302024e-05,
+      "loss": 0.0325,
+      "step": 3450
+    },
+    {
+      "epoch": 15.799086757990867,
+      "grad_norm": 0.19347558915615082,
+      "learning_rate": 2.7436399217221136e-05,
+      "loss": 0.0332,
+      "step": 3460
+    },
+    {
+      "epoch": 15.844748858447488,
+      "grad_norm": 0.148736372590065,
+      "learning_rate": 2.737116764514025e-05,
+      "loss": 0.0269,
+      "step": 3470
+    },
+    {
+      "epoch": 15.89041095890411,
+      "grad_norm": 0.3108772039413452,
+      "learning_rate": 2.7305936073059364e-05,
+      "loss": 0.0376,
+      "step": 3480
+    },
+    {
+      "epoch": 15.93607305936073,
+      "grad_norm": 0.1941206008195877,
+      "learning_rate": 2.7240704500978476e-05,
+      "loss": 0.034,
+      "step": 3490
+    },
+    {
+      "epoch": 15.981735159817351,
+      "grad_norm": 0.1268136203289032,
+      "learning_rate": 2.7175472928897588e-05,
+      "loss": 0.0308,
+      "step": 3500
+    },
+    {
+      "epoch": 16.0,
+      "eval_bertscore_f1": 0.8762329206098657,
+      "eval_bleu": 0.5864006312262637,
+      "eval_loss": 0.030562568455934525,
+      "eval_rougeL": 0.2886638006885884,
+      "eval_runtime": 85.2823,
+      "eval_samples_per_second": 17.624,
+      "eval_steps_per_second": 1.102,
+      "step": 3504
+    },
+    {
+      "epoch": 16.027397260273972,
+      "grad_norm": 0.07895983755588531,
+      "learning_rate": 2.71102413568167e-05,
+      "loss": 0.0374,
+      "step": 3510
+    },
+    {
+      "epoch": 16.073059360730593,
+      "grad_norm": 0.09038267284631729,
+      "learning_rate": 2.7045009784735813e-05,
+      "loss": 0.0304,
+      "step": 3520
+    },
+    {
+      "epoch": 16.118721461187214,
+      "grad_norm": 0.06773549318313599,
+      "learning_rate": 2.6979778212654928e-05,
+      "loss": 0.0301,
+      "step": 3530
+    },
+    {
+      "epoch": 16.164383561643834,
+      "grad_norm": 0.08219348639249802,
+      "learning_rate": 2.691454664057404e-05,
+      "loss": 0.03,
+      "step": 3540
+    },
+    {
+      "epoch": 16.210045662100455,
+      "grad_norm": 0.08681906014680862,
+      "learning_rate": 2.6849315068493153e-05,
+      "loss": 0.0337,
+      "step": 3550
+    },
+    {
+      "epoch": 16.255707762557076,
+      "grad_norm": 0.07358887046575546,
+      "learning_rate": 2.6784083496412265e-05,
+      "loss": 0.0281,
+      "step": 3560
+    },
+    {
+      "epoch": 16.301369863013697,
+      "grad_norm": 0.07765143364667892,
+      "learning_rate": 2.6718851924331377e-05,
+      "loss": 0.0321,
+      "step": 3570
+    },
+    {
+      "epoch": 16.347031963470318,
+      "grad_norm": 0.05562649667263031,
+      "learning_rate": 2.6653620352250492e-05,
+      "loss": 0.0308,
+      "step": 3580
+    },
+    {
+      "epoch": 16.39269406392694,
+      "grad_norm": 0.055072009563446045,
+      "learning_rate": 2.6588388780169605e-05,
+      "loss": 0.0306,
+      "step": 3590
+    },
+    {
+      "epoch": 16.438356164383563,
+      "grad_norm": 0.07029841840267181,
+      "learning_rate": 2.6523157208088717e-05,
+      "loss": 0.0304,
+      "step": 3600
+    },
+    {
+      "epoch": 16.484018264840184,
+      "grad_norm": 0.06442932784557343,
+      "learning_rate": 2.645792563600783e-05,
+      "loss": 0.0285,
+      "step": 3610
+    },
+    {
+      "epoch": 16.529680365296805,
+      "grad_norm": 0.10228294134140015,
+      "learning_rate": 2.639269406392694e-05,
+      "loss": 0.0299,
+      "step": 3620
+    },
+    {
+      "epoch": 16.575342465753426,
+      "grad_norm": 0.060761261731386185,
+      "learning_rate": 2.6327462491846057e-05,
+      "loss": 0.0288,
+      "step": 3630
+    },
+    {
+      "epoch": 16.621004566210047,
+      "grad_norm": 0.1510060727596283,
+      "learning_rate": 2.626223091976517e-05,
+      "loss": 0.0286,
+      "step": 3640
+    },
+    {
+      "epoch": 16.666666666666668,
+      "grad_norm": 0.09816893935203552,
+      "learning_rate": 2.619699934768428e-05,
+      "loss": 0.0293,
+      "step": 3650
+    },
+    {
+      "epoch": 16.71232876712329,
+      "grad_norm": 0.10027530789375305,
+      "learning_rate": 2.6131767775603393e-05,
+      "loss": 0.0287,
+      "step": 3660
+    },
+    {
+      "epoch": 16.75799086757991,
+      "grad_norm": 0.08933715522289276,
+      "learning_rate": 2.6066536203522505e-05,
+      "loss": 0.03,
+      "step": 3670
+    },
+    {
+      "epoch": 16.80365296803653,
+      "grad_norm": 0.06297653168439865,
+      "learning_rate": 2.600130463144162e-05,
+      "loss": 0.0291,
+      "step": 3680
+    },
+    {
+      "epoch": 16.84931506849315,
+      "grad_norm": 0.1338263303041458,
+      "learning_rate": 2.5936073059360733e-05,
+      "loss": 0.0369,
+      "step": 3690
+    },
+    {
+      "epoch": 16.894977168949772,
+      "grad_norm": 0.25006791949272156,
+      "learning_rate": 2.5870841487279845e-05,
+      "loss": 0.0301,
+      "step": 3700
+    },
+    {
+      "epoch": 16.940639269406393,
+      "grad_norm": 0.26022225618362427,
+      "learning_rate": 2.5805609915198957e-05,
+      "loss": 0.0335,
+      "step": 3710
+    },
+    {
+      "epoch": 16.986301369863014,
+      "grad_norm": 0.10207168757915497,
+      "learning_rate": 2.574037834311807e-05,
+      "loss": 0.03,
+      "step": 3720
+    },
+    {
+      "epoch": 17.0,
+      "eval_bertscore_f1": 0.879923531196947,
+      "eval_bleu": 0.6651508967347922,
+      "eval_loss": 0.030300738289952278,
+      "eval_rougeL": 0.3055340180010153,
+      "eval_runtime": 85.0894,
+      "eval_samples_per_second": 17.664,
+      "eval_steps_per_second": 1.105,
+      "step": 3723
+    },
+    {
+      "epoch": 17.031963470319635,
+      "grad_norm": 0.05873997136950493,
+      "learning_rate": 2.5675146771037185e-05,
+      "loss": 0.0333,
+      "step": 3730
+    },
+    {
+      "epoch": 17.077625570776256,
+      "grad_norm": 0.1252429187297821,
+      "learning_rate": 2.5609915198956297e-05,
+      "loss": 0.0299,
+      "step": 3740
+    },
+    {
+      "epoch": 17.123287671232877,
+      "grad_norm": 0.07967006415128708,
+      "learning_rate": 2.554468362687541e-05,
+      "loss": 0.0295,
+      "step": 3750
+    },
+    {
+      "epoch": 17.168949771689498,
+      "grad_norm": 0.09141400456428528,
+      "learning_rate": 2.547945205479452e-05,
+      "loss": 0.0282,
+      "step": 3760
+    },
+    {
+      "epoch": 17.21461187214612,
+      "grad_norm": 0.18087051808834076,
+      "learning_rate": 2.5414220482713634e-05,
+      "loss": 0.0342,
+      "step": 3770
+    },
+    {
+      "epoch": 17.26027397260274,
+      "grad_norm": 0.06933823972940445,
+      "learning_rate": 2.534898891063275e-05,
+      "loss": 0.0334,
+      "step": 3780
+    },
+    {
+      "epoch": 17.30593607305936,
+      "grad_norm": 0.06062929704785347,
+      "learning_rate": 2.528375733855186e-05,
+      "loss": 0.0304,
+      "step": 3790
+    },
+    {
+      "epoch": 17.35159817351598,
+      "grad_norm": 0.05274573713541031,
+      "learning_rate": 2.5218525766470974e-05,
+      "loss": 0.0299,
+      "step": 3800
+    },
+    {
+      "epoch": 17.397260273972602,
+      "grad_norm": 0.11018598824739456,
+      "learning_rate": 2.5153294194390086e-05,
+      "loss": 0.0297,
+      "step": 3810
+    },
+    {
+      "epoch": 17.442922374429223,
+      "grad_norm": 0.2097170352935791,
+      "learning_rate": 2.5088062622309198e-05,
+      "loss": 0.0311,
+      "step": 3820
+    },
+    {
+      "epoch": 17.488584474885844,
+      "grad_norm": 0.06161818653345108,
+      "learning_rate": 2.5022831050228314e-05,
+      "loss": 0.0261,
+      "step": 3830
+    },
+    {
+      "epoch": 17.534246575342465,
+      "grad_norm": 0.10092420876026154,
+      "learning_rate": 2.4957599478147426e-05,
+      "loss": 0.0348,
+      "step": 3840
+    },
+    {
+      "epoch": 17.579908675799086,
+      "grad_norm": 0.06519993394613266,
+      "learning_rate": 2.4892367906066538e-05,
+      "loss": 0.0296,
+      "step": 3850
+    },
+    {
+      "epoch": 17.625570776255707,
+      "grad_norm": 0.08260706067085266,
+      "learning_rate": 2.482713633398565e-05,
+      "loss": 0.0362,
+      "step": 3860
+    },
+    {
+      "epoch": 17.671232876712327,
+      "grad_norm": 0.07475756853818893,
+      "learning_rate": 2.4761904761904762e-05,
+      "loss": 0.0296,
+      "step": 3870
+    },
+    {
+      "epoch": 17.71689497716895,
+      "grad_norm": 0.05199064686894417,
+      "learning_rate": 2.4696673189823878e-05,
+      "loss": 0.0281,
+      "step": 3880
+    },
+    {
+      "epoch": 17.76255707762557,
+      "grad_norm": 0.09524153172969818,
+      "learning_rate": 2.463144161774299e-05,
+      "loss": 0.031,
+      "step": 3890
+    },
+    {
+      "epoch": 17.80821917808219,
+      "grad_norm": 0.05123337730765343,
+      "learning_rate": 2.4566210045662102e-05,
+      "loss": 0.0306,
+      "step": 3900
+    },
+    {
+      "epoch": 17.853881278538815,
+      "grad_norm": 0.06459668278694153,
+      "learning_rate": 2.4500978473581214e-05,
+      "loss": 0.0287,
+      "step": 3910
+    },
+    {
+      "epoch": 17.899543378995435,
+      "grad_norm": 0.06326840072870255,
+      "learning_rate": 2.4435746901500327e-05,
+      "loss": 0.0287,
+      "step": 3920
+    },
+    {
+      "epoch": 17.945205479452056,
+      "grad_norm": 0.060352522879838943,
+      "learning_rate": 2.4370515329419442e-05,
+      "loss": 0.0344,
+      "step": 3930
+    },
+    {
+      "epoch": 17.990867579908677,
+      "grad_norm": 0.08868108689785004,
+      "learning_rate": 2.4305283757338554e-05,
+      "loss": 0.0276,
+      "step": 3940
+    },
+    {
+      "epoch": 18.0,
+      "eval_bertscore_f1": 0.8797207474311985,
+      "eval_bleu": 0.5060863215577377,
+      "eval_loss": 0.030214933678507805,
+      "eval_rougeL": 0.3247787979416723,
+      "eval_runtime": 85.499,
+      "eval_samples_per_second": 17.579,
+      "eval_steps_per_second": 1.099,
+      "step": 3942
+    },
+    {
+      "epoch": 18.036529680365298,
+      "grad_norm": 0.10745666921138763,
+      "learning_rate": 2.4240052185257666e-05,
+      "loss": 0.0318,
+      "step": 3950
+    },
+    {
+      "epoch": 18.08219178082192,
+      "grad_norm": 0.05487235262989998,
+      "learning_rate": 2.417482061317678e-05,
+      "loss": 0.0279,
+      "step": 3960
+    },
+    {
+      "epoch": 18.12785388127854,
+      "grad_norm": 0.14988921582698822,
+      "learning_rate": 2.410958904109589e-05,
+      "loss": 0.0323,
+      "step": 3970
+    },
+    {
+      "epoch": 18.17351598173516,
+      "grad_norm": 0.05322180688381195,
+      "learning_rate": 2.4044357469015006e-05,
+      "loss": 0.0305,
+      "step": 3980
+    },
+    {
+      "epoch": 18.21917808219178,
+      "grad_norm": 0.08688576519489288,
+      "learning_rate": 2.397912589693412e-05,
+      "loss": 0.0285,
+      "step": 3990
+    },
+    {
+      "epoch": 18.264840182648403,
+      "grad_norm": 0.064597949385643,
+      "learning_rate": 2.391389432485323e-05,
+      "loss": 0.0283,
+      "step": 4000
+    },
+    {
+      "epoch": 18.310502283105023,
+      "grad_norm": 0.23966668546199799,
+      "learning_rate": 2.3848662752772343e-05,
+      "loss": 0.0307,
+      "step": 4010
+    },
+    {
+      "epoch": 18.356164383561644,
+      "grad_norm": 0.27511870861053467,
+      "learning_rate": 2.3783431180691455e-05,
+      "loss": 0.0281,
+      "step": 4020
+    },
+    {
+      "epoch": 18.401826484018265,
+      "grad_norm": 0.23338817059993744,
+      "learning_rate": 2.371819960861057e-05,
+      "loss": 0.0345,
+      "step": 4030
+    },
+    {
+      "epoch": 18.447488584474886,
+      "grad_norm": 0.07026497274637222,
+      "learning_rate": 2.3652968036529683e-05,
+      "loss": 0.0333,
+      "step": 4040
+    },
+    {
+      "epoch": 18.493150684931507,
+      "grad_norm": 0.11002654582262039,
+      "learning_rate": 2.3587736464448795e-05,
+      "loss": 0.0278,
+      "step": 4050
+    },
+    {
+      "epoch": 18.538812785388128,
+      "grad_norm": 0.18548107147216797,
+      "learning_rate": 2.3522504892367907e-05,
+      "loss": 0.0299,
+      "step": 4060
+    },
+    {
+      "epoch": 18.58447488584475,
+      "grad_norm": 0.053734730929136276,
+      "learning_rate": 2.345727332028702e-05,
+      "loss": 0.029,
+      "step": 4070
+    },
+    {
+      "epoch": 18.63013698630137,
+      "grad_norm": 0.08840513974428177,
+      "learning_rate": 2.3392041748206135e-05,
+      "loss": 0.0276,
+      "step": 4080
+    },
+    {
+      "epoch": 18.67579908675799,
+      "grad_norm": 0.07163436710834503,
+      "learning_rate": 2.3326810176125247e-05,
+      "loss": 0.0327,
+      "step": 4090
+    },
+    {
+      "epoch": 18.72146118721461,
+      "grad_norm": 0.07794025540351868,
+      "learning_rate": 2.326157860404436e-05,
+      "loss": 0.0277,
+      "step": 4100
+    },
+    {
+      "epoch": 18.767123287671232,
+      "grad_norm": 0.25026071071624756,
+      "learning_rate": 2.319634703196347e-05,
+      "loss": 0.0271,
+      "step": 4110
+    },
+    {
+      "epoch": 18.812785388127853,
+      "grad_norm": 0.05697787553071976,
+      "learning_rate": 2.3131115459882584e-05,
+      "loss": 0.0368,
+      "step": 4120
+    },
+    {
+      "epoch": 18.858447488584474,
+      "grad_norm": 0.31116795539855957,
+      "learning_rate": 2.30658838878017e-05,
+      "loss": 0.0358,
+      "step": 4130
+    },
+    {
+      "epoch": 18.904109589041095,
+      "grad_norm": 0.07639028131961823,
+      "learning_rate": 2.300065231572081e-05,
+      "loss": 0.0268,
+      "step": 4140
+    },
+    {
+      "epoch": 18.949771689497716,
+      "grad_norm": 0.10291515290737152,
+      "learning_rate": 2.2935420743639923e-05,
+      "loss": 0.03,
+      "step": 4150
+    },
+    {
+      "epoch": 18.995433789954337,
+      "grad_norm": 0.06285667419433594,
+      "learning_rate": 2.2870189171559036e-05,
+      "loss": 0.0293,
+      "step": 4160
+    },
+    {
+      "epoch": 19.0,
+      "eval_bertscore_f1": 0.8820803382360214,
+      "eval_bleu": 0.6120517633158683,
+      "eval_loss": 0.03010978177189827,
+      "eval_rougeL": 0.3162528158829476,
+      "eval_runtime": 85.0314,
+      "eval_samples_per_second": 17.676,
+      "eval_steps_per_second": 1.105,
+      "step": 4161
+    },
+    {
+      "epoch": 19.041095890410958,
+      "grad_norm": 0.06922904402017593,
+      "learning_rate": 2.2804957599478148e-05,
+      "loss": 0.0306,
+      "step": 4170
+    },
+    {
+      "epoch": 19.08675799086758,
+      "grad_norm": 0.07178625464439392,
+      "learning_rate": 2.2739726027397263e-05,
+      "loss": 0.0296,
+      "step": 4180
+    },
+    {
+      "epoch": 19.1324200913242,
+      "grad_norm": 0.18297427892684937,
+      "learning_rate": 2.2674494455316376e-05,
+      "loss": 0.0277,
+      "step": 4190
+    },
+    {
+      "epoch": 19.17808219178082,
+      "grad_norm": 0.08464095741510391,
+      "learning_rate": 2.2609262883235488e-05,
+      "loss": 0.0276,
+      "step": 4200
+    },
+    {
+      "epoch": 19.22374429223744,
+      "grad_norm": 0.09229449927806854,
+      "learning_rate": 2.25440313111546e-05,
+      "loss": 0.0269,
+      "step": 4210
+    },
+    {
+      "epoch": 19.269406392694062,
+      "grad_norm": 0.05837394297122955,
+      "learning_rate": 2.2478799739073715e-05,
+      "loss": 0.0292,
+      "step": 4220
+    },
+    {
+      "epoch": 19.315068493150687,
+      "grad_norm": 0.0631113052368164,
+      "learning_rate": 2.2413568166992828e-05,
+      "loss": 0.0316,
+      "step": 4230
+    },
+    {
+      "epoch": 19.360730593607308,
+      "grad_norm": 0.06033403053879738,
+      "learning_rate": 2.234833659491194e-05,
+      "loss": 0.0341,
+      "step": 4240
+    },
+    {
+      "epoch": 19.40639269406393,
+      "grad_norm": 0.04350203275680542,
+      "learning_rate": 2.2283105022831052e-05,
+      "loss": 0.0261,
+      "step": 4250
+    },
+    {
+      "epoch": 19.45205479452055,
+      "grad_norm": 0.07761911302804947,
+      "learning_rate": 2.2217873450750164e-05,
+      "loss": 0.0331,
+      "step": 4260
+    },
+    {
+      "epoch": 19.49771689497717,
+      "grad_norm": 0.06852933019399643,
+      "learning_rate": 2.215264187866928e-05,
+      "loss": 0.0302,
+      "step": 4270
+    },
+    {
+      "epoch": 19.54337899543379,
+      "grad_norm": 0.06854939460754395,
+      "learning_rate": 2.2087410306588392e-05,
+      "loss": 0.0349,
+      "step": 4280
+    },
+    {
+      "epoch": 19.589041095890412,
+      "grad_norm": 0.12086188793182373,
+      "learning_rate": 2.2022178734507504e-05,
+      "loss": 0.0291,
+      "step": 4290
+    },
+    {
+      "epoch": 19.634703196347033,
+      "grad_norm": 0.06224190816283226,
+      "learning_rate": 2.1956947162426616e-05,
+      "loss": 0.0264,
+      "step": 4300
+    },
+    {
+      "epoch": 19.680365296803654,
+      "grad_norm": 0.06803935021162033,
+      "learning_rate": 2.189171559034573e-05,
+      "loss": 0.0294,
+      "step": 4310
+    },
+    {
+      "epoch": 19.726027397260275,
+      "grad_norm": 0.26598790287971497,
+      "learning_rate": 2.1826484018264844e-05,
+      "loss": 0.0318,
+      "step": 4320
+    },
+    {
+      "epoch": 19.771689497716896,
+      "grad_norm": 0.05770856514573097,
+      "learning_rate": 2.1761252446183956e-05,
+      "loss": 0.0285,
+      "step": 4330
+    },
+    {
+      "epoch": 19.817351598173516,
+      "grad_norm": 0.06879496574401855,
+      "learning_rate": 2.1696020874103068e-05,
+      "loss": 0.0279,
+      "step": 4340
+    },
+    {
+      "epoch": 19.863013698630137,
+      "grad_norm": 0.14509794116020203,
+      "learning_rate": 2.163078930202218e-05,
+      "loss": 0.0309,
+      "step": 4350
+    },
+    {
+      "epoch": 19.908675799086758,
+      "grad_norm": 0.0720428079366684,
+      "learning_rate": 2.1565557729941293e-05,
+      "loss": 0.0289,
+      "step": 4360
+    },
+    {
+      "epoch": 19.95433789954338,
+      "grad_norm": 0.060434482991695404,
+      "learning_rate": 2.1500326157860408e-05,
+      "loss": 0.0315,
+      "step": 4370
+    },
+    {
+      "epoch": 20.0,
+      "grad_norm": 0.06360732018947601,
+      "learning_rate": 2.143509458577952e-05,
+      "loss": 0.0296,
+      "step": 4380
+    },
+    {
+      "epoch": 20.0,
+      "eval_bertscore_f1": 0.8803962610280284,
+      "eval_bleu": 0.663448215955616,
+      "eval_loss": 0.030089378356933594,
+      "eval_rougeL": 0.31135197769275447,
+      "eval_runtime": 85.3087,
+      "eval_samples_per_second": 17.618,
+      "eval_steps_per_second": 1.102,
+      "step": 4380
+    },
+    {
+      "epoch": 20.04566210045662,
+      "grad_norm": 0.04677814245223999,
+      "learning_rate": 2.1369863013698632e-05,
+      "loss": 0.0296,
+      "step": 4390
+    },
+    {
+      "epoch": 20.091324200913242,
+      "grad_norm": 0.054254136979579926,
+      "learning_rate": 2.1304631441617745e-05,
+      "loss": 0.029,
+      "step": 4400
+    },
+    {
+      "epoch": 20.136986301369863,
+      "grad_norm": 0.0981835126876831,
+      "learning_rate": 2.1239399869536857e-05,
+      "loss": 0.0307,
+      "step": 4410
+    },
+    {
+      "epoch": 20.182648401826484,
+      "grad_norm": 0.11695002764463425,
+      "learning_rate": 2.1174168297455972e-05,
+      "loss": 0.0305,
+      "step": 4420
+    },
+    {
+      "epoch": 20.228310502283104,
+      "grad_norm": 0.07786712050437927,
+      "learning_rate": 2.1108936725375085e-05,
+      "loss": 0.0276,
+      "step": 4430
+    },
+    {
+      "epoch": 20.273972602739725,
+      "grad_norm": 0.07433762401342392,
+      "learning_rate": 2.1043705153294197e-05,
+      "loss": 0.0311,
+      "step": 4440
+    },
+    {
+      "epoch": 20.319634703196346,
+      "grad_norm": 0.13224515318870544,
+      "learning_rate": 2.097847358121331e-05,
+      "loss": 0.0305,
+      "step": 4450
+    },
+    {
+      "epoch": 20.365296803652967,
+      "grad_norm": 0.07441609352827072,
+      "learning_rate": 2.091324200913242e-05,
+      "loss": 0.0296,
+      "step": 4460
+    },
+    {
+      "epoch": 20.410958904109588,
+      "grad_norm": 0.17592406272888184,
+      "learning_rate": 2.0848010437051537e-05,
+      "loss": 0.0298,
+      "step": 4470
+    },
+    {
+      "epoch": 20.45662100456621,
+      "grad_norm": 0.0610370934009552,
+      "learning_rate": 2.078277886497065e-05,
+      "loss": 0.0273,
+      "step": 4480
+    },
+    {
+      "epoch": 20.50228310502283,
+      "grad_norm": 0.09273794293403625,
+      "learning_rate": 2.071754729288976e-05,
+      "loss": 0.0292,
+      "step": 4490
+    },
+    {
+      "epoch": 20.54794520547945,
+      "grad_norm": 0.07045309990644455,
+      "learning_rate": 2.0652315720808873e-05,
+      "loss": 0.031,
+      "step": 4500
+    },
+    {
+      "epoch": 20.59360730593607,
+      "grad_norm": 0.09215089678764343,
+      "learning_rate": 2.0587084148727985e-05,
+      "loss": 0.0275,
+      "step": 4510
+    },
+    {
+      "epoch": 20.639269406392692,
+      "grad_norm": 0.1282932609319687,
+      "learning_rate": 2.05218525766471e-05,
+      "loss": 0.0289,
+      "step": 4520
+    },
+    {
+      "epoch": 20.684931506849313,
+      "grad_norm": 0.05365551635622978,
+      "learning_rate": 2.0456621004566213e-05,
+      "loss": 0.0294,
+      "step": 4530
+    },
+    {
+      "epoch": 20.730593607305934,
+      "grad_norm": 0.06082382798194885,
+      "learning_rate": 2.0391389432485325e-05,
+      "loss": 0.0327,
+      "step": 4540
+    },
+    {
+      "epoch": 20.77625570776256,
+      "grad_norm": 0.09889397025108337,
+      "learning_rate": 2.0326157860404437e-05,
+      "loss": 0.0286,
+      "step": 4550
+    },
+    {
+      "epoch": 20.82191780821918,
+      "grad_norm": 0.06901293992996216,
+      "learning_rate": 2.026092628832355e-05,
+      "loss": 0.0297,
+      "step": 4560
+    },
+    {
+      "epoch": 20.8675799086758,
+      "grad_norm": 0.05301612243056297,
+      "learning_rate": 2.0195694716242665e-05,
+      "loss": 0.0284,
+      "step": 4570
+    },
+    {
+      "epoch": 20.91324200913242,
+      "grad_norm": 0.07805132865905762,
+      "learning_rate": 2.0130463144161777e-05,
+      "loss": 0.0289,
+      "step": 4580
+    },
+    {
+      "epoch": 20.958904109589042,
+      "grad_norm": 0.16798809170722961,
+      "learning_rate": 2.006523157208089e-05,
+      "loss": 0.0293,
+      "step": 4590
+    },
+    {
+      "epoch": 21.0,
+      "eval_bertscore_f1": 0.8794706105710027,
+      "eval_bleu": 0.6654093778968074,
+      "eval_loss": 0.030202506110072136,
+      "eval_rougeL": 0.306777092559367,
+      "eval_runtime": 85.0172,
+      "eval_samples_per_second": 17.679,
+      "eval_steps_per_second": 1.106,
+      "step": 4599
+    },
+    {
+      "epoch": 21.004566210045663,
+      "grad_norm": 0.11417368054389954,
+      "learning_rate": 2e-05,
+      "loss": 0.0266,
+      "step": 4600
+    },
+    {
+      "epoch": 21.050228310502284,
+      "grad_norm": 0.12795647978782654,
+      "learning_rate": 1.9934768427919114e-05,
+      "loss": 0.0325,
+      "step": 4610
+    },
+    {
+      "epoch": 21.095890410958905,
+      "grad_norm": 0.15962223708629608,
+      "learning_rate": 1.986953685583823e-05,
+      "loss": 0.0324,
+      "step": 4620
+    },
+    {
+      "epoch": 21.141552511415526,
+      "grad_norm": 0.06648146361112595,
+      "learning_rate": 1.980430528375734e-05,
+      "loss": 0.0275,
+      "step": 4630
+    },
+    {
+      "epoch": 21.187214611872147,
+      "grad_norm": 0.10432185977697372,
+      "learning_rate": 1.9739073711676454e-05,
+      "loss": 0.0274,
+      "step": 4640
+    },
+    {
+      "epoch": 21.232876712328768,
+      "grad_norm": 0.05011922866106033,
+      "learning_rate": 1.9673842139595566e-05,
+      "loss": 0.0324,
+      "step": 4650
+    },
+    {
+      "epoch": 21.27853881278539,
+      "grad_norm": 0.07254044711589813,
+      "learning_rate": 1.9608610567514678e-05,
+      "loss": 0.0271,
+      "step": 4660
+    },
+    {
+      "epoch": 21.32420091324201,
+      "grad_norm": 0.279234379529953,
+      "learning_rate": 1.9543378995433794e-05,
+      "loss": 0.0283,
+      "step": 4670
+    },
+    {
+      "epoch": 21.36986301369863,
+      "grad_norm": 0.05222180485725403,
+      "learning_rate": 1.9478147423352906e-05,
+      "loss": 0.0279,
+      "step": 4680
+    },
+    {
+      "epoch": 21.41552511415525,
+      "grad_norm": 0.07844868302345276,
+      "learning_rate": 1.9412915851272018e-05,
+      "loss": 0.0302,
+      "step": 4690
+    },
+    {
+      "epoch": 21.461187214611872,
+      "grad_norm": 0.04424309730529785,
+      "learning_rate": 1.934768427919113e-05,
+      "loss": 0.0277,
+      "step": 4700
+    },
+    {
+      "epoch": 21.506849315068493,
+      "grad_norm": 0.06602602452039719,
+      "learning_rate": 1.9282452707110242e-05,
+      "loss": 0.0277,
+      "step": 4710
+    },
+    {
+      "epoch": 21.552511415525114,
+      "grad_norm": 0.10084933042526245,
+      "learning_rate": 1.9217221135029358e-05,
+      "loss": 0.0292,
+      "step": 4720
+    },
+    {
+      "epoch": 21.598173515981735,
+      "grad_norm": 0.1485004872083664,
+      "learning_rate": 1.915198956294847e-05,
+      "loss": 0.0312,
+      "step": 4730
+    },
+    {
+      "epoch": 21.643835616438356,
+      "grad_norm": 0.1026349812746048,
+      "learning_rate": 1.9086757990867582e-05,
+      "loss": 0.03,
+      "step": 4740
+    },
+    {
+      "epoch": 21.689497716894977,
+      "grad_norm": 0.10063963383436203,
+      "learning_rate": 1.9021526418786694e-05,
+      "loss": 0.0294,
+      "step": 4750
+    },
+    {
+      "epoch": 21.735159817351597,
+      "grad_norm": 0.08801602572202682,
+      "learning_rate": 1.8956294846705807e-05,
+      "loss": 0.0315,
+      "step": 4760
+    },
+    {
+      "epoch": 21.78082191780822,
+      "grad_norm": 0.060193344950675964,
+      "learning_rate": 1.8891063274624922e-05,
+      "loss": 0.0274,
+      "step": 4770
+    },
+    {
+      "epoch": 21.82648401826484,
+      "grad_norm": 0.08400905132293701,
+      "learning_rate": 1.8825831702544034e-05,
+      "loss": 0.0307,
+      "step": 4780
+    },
+    {
+      "epoch": 21.87214611872146,
+      "grad_norm": 0.07627220451831818,
+      "learning_rate": 1.8760600130463146e-05,
+      "loss": 0.0268,
+      "step": 4790
+    },
+    {
+      "epoch": 21.91780821917808,
+      "grad_norm": 0.11953066289424896,
+      "learning_rate": 1.869536855838226e-05,
+      "loss": 0.0286,
+      "step": 4800
+    },
+    {
+      "epoch": 21.963470319634702,
+      "grad_norm": 0.1060253456234932,
+      "learning_rate": 1.863013698630137e-05,
+      "loss": 0.0282,
+      "step": 4810
+    },
+    {
+      "epoch": 22.0,
+      "eval_bertscore_f1": 0.8792276669167235,
+      "eval_bleu": 0.7824206736837964,
+      "eval_loss": 0.029947301372885704,
+      "eval_rougeL": 0.2971952858857796,
+      "eval_runtime": 85.058,
+      "eval_samples_per_second": 17.67,
+      "eval_steps_per_second": 1.105,
+      "step": 4818
+    },
+    {
+      "epoch": 22.009132420091323,
+      "grad_norm": 0.06057864427566528,
+      "learning_rate": 1.8564905414220486e-05,
+      "loss": 0.0275,
+      "step": 4820
+    },
+    {
+      "epoch": 22.054794520547944,
+      "grad_norm": 0.0509127639234066,
+      "learning_rate": 1.84996738421396e-05,
+      "loss": 0.0277,
+      "step": 4830
+    },
+    {
+      "epoch": 22.100456621004565,
+      "grad_norm": 0.3389517068862915,
+      "learning_rate": 1.843444227005871e-05,
+      "loss": 0.0297,
+      "step": 4840
+    },
+    {
+      "epoch": 22.146118721461185,
+      "grad_norm": 0.10847995430231094,
+      "learning_rate": 1.8369210697977823e-05,
+      "loss": 0.0292,
+      "step": 4850
+    },
+    {
+      "epoch": 22.19178082191781,
+      "grad_norm": 0.06611143052577972,
+      "learning_rate": 1.8303979125896935e-05,
+      "loss": 0.0288,
+      "step": 4860
+    },
+    {
+      "epoch": 22.23744292237443,
+      "grad_norm": 0.19594036042690277,
+      "learning_rate": 1.823874755381605e-05,
+      "loss": 0.0319,
+      "step": 4870
+    },
+    {
+      "epoch": 22.28310502283105,
+      "grad_norm": 0.18287107348442078,
+      "learning_rate": 1.8173515981735163e-05,
+      "loss": 0.0346,
+      "step": 4880
+    },
+    {
+      "epoch": 22.328767123287673,
+      "grad_norm": 0.06755329668521881,
+      "learning_rate": 1.8108284409654275e-05,
+      "loss": 0.0297,
+      "step": 4890
+    },
+    {
+      "epoch": 22.374429223744293,
+      "grad_norm": 0.062369752675294876,
+      "learning_rate": 1.8043052837573387e-05,
+      "loss": 0.0283,
+      "step": 4900
+    },
+    {
+      "epoch": 22.420091324200914,
+      "grad_norm": 0.05487988516688347,
+      "learning_rate": 1.79778212654925e-05,
+      "loss": 0.0271,
+      "step": 4910
+    },
+    {
+      "epoch": 22.465753424657535,
+      "grad_norm": 0.2627151906490326,
+      "learning_rate": 1.7912589693411615e-05,
+      "loss": 0.0279,
+      "step": 4920
+    },
+    {
+      "epoch": 22.511415525114156,
+      "grad_norm": 0.053799696266651154,
+      "learning_rate": 1.7847358121330727e-05,
+      "loss": 0.0267,
+      "step": 4930
+    },
+    {
+      "epoch": 22.557077625570777,
+      "grad_norm": 0.21573284268379211,
+      "learning_rate": 1.778212654924984e-05,
+      "loss": 0.0293,
+      "step": 4940
+    },
+    {
+      "epoch": 22.602739726027398,
+      "grad_norm": 0.07190805673599243,
+      "learning_rate": 1.771689497716895e-05,
+      "loss": 0.0265,
+      "step": 4950
+    },
+    {
+      "epoch": 22.64840182648402,
+      "grad_norm": 0.12876766920089722,
+      "learning_rate": 1.7651663405088064e-05,
+      "loss": 0.0322,
+      "step": 4960
+    },
+    {
+      "epoch": 22.69406392694064,
+      "grad_norm": 0.05925724655389786,
+      "learning_rate": 1.758643183300718e-05,
+      "loss": 0.0287,
+      "step": 4970
+    },
+    {
+      "epoch": 22.73972602739726,
+      "grad_norm": 0.24361100792884827,
+      "learning_rate": 1.752120026092629e-05,
+      "loss": 0.0279,
+      "step": 4980
+    },
+    {
+      "epoch": 22.78538812785388,
+      "grad_norm": 0.151267409324646,
+      "learning_rate": 1.7455968688845403e-05,
+      "loss": 0.0268,
+      "step": 4990
+    },
+    {
+      "epoch": 22.831050228310502,
+      "grad_norm": 0.05790381506085396,
+      "learning_rate": 1.7390737116764516e-05,
+      "loss": 0.0291,
+      "step": 5000
+    },
+    {
+      "epoch": 22.876712328767123,
+      "grad_norm": 0.05819353461265564,
+      "learning_rate": 1.7325505544683628e-05,
+      "loss": 0.0279,
+      "step": 5010
+    },
+    {
+      "epoch": 22.922374429223744,
+      "grad_norm": 0.10005196928977966,
+      "learning_rate": 1.7260273972602743e-05,
+      "loss": 0.026,
+      "step": 5020
+    },
+    {
+      "epoch": 22.968036529680365,
+      "grad_norm": 0.13961489498615265,
+      "learning_rate": 1.7195042400521855e-05,
+      "loss": 0.0317,
+      "step": 5030
+    },
+    {
+      "epoch": 23.0,
+      "eval_bertscore_f1": 0.8818914938274417,
+      "eval_bleu": 0.6471478207401697,
+      "eval_loss": 0.029684867709875107,
+      "eval_rougeL": 0.31794502636592814,
+      "eval_runtime": 86.5126,
+      "eval_samples_per_second": 17.373,
+      "eval_steps_per_second": 1.087,
+      "step": 5037
+    },
+    {
+      "epoch": 23.013698630136986,
+      "grad_norm": 0.04921013489365578,
+      "learning_rate": 1.7129810828440968e-05,
+      "loss": 0.0301,
+      "step": 5040
+    },
+    {
+      "epoch": 23.059360730593607,
+      "grad_norm": 0.11158487945795059,
+      "learning_rate": 1.706457925636008e-05,
+      "loss": 0.0273,
+      "step": 5050
+    },
+    {
+      "epoch": 23.105022831050228,
+      "grad_norm": 0.05320843309164047,
+      "learning_rate": 1.6999347684279192e-05,
+      "loss": 0.0283,
+      "step": 5060
+    },
+    {
+      "epoch": 23.15068493150685,
+      "grad_norm": 0.08403103053569794,
+      "learning_rate": 1.6934116112198308e-05,
+      "loss": 0.0276,
+      "step": 5070
+    },
+    {
+      "epoch": 23.19634703196347,
+      "grad_norm": 0.05202396214008331,
+      "learning_rate": 1.686888454011742e-05,
+      "loss": 0.0268,
+      "step": 5080
+    },
+    {
+      "epoch": 23.24200913242009,
+      "grad_norm": 0.07416887581348419,
+      "learning_rate": 1.6803652968036532e-05,
+      "loss": 0.0289,
+      "step": 5090
+    },
+    {
+      "epoch": 23.28767123287671,
+      "grad_norm": 0.08157260715961456,
+      "learning_rate": 1.6738421395955644e-05,
+      "loss": 0.0273,
+      "step": 5100
+    },
+    {
+      "epoch": 23.333333333333332,
+      "grad_norm": 0.115360789000988,
+      "learning_rate": 1.6673189823874756e-05,
+      "loss": 0.0302,
+      "step": 5110
+    },
+    {
+      "epoch": 23.378995433789953,
+      "grad_norm": 0.09168818593025208,
+      "learning_rate": 1.6607958251793872e-05,
+      "loss": 0.0296,
+      "step": 5120
+    },
+    {
+      "epoch": 23.424657534246574,
+      "grad_norm": 0.06763426214456558,
+      "learning_rate": 1.6542726679712984e-05,
+      "loss": 0.0284,
+      "step": 5130
+    },
+    {
+      "epoch": 23.470319634703195,
+      "grad_norm": 0.06270505487918854,
+      "learning_rate": 1.6477495107632096e-05,
+      "loss": 0.0284,
+      "step": 5140
+    },
+    {
+      "epoch": 23.515981735159816,
+      "grad_norm": 0.08347708731889725,
+      "learning_rate": 1.641226353555121e-05,
+      "loss": 0.0309,
+      "step": 5150
+    },
+    {
+      "epoch": 23.561643835616437,
+      "grad_norm": 0.07440556585788727,
+      "learning_rate": 1.634703196347032e-05,
+      "loss": 0.0266,
+      "step": 5160
+    },
+    {
+      "epoch": 23.60730593607306,
+      "grad_norm": 0.07723133265972137,
+      "learning_rate": 1.6281800391389436e-05,
+      "loss": 0.0286,
+      "step": 5170
+    },
+    {
+      "epoch": 23.652968036529682,
+      "grad_norm": 0.08790794014930725,
+      "learning_rate": 1.6216568819308548e-05,
+      "loss": 0.0324,
+      "step": 5180
+    },
+    {
+      "epoch": 23.698630136986303,
+      "grad_norm": 0.04647298902273178,
+      "learning_rate": 1.615133724722766e-05,
+      "loss": 0.0287,
+      "step": 5190
+    },
+    {
+      "epoch": 23.744292237442924,
+      "grad_norm": 0.07351569831371307,
+      "learning_rate": 1.6086105675146773e-05,
+      "loss": 0.0278,
+      "step": 5200
+    },
+    {
+      "epoch": 23.789954337899545,
+      "grad_norm": 0.055103108286857605,
+      "learning_rate": 1.6020874103065885e-05,
+      "loss": 0.0282,
+      "step": 5210
+    },
+    {
+      "epoch": 23.835616438356166,
+      "grad_norm": 0.12256161123514175,
+      "learning_rate": 1.5955642530985e-05,
+      "loss": 0.0321,
+      "step": 5220
+    },
+    {
+      "epoch": 23.881278538812786,
+      "grad_norm": 0.062129609286785126,
+      "learning_rate": 1.5890410958904112e-05,
+      "loss": 0.0281,
+      "step": 5230
+    },
+    {
+      "epoch": 23.926940639269407,
+      "grad_norm": 0.057793620973825455,
+      "learning_rate": 1.5825179386823225e-05,
+      "loss": 0.0287,
+      "step": 5240
+    },
+    {
+      "epoch": 23.972602739726028,
+      "grad_norm": 0.068938247859478,
+      "learning_rate": 1.5759947814742337e-05,
+      "loss": 0.0282,
+      "step": 5250
+    },
+    {
+      "epoch": 24.0,
+      "eval_bertscore_f1": 0.8786919926375607,
+      "eval_bleu": 0.6852253219479566,
+      "eval_loss": 0.02980269491672516,
+      "eval_rougeL": 0.3053440930561995,
+      "eval_runtime": 85.9989,
+      "eval_samples_per_second": 17.477,
+      "eval_steps_per_second": 1.093,
+      "step": 5256
+    },
+    {
+      "epoch": 24.01826484018265,
+      "grad_norm": 0.06383677572011948,
+      "learning_rate": 1.569471624266145e-05,
+      "loss": 0.027,
+      "step": 5260
+    },
+    {
+      "epoch": 24.06392694063927,
+      "grad_norm": 0.26667171716690063,
+      "learning_rate": 1.5629484670580565e-05,
+      "loss": 0.0288,
+      "step": 5270
+    },
+    {
+      "epoch": 24.10958904109589,
+      "grad_norm": 0.0889834463596344,
+      "learning_rate": 1.5564253098499677e-05,
+      "loss": 0.0287,
+      "step": 5280
+    },
+    {
+      "epoch": 24.15525114155251,
+      "grad_norm": 0.06234045326709747,
+      "learning_rate": 1.549902152641879e-05,
+      "loss": 0.0287,
+      "step": 5290
+    },
+    {
+      "epoch": 24.200913242009133,
+      "grad_norm": 0.05488771200180054,
+      "learning_rate": 1.54337899543379e-05,
+      "loss": 0.0276,
+      "step": 5300
+    },
+    {
+      "epoch": 24.246575342465754,
+      "grad_norm": 0.05854570493102074,
+      "learning_rate": 1.5368558382257013e-05,
+      "loss": 0.0292,
+      "step": 5310
+    },
+    {
+      "epoch": 24.292237442922374,
+      "grad_norm": 0.07843585312366486,
+      "learning_rate": 1.530332681017613e-05,
+      "loss": 0.0262,
+      "step": 5320
+    },
+    {
+      "epoch": 24.337899543378995,
+      "grad_norm": 0.1690542995929718,
+      "learning_rate": 1.5238095238095241e-05,
+      "loss": 0.0292,
+      "step": 5330
+    },
+    {
+      "epoch": 24.383561643835616,
+      "grad_norm": 0.05105036869645119,
+      "learning_rate": 1.5172863666014353e-05,
+      "loss": 0.0256,
+      "step": 5340
+    },
+    {
+      "epoch": 24.429223744292237,
+      "grad_norm": 0.05323030427098274,
+      "learning_rate": 1.5107632093933465e-05,
+      "loss": 0.031,
+      "step": 5350
+    },
+    {
+      "epoch": 24.474885844748858,
+      "grad_norm": 0.29972654581069946,
+      "learning_rate": 1.504240052185258e-05,
+      "loss": 0.0295,
+      "step": 5360
+    },
+    {
+      "epoch": 24.52054794520548,
+      "grad_norm": 0.13861894607543945,
+      "learning_rate": 1.497716894977169e-05,
+      "loss": 0.029,
+      "step": 5370
+    },
+    {
+      "epoch": 24.5662100456621,
+      "grad_norm": 0.11305614560842514,
+      "learning_rate": 1.4911937377690802e-05,
+      "loss": 0.0296,
+      "step": 5380
+    },
+    {
+      "epoch": 24.61187214611872,
+      "grad_norm": 0.06037652865052223,
+      "learning_rate": 1.4846705805609914e-05,
+      "loss": 0.0278,
+      "step": 5390
+    },
+    {
+      "epoch": 24.65753424657534,
+      "grad_norm": 0.09368506073951721,
+      "learning_rate": 1.4781474233529028e-05,
+      "loss": 0.0259,
+      "step": 5400
+    },
+    {
+      "epoch": 24.703196347031962,
+      "grad_norm": 0.16461175680160522,
+      "learning_rate": 1.471624266144814e-05,
+      "loss": 0.0288,
+      "step": 5410
+    },
+    {
+      "epoch": 24.748858447488583,
+      "grad_norm": 0.1278267502784729,
+      "learning_rate": 1.4651011089367254e-05,
+      "loss": 0.0303,
+      "step": 5420
+    },
+    {
+      "epoch": 24.794520547945204,
+      "grad_norm": 0.0516529381275177,
+      "learning_rate": 1.4585779517286366e-05,
+      "loss": 0.0296,
+      "step": 5430
+    },
+    {
+      "epoch": 24.840182648401825,
+      "grad_norm": 0.05440378189086914,
+      "learning_rate": 1.4520547945205478e-05,
+      "loss": 0.0285,
+      "step": 5440
+    },
+    {
+      "epoch": 24.885844748858446,
+      "grad_norm": 0.053505733609199524,
+      "learning_rate": 1.4455316373124592e-05,
+      "loss": 0.031,
+      "step": 5450
+    },
+    {
+      "epoch": 24.931506849315067,
+      "grad_norm": 0.0745423436164856,
+      "learning_rate": 1.4390084801043704e-05,
+      "loss": 0.0287,
+      "step": 5460
+    },
+    {
+      "epoch": 24.977168949771688,
+      "grad_norm": 0.0573294572532177,
+      "learning_rate": 1.4324853228962818e-05,
+      "loss": 0.0292,
+      "step": 5470
+    },
+    {
+      "epoch": 25.0,
+      "eval_bertscore_f1": 0.882176701418178,
+      "eval_bleu": 0.6490651628544384,
+      "eval_loss": 0.02972874790430069,
+      "eval_rougeL": 0.3165639622967543,
+      "eval_runtime": 85.5868,
+      "eval_samples_per_second": 17.561,
+      "eval_steps_per_second": 1.098,
+      "step": 5475
+    },
+    {
+      "epoch": 25.022831050228312,
+      "grad_norm": 0.057134486734867096,
+      "learning_rate": 1.425962165688193e-05,
+      "loss": 0.0276,
+      "step": 5480
+    },
+    {
+      "epoch": 25.068493150684933,
+      "grad_norm": 0.06873016059398651,
+      "learning_rate": 1.4194390084801042e-05,
+      "loss": 0.029,
+      "step": 5490
+    },
+    {
+      "epoch": 25.114155251141554,
+      "grad_norm": 0.06870689243078232,
+      "learning_rate": 1.4129158512720156e-05,
+      "loss": 0.0287,
+      "step": 5500
+    },
+    {
+      "epoch": 25.159817351598175,
+      "grad_norm": 0.0451415553689003,
+      "learning_rate": 1.4063926940639269e-05,
+      "loss": 0.028,
+      "step": 5510
+    },
+    {
+      "epoch": 25.205479452054796,
+      "grad_norm": 0.051221951842308044,
+      "learning_rate": 1.3998695368558382e-05,
+      "loss": 0.0263,
+      "step": 5520
+    },
+    {
+      "epoch": 25.251141552511417,
+      "grad_norm": 0.4794943928718567,
+      "learning_rate": 1.3933463796477495e-05,
+      "loss": 0.031,
+      "step": 5530
+    },
+    {
+      "epoch": 25.296803652968038,
+      "grad_norm": 0.08168008923530579,
+      "learning_rate": 1.3868232224396607e-05,
+      "loss": 0.0288,
+      "step": 5540
+    },
+    {
+      "epoch": 25.34246575342466,
+      "grad_norm": 0.10532938688993454,
+      "learning_rate": 1.380300065231572e-05,
+      "loss": 0.028,
+      "step": 5550
+    },
+    {
+      "epoch": 25.38812785388128,
+      "grad_norm": 0.06582871824502945,
+      "learning_rate": 1.3737769080234833e-05,
+      "loss": 0.0282,
+      "step": 5560
+    },
+    {
+      "epoch": 25.4337899543379,
+      "grad_norm": 0.07675176113843918,
+      "learning_rate": 1.3672537508153947e-05,
+      "loss": 0.0275,
+      "step": 5570
+    },
+    {
+      "epoch": 25.47945205479452,
+      "grad_norm": 0.25649493932724,
+      "learning_rate": 1.3607305936073059e-05,
+      "loss": 0.0308,
+      "step": 5580
+    },
+    {
+      "epoch": 25.525114155251142,
+      "grad_norm": 0.081456758081913,
+      "learning_rate": 1.3542074363992171e-05,
+      "loss": 0.027,
+      "step": 5590
+    },
+    {
+      "epoch": 25.570776255707763,
+      "grad_norm": 0.14920422434806824,
+      "learning_rate": 1.3476842791911285e-05,
+      "loss": 0.0275,
+      "step": 5600
+    },
+    {
+      "epoch": 25.616438356164384,
+      "grad_norm": 0.08439820259809494,
+      "learning_rate": 1.3411611219830397e-05,
+      "loss": 0.0269,
+      "step": 5610
+    },
+    {
+      "epoch": 25.662100456621005,
+      "grad_norm": 0.06275507062673569,
+      "learning_rate": 1.3346379647749511e-05,
+      "loss": 0.0274,
+      "step": 5620
+    },
+    {
+      "epoch": 25.707762557077626,
+      "grad_norm": 0.05262503772974014,
+      "learning_rate": 1.3281148075668623e-05,
+      "loss": 0.0277,
+      "step": 5630
+    },
+    {
+      "epoch": 25.753424657534246,
+      "grad_norm": 0.09741270542144775,
+      "learning_rate": 1.3215916503587735e-05,
+      "loss": 0.0287,
+      "step": 5640
+    },
+    {
+      "epoch": 25.799086757990867,
+      "grad_norm": 0.0535399504005909,
+      "learning_rate": 1.3150684931506849e-05,
+      "loss": 0.0271,
+      "step": 5650
+    },
+    {
+      "epoch": 25.84474885844749,
+      "grad_norm": 0.09080829471349716,
+      "learning_rate": 1.3085453359425961e-05,
+      "loss": 0.0293,
+      "step": 5660
+    },
+    {
+      "epoch": 25.89041095890411,
+      "grad_norm": 0.08309127390384674,
+      "learning_rate": 1.3020221787345075e-05,
+      "loss": 0.0304,
+      "step": 5670
+    },
+    {
+      "epoch": 25.93607305936073,
+      "grad_norm": 0.09403306990861893,
+      "learning_rate": 1.2954990215264187e-05,
+      "loss": 0.0282,
+      "step": 5680
+    },
+    {
+      "epoch": 25.98173515981735,
+      "grad_norm": 0.06915637105703354,
+      "learning_rate": 1.28897586431833e-05,
+      "loss": 0.0288,
+      "step": 5690
+    },
+    {
+      "epoch": 26.0,
+      "eval_bertscore_f1": 0.8812559430232781,
+      "eval_bleu": 0.6023790904677201,
+      "eval_loss": 0.029696911573410034,
+      "eval_rougeL": 0.3190547097361511,
+      "eval_runtime": 85.4112,
+      "eval_samples_per_second": 17.597,
+      "eval_steps_per_second": 1.101,
+      "step": 5694
+    },
+    {
+      "epoch": 26.027397260273972,
+      "grad_norm": 0.06425880640745163,
+      "learning_rate": 1.2824527071102413e-05,
+      "loss": 0.0267,
+      "step": 5700
+    },
+    {
+      "epoch": 26.073059360730593,
+      "grad_norm": 0.04396981745958328,
+      "learning_rate": 1.2759295499021525e-05,
+      "loss": 0.0268,
+      "step": 5710
+    },
+    {
+      "epoch": 26.118721461187214,
+      "grad_norm": 0.05449533835053444,
+      "learning_rate": 1.269406392694064e-05,
+      "loss": 0.0269,
+      "step": 5720
+    },
+    {
+      "epoch": 26.164383561643834,
+      "grad_norm": 0.06189217418432236,
+      "learning_rate": 1.2628832354859752e-05,
+      "loss": 0.0295,
+      "step": 5730
+    },
+    {
+      "epoch": 26.210045662100455,
+      "grad_norm": 0.0630607083439827,
+      "learning_rate": 1.2563600782778864e-05,
+      "loss": 0.0303,
+      "step": 5740
+    },
+    {
+      "epoch": 26.255707762557076,
+      "grad_norm": 0.05802557244896889,
+      "learning_rate": 1.249836921069798e-05,
+      "loss": 0.0268,
+      "step": 5750
+    },
+    {
+      "epoch": 26.301369863013697,
+      "grad_norm": 0.045306626707315445,
+      "learning_rate": 1.2433137638617091e-05,
+      "loss": 0.0288,
+      "step": 5760
+    },
+    {
+      "epoch": 26.347031963470318,
+      "grad_norm": 0.06658345460891724,
+      "learning_rate": 1.2367906066536204e-05,
+      "loss": 0.0274,
+      "step": 5770
+    },
+    {
+      "epoch": 26.39269406392694,
+      "grad_norm": 0.06213853880763054,
+      "learning_rate": 1.2302674494455317e-05,
+      "loss": 0.028,
+      "step": 5780
+    },
+    {
+      "epoch": 26.438356164383563,
+      "grad_norm": 0.07253895699977875,
+      "learning_rate": 1.223744292237443e-05,
+      "loss": 0.0261,
+      "step": 5790
+    },
+    {
+      "epoch": 26.484018264840184,
+      "grad_norm": 0.0809069573879242,
+      "learning_rate": 1.2172211350293543e-05,
+      "loss": 0.0284,
+      "step": 5800
+    },
+    {
+      "epoch": 26.529680365296805,
+      "grad_norm": 0.15386025607585907,
+      "learning_rate": 1.2106979778212656e-05,
+      "loss": 0.0271,
+      "step": 5810
+    },
+    {
+      "epoch": 26.575342465753426,
+      "grad_norm": 0.0711253210902214,
+      "learning_rate": 1.2041748206131768e-05,
+      "loss": 0.027,
+      "step": 5820
+    },
+    {
+      "epoch": 26.621004566210047,
+      "grad_norm": 0.06340400129556656,
+      "learning_rate": 1.1976516634050882e-05,
+      "loss": 0.0286,
+      "step": 5830
+    },
+    {
+      "epoch": 26.666666666666668,
+      "grad_norm": 0.06583566963672638,
+      "learning_rate": 1.1911285061969994e-05,
+      "loss": 0.0265,
+      "step": 5840
+    },
+    {
+      "epoch": 26.71232876712329,
+      "grad_norm": 0.09953002631664276,
+      "learning_rate": 1.1846053489889108e-05,
+      "loss": 0.0299,
+      "step": 5850
+    },
+    {
+      "epoch": 26.75799086757991,
+      "grad_norm": 0.10960971564054489,
+      "learning_rate": 1.178082191780822e-05,
+      "loss": 0.0297,
+      "step": 5860
+    },
+    {
+      "epoch": 26.80365296803653,
+      "grad_norm": 0.15185241401195526,
+      "learning_rate": 1.1715590345727332e-05,
+      "loss": 0.029,
+      "step": 5870
+    },
+    {
+      "epoch": 26.84931506849315,
+      "grad_norm": 0.07303405553102493,
+      "learning_rate": 1.1650358773646446e-05,
+      "loss": 0.0269,
+      "step": 5880
+    },
+    {
+      "epoch": 26.894977168949772,
+      "grad_norm": 0.12658797204494476,
+      "learning_rate": 1.1585127201565558e-05,
+      "loss": 0.0284,
+      "step": 5890
+    },
+    {
+      "epoch": 26.940639269406393,
+      "grad_norm": 0.05162033811211586,
+      "learning_rate": 1.1519895629484672e-05,
+      "loss": 0.0279,
+      "step": 5900
+    },
+    {
+      "epoch": 26.986301369863014,
+      "grad_norm": 0.0728631243109703,
+      "learning_rate": 1.1454664057403784e-05,
+      "loss": 0.0266,
+      "step": 5910
+    },
+    {
+      "epoch": 27.0,
+      "eval_bertscore_f1": 0.8816696117896679,
+      "eval_bleu": 0.7002372942228009,
+      "eval_loss": 0.029564756900072098,
+      "eval_rougeL": 0.3117885832149029,
+      "eval_runtime": 85.283,
+      "eval_samples_per_second": 17.624,
+      "eval_steps_per_second": 1.102,
+      "step": 5913
+    },
+    {
+      "epoch": 27.031963470319635,
+      "grad_norm": 0.05337512493133545,
+      "learning_rate": 1.1389432485322896e-05,
+      "loss": 0.03,
+      "step": 5920
+    },
+    {
+      "epoch": 27.077625570776256,
+      "grad_norm": 0.08450283855199814,
+      "learning_rate": 1.132420091324201e-05,
+      "loss": 0.028,
+      "step": 5930
+    },
+    {
+      "epoch": 27.123287671232877,
+      "grad_norm": 0.15338727831840515,
+      "learning_rate": 1.1258969341161122e-05,
+      "loss": 0.028,
+      "step": 5940
+    },
+    {
+      "epoch": 27.168949771689498,
+      "grad_norm": 0.13320617377758026,
+      "learning_rate": 1.1193737769080236e-05,
+      "loss": 0.0283,
+      "step": 5950
+    },
+    {
+      "epoch": 27.21461187214612,
+      "grad_norm": 0.13354717195034027,
+      "learning_rate": 1.1128506196999348e-05,
+      "loss": 0.0293,
+      "step": 5960
+    },
+    {
+      "epoch": 27.26027397260274,
+      "grad_norm": 0.06282184273004532,
+      "learning_rate": 1.106327462491846e-05,
+      "loss": 0.0264,
+      "step": 5970
+    },
+    {
+      "epoch": 27.30593607305936,
+      "grad_norm": 0.06496313959360123,
+      "learning_rate": 1.0998043052837574e-05,
+      "loss": 0.0294,
+      "step": 5980
+    },
+    {
+      "epoch": 27.35159817351598,
+      "grad_norm": 0.0739155188202858,
+      "learning_rate": 1.0932811480756687e-05,
+      "loss": 0.0289,
+      "step": 5990
+    },
+    {
+      "epoch": 27.397260273972602,
+      "grad_norm": 0.07013906538486481,
+      "learning_rate": 1.08675799086758e-05,
+      "loss": 0.0301,
+      "step": 6000
+    },
+    {
+      "epoch": 27.442922374429223,
+      "grad_norm": 0.27129918336868286,
+      "learning_rate": 1.0802348336594913e-05,
+      "loss": 0.0269,
+      "step": 6010
+    },
+    {
+      "epoch": 27.488584474885844,
+      "grad_norm": 0.11588162928819656,
+      "learning_rate": 1.0737116764514025e-05,
+      "loss": 0.0276,
+      "step": 6020
+    },
+    {
+      "epoch": 27.534246575342465,
+      "grad_norm": 0.0485413484275341,
+      "learning_rate": 1.0671885192433139e-05,
+      "loss": 0.0278,
+      "step": 6030
+    },
+    {
+      "epoch": 27.579908675799086,
+      "grad_norm": 0.1332385092973709,
+      "learning_rate": 1.0606653620352251e-05,
+      "loss": 0.0283,
+      "step": 6040
+    },
+    {
+      "epoch": 27.625570776255707,
+      "grad_norm": 0.04625769704580307,
+      "learning_rate": 1.0541422048271365e-05,
+      "loss": 0.0295,
+      "step": 6050
+    },
+    {
+      "epoch": 27.671232876712327,
+      "grad_norm": 0.0804968923330307,
+      "learning_rate": 1.0476190476190477e-05,
+      "loss": 0.0268,
+      "step": 6060
+    },
+    {
+      "epoch": 27.71689497716895,
+      "grad_norm": 0.07371534407138824,
+      "learning_rate": 1.0410958904109589e-05,
+      "loss": 0.0272,
+      "step": 6070
+    },
+    {
+      "epoch": 27.76255707762557,
+      "grad_norm": 0.09114833176136017,
+      "learning_rate": 1.0345727332028703e-05,
+      "loss": 0.0283,
+      "step": 6080
+    },
+    {
+      "epoch": 27.80821917808219,
+      "grad_norm": 0.06806255131959915,
+      "learning_rate": 1.0280495759947815e-05,
+      "loss": 0.027,
+      "step": 6090
+    },
+    {
+      "epoch": 27.853881278538815,
+      "grad_norm": 0.07149802148342133,
+      "learning_rate": 1.0215264187866929e-05,
+      "loss": 0.0273,
+      "step": 6100
+    },
+    {
+      "epoch": 27.899543378995435,
+      "grad_norm": 0.059655074030160904,
+      "learning_rate": 1.0150032615786041e-05,
+      "loss": 0.0266,
+      "step": 6110
+    },
+    {
+      "epoch": 27.945205479452056,
+      "grad_norm": 0.060154326260089874,
+      "learning_rate": 1.0084801043705153e-05,
+      "loss": 0.0241,
+      "step": 6120
+    },
+    {
+      "epoch": 27.990867579908677,
+      "grad_norm": 0.06297276169061661,
+      "learning_rate": 1.0019569471624267e-05,
+      "loss": 0.0265,
+      "step": 6130
+    },
+    {
+      "epoch": 28.0,
+      "eval_bertscore_f1": 0.8819132992528077,
+      "eval_bleu": 0.6419990268369562,
+      "eval_loss": 0.02953849919140339,
+      "eval_rougeL": 0.31736312067462313,
+      "eval_runtime": 85.6257,
+      "eval_samples_per_second": 17.553,
+      "eval_steps_per_second": 1.098,
+      "step": 6132
+    },
+    {
+      "epoch": 28.036529680365298,
+      "grad_norm": 0.06080065667629242,
+      "learning_rate": 9.95433789954338e-06,
+      "loss": 0.0281,
+      "step": 6140
+    },
+    {
+      "epoch": 28.08219178082192,
+      "grad_norm": 0.08424234390258789,
+      "learning_rate": 9.889106327462493e-06,
+      "loss": 0.0284,
+      "step": 6150
+    },
+    {
+      "epoch": 28.12785388127854,
+      "grad_norm": 0.19627715647220612,
+      "learning_rate": 9.823874755381605e-06,
+      "loss": 0.0276,
+      "step": 6160
+    },
+    {
+      "epoch": 28.17351598173516,
+      "grad_norm": 0.05729695409536362,
+      "learning_rate": 9.758643183300718e-06,
+      "loss": 0.0273,
+      "step": 6170
+    },
+    {
+      "epoch": 28.21917808219178,
+      "grad_norm": 0.056810833513736725,
+      "learning_rate": 9.693411611219831e-06,
+      "loss": 0.0272,
+      "step": 6180
+    },
+    {
+      "epoch": 28.264840182648403,
+      "grad_norm": 0.3247433602809906,
+      "learning_rate": 9.628180039138944e-06,
+      "loss": 0.0288,
+      "step": 6190
+    },
+    {
+      "epoch": 28.310502283105023,
+      "grad_norm": 0.054678067564964294,
+      "learning_rate": 9.562948467058057e-06,
+      "loss": 0.0284,
+      "step": 6200
+    },
+    {
+      "epoch": 28.356164383561644,
+      "grad_norm": 0.12813326716423035,
+      "learning_rate": 9.49771689497717e-06,
+      "loss": 0.0277,
+      "step": 6210
+    },
+    {
+      "epoch": 28.401826484018265,
+      "grad_norm": 0.09990496188402176,
+      "learning_rate": 9.432485322896282e-06,
+      "loss": 0.0294,
+      "step": 6220
+    },
+    {
+      "epoch": 28.447488584474886,
+      "grad_norm": 0.30804169178009033,
+      "learning_rate": 9.367253750815396e-06,
+      "loss": 0.0274,
+      "step": 6230
+    },
+    {
+      "epoch": 28.493150684931507,
+      "grad_norm": 0.0785110592842102,
+      "learning_rate": 9.302022178734508e-06,
+      "loss": 0.0279,
+      "step": 6240
+    },
+    {
+      "epoch": 28.538812785388128,
+      "grad_norm": 0.07497629523277283,
+      "learning_rate": 9.236790606653622e-06,
+      "loss": 0.0289,
+      "step": 6250
+    },
+    {
+      "epoch": 28.58447488584475,
+      "grad_norm": 0.048099491745233536,
+      "learning_rate": 9.171559034572734e-06,
+      "loss": 0.0265,
+      "step": 6260
+    },
+    {
+      "epoch": 28.63013698630137,
+      "grad_norm": 0.074525848031044,
+      "learning_rate": 9.106327462491846e-06,
+      "loss": 0.0268,
+      "step": 6270
+    },
+    {
+      "epoch": 28.67579908675799,
+      "grad_norm": 0.0570547953248024,
+      "learning_rate": 9.04109589041096e-06,
+      "loss": 0.03,
+      "step": 6280
+    },
+    {
+      "epoch": 28.72146118721461,
+      "grad_norm": 0.11205938458442688,
+      "learning_rate": 8.975864318330072e-06,
+      "loss": 0.0259,
+      "step": 6290
+    },
+    {
+      "epoch": 28.767123287671232,
+      "grad_norm": 0.07583874464035034,
+      "learning_rate": 8.910632746249186e-06,
+      "loss": 0.0273,
+      "step": 6300
+    },
+    {
+      "epoch": 28.812785388127853,
+      "grad_norm": 0.04650593921542168,
+      "learning_rate": 8.845401174168298e-06,
+      "loss": 0.0282,
+      "step": 6310
+    },
+    {
+      "epoch": 28.858447488584474,
+      "grad_norm": 0.10401676595211029,
+      "learning_rate": 8.78016960208741e-06,
+      "loss": 0.0281,
+      "step": 6320
+    },
+    {
+      "epoch": 28.904109589041095,
+      "grad_norm": 0.10608301311731339,
+      "learning_rate": 8.714938030006524e-06,
+      "loss": 0.0264,
+      "step": 6330
+    },
+    {
+      "epoch": 28.949771689497716,
+      "grad_norm": 0.06047683209180832,
+      "learning_rate": 8.649706457925636e-06,
+      "loss": 0.0287,
+      "step": 6340
+    },
+    {
+      "epoch": 28.995433789954337,
+      "grad_norm": 0.0955701544880867,
+      "learning_rate": 8.58447488584475e-06,
+      "loss": 0.026,
+      "step": 6350
+    },
+    {
+      "epoch": 29.0,
+      "eval_bertscore_f1": 0.8819990824320597,
+      "eval_bleu": 0.6091330252424353,
+      "eval_loss": 0.029434066265821457,
+      "eval_rougeL": 0.319823542874944,
+      "eval_runtime": 85.5987,
+      "eval_samples_per_second": 17.559,
+      "eval_steps_per_second": 1.098,
+      "step": 6351
+    },
+    {
+      "epoch": 29.041095890410958,
+      "grad_norm": 0.047558221966028214,
+      "learning_rate": 8.519243313763862e-06,
+      "loss": 0.0272,
+      "step": 6360
+    },
+    {
+      "epoch": 29.08675799086758,
+      "grad_norm": 0.07335501164197922,
+      "learning_rate": 8.454011741682975e-06,
+      "loss": 0.0274,
+      "step": 6370
+    },
+    {
+      "epoch": 29.1324200913242,
+      "grad_norm": 0.06457233428955078,
+      "learning_rate": 8.388780169602088e-06,
+      "loss": 0.03,
+      "step": 6380
+    },
+    {
+      "epoch": 29.17808219178082,
+      "grad_norm": 0.06746174395084381,
+      "learning_rate": 8.3235485975212e-06,
+      "loss": 0.031,
+      "step": 6390
+    },
+    {
+      "epoch": 29.22374429223744,
+      "grad_norm": 0.0505414679646492,
+      "learning_rate": 8.258317025440314e-06,
+      "loss": 0.026,
+      "step": 6400
+    },
+    {
+      "epoch": 29.269406392694062,
+      "grad_norm": 0.0832168459892273,
+      "learning_rate": 8.193085453359427e-06,
+      "loss": 0.0274,
+      "step": 6410
+    },
+    {
+      "epoch": 29.315068493150687,
+      "grad_norm": 0.055076733231544495,
+      "learning_rate": 8.127853881278539e-06,
+      "loss": 0.0267,
+      "step": 6420
+    },
+    {
+      "epoch": 29.360730593607308,
+      "grad_norm": 0.04963900148868561,
+      "learning_rate": 8.062622309197653e-06,
+      "loss": 0.0267,
+      "step": 6430
+    },
+    {
+      "epoch": 29.40639269406393,
+      "grad_norm": 0.06072743609547615,
+      "learning_rate": 7.997390737116765e-06,
+      "loss": 0.027,
+      "step": 6440
+    },
+    {
+      "epoch": 29.45205479452055,
+      "grad_norm": 0.046447575092315674,
+      "learning_rate": 7.932159165035879e-06,
+      "loss": 0.0252,
+      "step": 6450
+    },
+    {
+      "epoch": 29.49771689497717,
+      "grad_norm": 0.08060900121927261,
+      "learning_rate": 7.86692759295499e-06,
+      "loss": 0.0261,
+      "step": 6460
+    },
+    {
+      "epoch": 29.54337899543379,
+      "grad_norm": 0.05984543263912201,
+      "learning_rate": 7.801696020874103e-06,
+      "loss": 0.026,
+      "step": 6470
+    },
+    {
+      "epoch": 29.589041095890412,
+      "grad_norm": 0.044129859656095505,
+      "learning_rate": 7.736464448793217e-06,
+      "loss": 0.028,
+      "step": 6480
+    },
+    {
+      "epoch": 29.634703196347033,
+      "grad_norm": 0.06660878658294678,
+      "learning_rate": 7.671232876712329e-06,
+      "loss": 0.0281,
+      "step": 6490
+    },
+    {
+      "epoch": 29.680365296803654,
+      "grad_norm": 0.0642257034778595,
+      "learning_rate": 7.606001304631442e-06,
+      "loss": 0.0277,
+      "step": 6500
+    },
+    {
+      "epoch": 29.726027397260275,
+      "grad_norm": 0.06701590865850449,
+      "learning_rate": 7.540769732550555e-06,
+      "loss": 0.0283,
+      "step": 6510
+    },
+    {
+      "epoch": 29.771689497716896,
+      "grad_norm": 0.06078553572297096,
+      "learning_rate": 7.475538160469668e-06,
+      "loss": 0.0285,
+      "step": 6520
+    },
+    {
+      "epoch": 29.817351598173516,
+      "grad_norm": 0.07285226136445999,
+      "learning_rate": 7.410306588388781e-06,
+      "loss": 0.0267,
+      "step": 6530
+    },
+    {
+      "epoch": 29.863013698630137,
+      "grad_norm": 0.09485471993684769,
+      "learning_rate": 7.345075016307894e-06,
+      "loss": 0.028,
+      "step": 6540
+    },
+    {
+      "epoch": 29.908675799086758,
+      "grad_norm": 0.08331019431352615,
+      "learning_rate": 7.279843444227006e-06,
+      "loss": 0.028,
+      "step": 6550
+    },
+    {
+      "epoch": 29.95433789954338,
+      "grad_norm": 0.06982530653476715,
+      "learning_rate": 7.214611872146119e-06,
+      "loss": 0.0252,
+      "step": 6560
+    },
+    {
+      "epoch": 30.0,
+      "grad_norm": 0.19558538496494293,
+      "learning_rate": 7.149380300065232e-06,
+      "loss": 0.0297,
+      "step": 6570
+    },
+    {
+      "epoch": 30.0,
+      "eval_bertscore_f1": 0.8825751674310096,
+      "eval_bleu": 0.6418448303227442,
+      "eval_loss": 0.029364319518208504,
+      "eval_rougeL": 0.3167741253484545,
+      "eval_runtime": 85.3228,
+      "eval_samples_per_second": 17.615,
+      "eval_steps_per_second": 1.102,
+      "step": 6570
+    },
+    {
+      "epoch": 30.04566210045662,
+      "grad_norm": 0.25312867760658264,
+      "learning_rate": 7.084148727984345e-06,
+      "loss": 0.0286,
+      "step": 6580
+    },
+    {
+      "epoch": 30.091324200913242,
+      "grad_norm": 0.07719423621892929,
+      "learning_rate": 7.018917155903458e-06,
+      "loss": 0.0269,
+      "step": 6590
+    },
+    {
+      "epoch": 30.136986301369863,
+      "grad_norm": 0.03981988877058029,
+      "learning_rate": 6.9536855838225706e-06,
+      "loss": 0.0252,
+      "step": 6600
+    },
+    {
+      "epoch": 30.182648401826484,
+      "grad_norm": 0.12466205656528473,
+      "learning_rate": 6.8884540117416836e-06,
+      "loss": 0.0275,
+      "step": 6610
+    },
+    {
+      "epoch": 30.228310502283104,
+      "grad_norm": 0.044825151562690735,
+      "learning_rate": 6.823222439660797e-06,
+      "loss": 0.0269,
+      "step": 6620
+    },
+    {
+      "epoch": 30.273972602739725,
+      "grad_norm": 0.11382755637168884,
+      "learning_rate": 6.75799086757991e-06,
+      "loss": 0.0299,
+      "step": 6630
+    },
+    {
+      "epoch": 30.319634703196346,
+      "grad_norm": 0.11189394444227219,
+      "learning_rate": 6.692759295499023e-06,
+      "loss": 0.0301,
+      "step": 6640
+    },
+    {
+      "epoch": 30.365296803652967,
+      "grad_norm": 0.059967365115880966,
+      "learning_rate": 6.627527723418135e-06,
+      "loss": 0.0273,
+      "step": 6650
+    },
+    {
+      "epoch": 30.410958904109588,
+      "grad_norm": 0.04897484555840492,
+      "learning_rate": 6.562296151337248e-06,
+      "loss": 0.0266,
+      "step": 6660
+    },
+    {
+      "epoch": 30.45662100456621,
+      "grad_norm": 0.16777561604976654,
+      "learning_rate": 6.497064579256361e-06,
+      "loss": 0.0282,
+      "step": 6670
+    },
+    {
+      "epoch": 30.50228310502283,
+      "grad_norm": 0.106645368039608,
+      "learning_rate": 6.431833007175474e-06,
+      "loss": 0.0289,
+      "step": 6680
+    },
+    {
+      "epoch": 30.54794520547945,
+      "grad_norm": 0.05931256338953972,
+      "learning_rate": 6.366601435094587e-06,
+      "loss": 0.0266,
+      "step": 6690
+    },
+    {
+      "epoch": 30.59360730593607,
+      "grad_norm": 0.0640476867556572,
+      "learning_rate": 6.301369863013699e-06,
+      "loss": 0.028,
+      "step": 6700
+    },
+    {
+      "epoch": 30.639269406392692,
+      "grad_norm": 0.05507276952266693,
+      "learning_rate": 6.236138290932811e-06,
+      "loss": 0.0272,
+      "step": 6710
+    },
+    {
+      "epoch": 30.684931506849313,
+      "grad_norm": 0.07414695620536804,
+      "learning_rate": 6.170906718851924e-06,
+      "loss": 0.0286,
+      "step": 6720
+    },
+    {
+      "epoch": 30.730593607305934,
+      "grad_norm": 0.08643855154514313,
+      "learning_rate": 6.105675146771037e-06,
+      "loss": 0.0303,
+      "step": 6730
+    },
+    {
+      "epoch": 30.77625570776256,
+      "grad_norm": 0.052528686821460724,
+      "learning_rate": 6.04044357469015e-06,
+      "loss": 0.0279,
+      "step": 6740
+    },
+    {
+      "epoch": 30.82191780821918,
+      "grad_norm": 0.0555759035050869,
+      "learning_rate": 5.975212002609263e-06,
+      "loss": 0.0266,
+      "step": 6750
+    },
+    {
+      "epoch": 30.8675799086758,
+      "grad_norm": 0.046230562031269073,
+      "learning_rate": 5.9099804305283755e-06,
+      "loss": 0.0274,
+      "step": 6760
+    },
+    {
+      "epoch": 30.91324200913242,
+      "grad_norm": 0.07497312128543854,
+      "learning_rate": 5.8447488584474885e-06,
+      "loss": 0.027,
+      "step": 6770
+    },
+    {
+      "epoch": 30.958904109589042,
+      "grad_norm": 0.11832709610462189,
+      "learning_rate": 5.7795172863666015e-06,
+      "loss": 0.0266,
+      "step": 6780
+    },
+    {
+      "epoch": 31.0,
+      "eval_bertscore_f1": 0.8814896751623985,
+      "eval_bleu": 0.58698815562408,
+      "eval_loss": 0.029345136135816574,
+      "eval_rougeL": 0.3197356893416895,
+      "eval_runtime": 85.6812,
+      "eval_samples_per_second": 17.542,
+      "eval_steps_per_second": 1.097,
+      "step": 6789
+    },
+    {
+      "epoch": 31.004566210045663,
+      "grad_norm": 0.06090688705444336,
+      "learning_rate": 5.7142857142857145e-06,
+      "loss": 0.0272,
+      "step": 6790
+    },
+    {
+      "epoch": 31.050228310502284,
+      "grad_norm": 0.11053823679685593,
+      "learning_rate": 5.6490541422048275e-06,
+      "loss": 0.0274,
+      "step": 6800
+    },
+    {
+      "epoch": 31.095890410958905,
+      "grad_norm": 0.06031995266675949,
+      "learning_rate": 5.58382257012394e-06,
+      "loss": 0.0259,
+      "step": 6810
+    },
+    {
+      "epoch": 31.141552511415526,
+      "grad_norm": 0.18411608040332794,
+      "learning_rate": 5.518590998043053e-06,
+      "loss": 0.0249,
+      "step": 6820
+    },
+    {
+      "epoch": 31.187214611872147,
+      "grad_norm": 0.066913902759552,
+      "learning_rate": 5.453359425962166e-06,
+      "loss": 0.0288,
+      "step": 6830
+    },
+    {
+      "epoch": 31.232876712328768,
+      "grad_norm": 0.14979858696460724,
+      "learning_rate": 5.388127853881279e-06,
+      "loss": 0.0256,
+      "step": 6840
+    },
+    {
+      "epoch": 31.27853881278539,
+      "grad_norm": 0.1862526834011078,
+      "learning_rate": 5.322896281800392e-06,
+      "loss": 0.0259,
+      "step": 6850
+    },
+    {
+      "epoch": 31.32420091324201,
+      "grad_norm": 0.20405951142311096,
+      "learning_rate": 5.257664709719504e-06,
+      "loss": 0.0273,
+      "step": 6860
+    },
+    {
+      "epoch": 31.36986301369863,
+      "grad_norm": 0.09556996822357178,
+      "learning_rate": 5.192433137638617e-06,
+      "loss": 0.0284,
+      "step": 6870
+    },
+    {
+      "epoch": 31.41552511415525,
+      "grad_norm": 0.0899861678481102,
+      "learning_rate": 5.12720156555773e-06,
+      "loss": 0.0283,
+      "step": 6880
+    },
+    {
+      "epoch": 31.461187214611872,
+      "grad_norm": 0.09562750160694122,
+      "learning_rate": 5.061969993476843e-06,
+      "loss": 0.0301,
+      "step": 6890
+    },
+    {
+      "epoch": 31.506849315068493,
+      "grad_norm": 0.05665372312068939,
+      "learning_rate": 4.996738421395956e-06,
+      "loss": 0.0296,
+      "step": 6900
+    },
+    {
+      "epoch": 31.552511415525114,
+      "grad_norm": 0.05309437960386276,
+      "learning_rate": 4.931506849315068e-06,
+      "loss": 0.0266,
+      "step": 6910
+    },
+    {
+      "epoch": 31.598173515981735,
+      "grad_norm": 0.0609840489923954,
+      "learning_rate": 4.866275277234181e-06,
+      "loss": 0.0278,
+      "step": 6920
+    },
+    {
+      "epoch": 31.643835616438356,
+      "grad_norm": 0.2213747799396515,
+      "learning_rate": 4.801043705153294e-06,
+      "loss": 0.0285,
+      "step": 6930
+    },
+    {
+      "epoch": 31.689497716894977,
+      "grad_norm": 0.2735954225063324,
+      "learning_rate": 4.735812133072407e-06,
+      "loss": 0.0286,
+      "step": 6940
+    },
+    {
+      "epoch": 31.735159817351597,
+      "grad_norm": 0.07147183269262314,
+      "learning_rate": 4.67058056099152e-06,
+      "loss": 0.0301,
+      "step": 6950
+    },
+    {
+      "epoch": 31.78082191780822,
+      "grad_norm": 0.05247138813138008,
+      "learning_rate": 4.6053489889106324e-06,
+      "loss": 0.0255,
+      "step": 6960
+    },
+    {
+      "epoch": 31.82648401826484,
+      "grad_norm": 0.04454132914543152,
+      "learning_rate": 4.5401174168297455e-06,
+      "loss": 0.0269,
+      "step": 6970
+    },
+    {
+      "epoch": 31.87214611872146,
+      "grad_norm": 0.06428872048854828,
+      "learning_rate": 4.4748858447488585e-06,
+      "loss": 0.029,
+      "step": 6980
+    },
+    {
+      "epoch": 31.91780821917808,
+      "grad_norm": 0.1623128354549408,
+      "learning_rate": 4.4096542726679715e-06,
+      "loss": 0.0271,
+      "step": 6990
+    },
+    {
+      "epoch": 31.963470319634702,
+      "grad_norm": 0.13374125957489014,
+      "learning_rate": 4.3444227005870845e-06,
+      "loss": 0.0296,
+      "step": 7000
+    },
+    {
+      "epoch": 32.0,
+      "eval_bertscore_f1": 0.882116102847114,
+      "eval_bleu": 0.6198143411594959,
+      "eval_loss": 0.029304716736078262,
+      "eval_rougeL": 0.31779224232169023,
+      "eval_runtime": 85.5713,
+      "eval_samples_per_second": 17.564,
+      "eval_steps_per_second": 1.098,
+      "step": 7008
+    },
+    {
+      "epoch": 32.009132420091326,
+      "grad_norm": 0.09440213441848755,
+      "learning_rate": 4.2791911285061975e-06,
+      "loss": 0.0285,
+      "step": 7010
+    },
+    {
+      "epoch": 32.054794520547944,
+      "grad_norm": 0.06098993867635727,
+      "learning_rate": 4.21395955642531e-06,
+      "loss": 0.027,
+      "step": 7020
+    },
+    {
+      "epoch": 32.10045662100457,
+      "grad_norm": 0.04748637229204178,
+      "learning_rate": 4.148727984344423e-06,
+      "loss": 0.0269,
+      "step": 7030
+    },
+    {
+      "epoch": 32.146118721461185,
+      "grad_norm": 0.049264486879110336,
+      "learning_rate": 4.083496412263536e-06,
+      "loss": 0.0273,
+      "step": 7040
+    },
+    {
+      "epoch": 32.19178082191781,
+      "grad_norm": 0.0713597983121872,
+      "learning_rate": 4.018264840182649e-06,
+      "loss": 0.0274,
+      "step": 7050
+    },
+    {
+      "epoch": 32.23744292237443,
+      "grad_norm": 0.06906388700008392,
+      "learning_rate": 3.953033268101762e-06,
+      "loss": 0.0288,
+      "step": 7060
+    },
+    {
+      "epoch": 32.28310502283105,
+      "grad_norm": 0.1591597944498062,
+      "learning_rate": 3.887801696020874e-06,
+      "loss": 0.0269,
+      "step": 7070
+    },
+    {
+      "epoch": 32.32876712328767,
+      "grad_norm": 0.04023635759949684,
+      "learning_rate": 3.822570123939987e-06,
+      "loss": 0.0279,
+      "step": 7080
+    },
+    {
+      "epoch": 32.37442922374429,
+      "grad_norm": 0.0428457073867321,
+      "learning_rate": 3.7573385518591e-06,
+      "loss": 0.0274,
+      "step": 7090
+    },
+    {
+      "epoch": 32.42009132420091,
+      "grad_norm": 0.09162160009145737,
+      "learning_rate": 3.6921069797782126e-06,
+      "loss": 0.0265,
+      "step": 7100
+    },
+    {
+      "epoch": 32.465753424657535,
+      "grad_norm": 0.05572756007313728,
+      "learning_rate": 3.6268754076973256e-06,
+      "loss": 0.0274,
+      "step": 7110
+    },
+    {
+      "epoch": 32.51141552511415,
+      "grad_norm": 0.05541946738958359,
+      "learning_rate": 3.5616438356164386e-06,
+      "loss": 0.027,
+      "step": 7120
+    },
+    {
+      "epoch": 32.55707762557078,
+      "grad_norm": 0.10358795523643494,
+      "learning_rate": 3.496412263535551e-06,
+      "loss": 0.0265,
+      "step": 7130
+    },
+    {
+      "epoch": 32.602739726027394,
+      "grad_norm": 0.10496276617050171,
+      "learning_rate": 3.4311806914546642e-06,
+      "loss": 0.0278,
+      "step": 7140
+    },
+    {
+      "epoch": 32.64840182648402,
+      "grad_norm": 0.049546003341674805,
+      "learning_rate": 3.365949119373777e-06,
+      "loss": 0.0268,
+      "step": 7150
+    },
+    {
+      "epoch": 32.694063926940636,
+      "grad_norm": 0.04895747825503349,
+      "learning_rate": 3.30071754729289e-06,
+      "loss": 0.0288,
+      "step": 7160
+    },
+    {
+      "epoch": 32.73972602739726,
+      "grad_norm": 0.046649519354104996,
+      "learning_rate": 3.235485975212003e-06,
+      "loss": 0.0267,
+      "step": 7170
+    },
+    {
+      "epoch": 32.78538812785388,
+      "grad_norm": 0.060476336628198624,
+      "learning_rate": 3.1702544031311154e-06,
+      "loss": 0.0275,
+      "step": 7180
+    },
+    {
+      "epoch": 32.8310502283105,
+      "grad_norm": 0.08833310008049011,
+      "learning_rate": 3.1050228310502285e-06,
+      "loss": 0.0282,
+      "step": 7190
+    },
+    {
+      "epoch": 32.87671232876713,
+      "grad_norm": 0.08543556928634644,
+      "learning_rate": 3.0397912589693415e-06,
+      "loss": 0.0272,
+      "step": 7200
+    },
+    {
+      "epoch": 32.922374429223744,
+      "grad_norm": 0.09319938719272614,
+      "learning_rate": 2.974559686888454e-06,
+      "loss": 0.0254,
+      "step": 7210
+    },
+    {
+      "epoch": 32.96803652968037,
+      "grad_norm": 0.07596831768751144,
+      "learning_rate": 2.909328114807567e-06,
+      "loss": 0.0267,
+      "step": 7220
+    },
+    {
+      "epoch": 33.0,
+      "eval_bertscore_f1": 0.8826242640108881,
+      "eval_bleu": 0.6506840160853707,
+      "eval_loss": 0.02929467149078846,
+      "eval_rougeL": 0.3175517644987171,
+      "eval_runtime": 85.9677,
+      "eval_samples_per_second": 17.483,
+      "eval_steps_per_second": 1.093,
+      "step": 7227
+    },
+    {
+      "epoch": 33.013698630136986,
+      "grad_norm": 0.2628081440925598,
+      "learning_rate": 2.8440965427266797e-06,
+      "loss": 0.0288,
+      "step": 7230
+    },
+    {
+      "epoch": 33.05936073059361,
+      "grad_norm": 0.05208074674010277,
+      "learning_rate": 2.7788649706457927e-06,
+      "loss": 0.026,
+      "step": 7240
+    },
+    {
+      "epoch": 33.10502283105023,
+      "grad_norm": 0.0972835123538971,
+      "learning_rate": 2.7136333985649057e-06,
+      "loss": 0.0266,
+      "step": 7250
+    },
+    {
+      "epoch": 33.15068493150685,
+      "grad_norm": 0.07271518558263779,
+      "learning_rate": 2.6484018264840183e-06,
+      "loss": 0.028,
+      "step": 7260
+    },
+    {
+      "epoch": 33.19634703196347,
+      "grad_norm": 0.196941077709198,
+      "learning_rate": 2.5831702544031313e-06,
+      "loss": 0.027,
+      "step": 7270
+    },
+    {
+      "epoch": 33.242009132420094,
+      "grad_norm": 0.07205737382173538,
+      "learning_rate": 2.517938682322244e-06,
+      "loss": 0.0279,
+      "step": 7280
+    },
+    {
+      "epoch": 33.28767123287671,
+      "grad_norm": 0.06259785592556,
+      "learning_rate": 2.452707110241357e-06,
+      "loss": 0.0266,
+      "step": 7290
+    },
+    {
+      "epoch": 33.333333333333336,
+      "grad_norm": 0.047566816210746765,
+      "learning_rate": 2.38747553816047e-06,
+      "loss": 0.0262,
+      "step": 7300
+    },
+    {
+      "epoch": 33.37899543378995,
+      "grad_norm": 0.06657677888870239,
+      "learning_rate": 2.3222439660795826e-06,
+      "loss": 0.0269,
+      "step": 7310
+    },
+    {
+      "epoch": 33.42465753424658,
+      "grad_norm": 0.13266189396381378,
+      "learning_rate": 2.2570123939986956e-06,
+      "loss": 0.0282,
+      "step": 7320
+    },
+    {
+      "epoch": 33.470319634703195,
+      "grad_norm": 0.08919128775596619,
+      "learning_rate": 2.191780821917808e-06,
+      "loss": 0.0297,
+      "step": 7330
+    },
+    {
+      "epoch": 33.51598173515982,
+      "grad_norm": 0.05524025857448578,
+      "learning_rate": 2.126549249836921e-06,
+      "loss": 0.029,
+      "step": 7340
+    },
+    {
+      "epoch": 33.56164383561644,
+      "grad_norm": 0.0752720758318901,
+      "learning_rate": 2.0613176777560342e-06,
+      "loss": 0.0278,
+      "step": 7350
+    },
+    {
+      "epoch": 33.60730593607306,
+      "grad_norm": 0.06446948647499084,
+      "learning_rate": 1.996086105675147e-06,
+      "loss": 0.0279,
+      "step": 7360
+    },
+    {
+      "epoch": 33.65296803652968,
+      "grad_norm": 0.1158689633011818,
+      "learning_rate": 1.93085453359426e-06,
+      "loss": 0.0273,
+      "step": 7370
+    },
+    {
+      "epoch": 33.6986301369863,
+      "grad_norm": 0.05515358969569206,
+      "learning_rate": 1.8656229615133726e-06,
+      "loss": 0.0276,
+      "step": 7380
+    },
+    {
+      "epoch": 33.74429223744292,
+      "grad_norm": 0.13118913769721985,
+      "learning_rate": 1.8003913894324854e-06,
+      "loss": 0.0253,
+      "step": 7390
+    },
+    {
+      "epoch": 33.789954337899545,
+      "grad_norm": 0.0866023376584053,
+      "learning_rate": 1.7351598173515982e-06,
+      "loss": 0.0269,
+      "step": 7400
+    },
+    {
+      "epoch": 33.83561643835616,
+      "grad_norm": 0.05457647144794464,
+      "learning_rate": 1.669928245270711e-06,
+      "loss": 0.0274,
+      "step": 7410
+    },
+    {
+      "epoch": 33.881278538812786,
+      "grad_norm": 0.1843695044517517,
+      "learning_rate": 1.604696673189824e-06,
+      "loss": 0.0281,
+      "step": 7420
+    },
+    {
+      "epoch": 33.926940639269404,
+      "grad_norm": 0.054692674428224564,
+      "learning_rate": 1.5394651011089369e-06,
+      "loss": 0.0301,
+      "step": 7430
+    },
+    {
+      "epoch": 33.97260273972603,
+      "grad_norm": 0.05386902019381523,
+      "learning_rate": 1.4742335290280497e-06,
+      "loss": 0.0263,
+      "step": 7440
+    },
+    {
+      "epoch": 34.0,
+      "eval_bertscore_f1": 0.8824349555743985,
+      "eval_bleu": 0.6426832403462529,
+      "eval_loss": 0.029273033142089844,
+      "eval_rougeL": 0.31879717895642,
+      "eval_runtime": 85.7948,
+      "eval_samples_per_second": 17.519,
+      "eval_steps_per_second": 1.096,
+      "step": 7446
+    },
+    {
+      "epoch": 34.018264840182646,
+      "grad_norm": 0.059233132749795914,
+      "learning_rate": 1.4090019569471625e-06,
+      "loss": 0.0275,
+      "step": 7450
+    },
+    {
+      "epoch": 34.06392694063927,
+      "grad_norm": 0.05460565164685249,
+      "learning_rate": 1.3437703848662755e-06,
+      "loss": 0.0268,
+      "step": 7460
+    },
+    {
+      "epoch": 34.10958904109589,
+      "grad_norm": 0.05039376765489578,
+      "learning_rate": 1.2785388127853883e-06,
+      "loss": 0.027,
+      "step": 7470
+    },
+    {
+      "epoch": 34.15525114155251,
+      "grad_norm": 0.061115965247154236,
+      "learning_rate": 1.2133072407045011e-06,
+      "loss": 0.0271,
+      "step": 7480
+    },
+    {
+      "epoch": 34.20091324200913,
+      "grad_norm": 0.13192027807235718,
+      "learning_rate": 1.148075668623614e-06,
+      "loss": 0.0273,
+      "step": 7490
+    },
+    {
+      "epoch": 34.24657534246575,
+      "grad_norm": 0.08353226631879807,
+      "learning_rate": 1.0828440965427267e-06,
+      "loss": 0.0258,
+      "step": 7500
+    },
+    {
+      "epoch": 34.29223744292237,
+      "grad_norm": 0.060591306537389755,
+      "learning_rate": 1.0176125244618398e-06,
+      "loss": 0.0264,
+      "step": 7510
+    },
+    {
+      "epoch": 34.337899543378995,
+      "grad_norm": 0.1371307224035263,
+      "learning_rate": 9.523809523809526e-07,
+      "loss": 0.0296,
+      "step": 7520
+    },
+    {
+      "epoch": 34.38356164383562,
+      "grad_norm": 0.07051808387041092,
+      "learning_rate": 8.871493803000652e-07,
+      "loss": 0.0265,
+      "step": 7530
+    },
+    {
+      "epoch": 34.42922374429224,
+      "grad_norm": 0.0630018338561058,
+      "learning_rate": 8.219178082191781e-07,
+      "loss": 0.0283,
+      "step": 7540
+    },
+    {
+      "epoch": 34.47488584474886,
+      "grad_norm": 0.05780971795320511,
+      "learning_rate": 7.56686236138291e-07,
+      "loss": 0.0265,
+      "step": 7550
+    },
+    {
+      "epoch": 34.52054794520548,
+      "grad_norm": 0.09374094754457474,
+      "learning_rate": 6.914546640574038e-07,
+      "loss": 0.0267,
+      "step": 7560
+    },
+    {
+      "epoch": 34.5662100456621,
+      "grad_norm": 0.06963168829679489,
+      "learning_rate": 6.262230919765167e-07,
+      "loss": 0.0287,
+      "step": 7570
+    },
+    {
+      "epoch": 34.61187214611872,
+      "grad_norm": 0.06316248327493668,
+      "learning_rate": 5.609915198956295e-07,
+      "loss": 0.0265,
+      "step": 7580
+    },
+    {
+      "epoch": 34.657534246575345,
+      "grad_norm": 0.09635402262210846,
+      "learning_rate": 4.957599478147424e-07,
+      "loss": 0.0276,
+      "step": 7590
+    },
+    {
+      "epoch": 34.70319634703196,
+      "grad_norm": 0.20677725970745087,
+      "learning_rate": 4.305283757338552e-07,
+      "loss": 0.0271,
+      "step": 7600
+    },
+    {
+      "epoch": 34.74885844748859,
+      "grad_norm": 0.22448600828647614,
+      "learning_rate": 3.6529680365296803e-07,
+      "loss": 0.0277,
+      "step": 7610
+    },
+    {
+      "epoch": 34.794520547945204,
+      "grad_norm": 0.05154965817928314,
+      "learning_rate": 3.000652315720809e-07,
+      "loss": 0.0276,
+      "step": 7620
+    },
+    {
+      "epoch": 34.84018264840183,
+      "grad_norm": 0.0522591657936573,
+      "learning_rate": 2.3483365949119375e-07,
+      "loss": 0.0277,
+      "step": 7630
+    },
+    {
+      "epoch": 34.885844748858446,
+      "grad_norm": 0.07666347175836563,
+      "learning_rate": 1.696020874103066e-07,
+      "loss": 0.0278,
+      "step": 7640
+    },
+    {
+      "epoch": 34.93150684931507,
+      "grad_norm": 0.055545128881931305,
+      "learning_rate": 1.0437051532941944e-07,
+      "loss": 0.0276,
+      "step": 7650
+    },
+    {
+      "epoch": 34.97716894977169,
+      "grad_norm": 0.12518319487571716,
+      "learning_rate": 3.9138943248532294e-08,
+      "loss": 0.0284,
+      "step": 7660
+    },
+    {
+      "epoch": 35.0,
+      "eval_bertscore_f1": 0.8826152413429138,
+      "eval_bleu": 0.6509786399199852,
+      "eval_loss": 0.02927256189286709,
+      "eval_rougeL": 0.3174815619181566,
+      "eval_runtime": 86.2786,
+      "eval_samples_per_second": 17.42,
+      "eval_steps_per_second": 1.089,
+      "step": 7665
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 7665,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 35,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.737006710980608e+16,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}